feat: indie status page MVP -- FastAPI + SQLite
- 8 DB models (services, incidents, monitors, subscribers, etc.) - Full CRUD API for services, incidents, monitors - Public status page with live data - Incident detail page with timeline - API key authentication - Uptime monitoring scheduler - 13 tests passing - TECHNICAL_DESIGN.md with full spec
This commit is contained in:
commit
902133edd3
4655 changed files with 1342691 additions and 0 deletions
1020
venv/lib/python3.11/site-packages/mypyc/lib-rt/CPy.h
Normal file
1020
venv/lib/python3.11/site-packages/mypyc/lib-rt/CPy.h
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,76 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("avx"))), apply_to=function)
|
||||
#else
|
||||
#pragma GCC target("avx")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_AVX_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_AVX_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_AVX_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
|
||||
#if BASE64_AVX_USE_ASM
|
||||
# include "./enc_loop_asm.c"
|
||||
#else
|
||||
# include "../ssse3/enc_translate.c"
|
||||
# include "../ssse3/enc_reshuffle.c"
|
||||
# include "../ssse3/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_AVX
|
||||
|
||||
// Public entry point: encode a stream chunk using the AVX codec.
// The generic head/tail fragments are textually included and are expected to
// declare the locals (s, slen, o, olen) consumed by the bulk loop below —
// TODO(review): confirm against ../generic/enc_head.c.
void
base64_stream_encode_avx BASE64_ENC_PARAMS
{
#if HAVE_AVX
	#include "../generic/enc_head.c"

	// For supported compilers, use a hand-optimized inline assembly
	// encoder. Otherwise fall back on the SSSE3 encoder, but compiled with
	// AVX flags to generate better optimized AVX code.

#if BASE64_AVX_USE_ASM
	enc_loop_avx(&s, &slen, &o, &olen);
#else
	enc_loop_ssse3(&s, &slen, &o, &olen);
#endif

	#include "../generic/enc_tail.c"
#else
	// AVX support was compiled out: defer to the stub implementation.
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Public entry point: decode a stream chunk using the AVX codec. The return
// value is produced by the included generic tail fragment (or by the stub
// when AVX support is compiled out).
int
base64_stream_decode_avx BASE64_DEC_PARAMS
{
#if HAVE_AVX
	#include "../generic/dec_head.c"

	// There is no dedicated AVX decode loop: the SSSE3 loop is reused,
	// compiled under AVX codegen flags (see the includes at file top).
	dec_loop_ssse3(&s, &slen, &o, &olen);

	#include "../generic/dec_tail.c"

	// Last function in this translation unit: balance the clang
	// per-function target attribute pushed at the top of the file.
#if defined(__clang__)
#pragma clang attribute pop
#endif
#else
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,264 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
// code repetition. The preprocessor is used to template large sections of
// inline assembly that differ only in the registers used. If the code was
// written out by hand, it would become very large and hard to audit.

// Generate a block of inline assembly that loads register R0 from memory. The
// offset at which the register is loaded is set by the given round.
// Each encoding round consumes 12 source bytes, hence the "* 12" scaling.
#define LOAD(R0, ROUND) \
	"vlddqu ("#ROUND" * 12)(%[src]), %["R0"] \n\t"

// Generate a block of inline assembly that deinterleaves and shuffles register
// R0 using preloaded constants. Outputs in R0 and R1.
#define SHUF(R0, R1, R2) \
	"vpshufb %[lut0], %["R0"], %["R1"] \n\t" \
	"vpand %["R1"], %[msk0], %["R2"] \n\t" \
	"vpand %["R1"], %[msk2], %["R1"] \n\t" \
	"vpmulhuw %["R2"], %[msk1], %["R2"] \n\t" \
	"vpmullw %["R1"], %[msk3], %["R1"] \n\t" \
	"vpor %["R1"], %["R2"], %["R1"] \n\t"

// Generate a block of inline assembly that takes R0 and R1 and translates
// their contents to the base64 alphabet, using preloaded constants.
#define TRAN(R0, R1, R2) \
	"vpsubusb %[n51], %["R1"], %["R0"] \n\t" \
	"vpcmpgtb %[n25], %["R1"], %["R2"] \n\t" \
	"vpsubb %["R2"], %["R0"], %["R0"] \n\t" \
	"vpshufb %["R0"], %[lut1], %["R2"] \n\t" \
	"vpaddb %["R1"], %["R2"], %["R0"] \n\t"

// Generate a block of inline assembly that stores the given register R0 at an
// offset set by the given round. Each round produces 16 output bytes.
#define STOR(R0, ROUND) \
	"vmovdqu %["R0"], ("#ROUND" * 16)(%[dst]) \n\t"

// Generate a block of inline assembly that generates a single self-contained
// encoder round: fetch the data, process it, and store the result. Then update
// the source and destination pointers.
#define ROUND() \
	LOAD("a", 0) \
	SHUF("a", "b", "c") \
	TRAN("a", "b", "c") \
	STOR("a", 0) \
	"add $12, %[src] \n\t" \
	"add $16, %[dst] \n\t"

// Define a macro that initiates a three-way interleaved encoding round by
// preloading registers a, b and c from memory.
// The register graph shows which registers are in use during each step, and
// is a visual aid for choosing registers for that step. Symbol index:
//
//  + indicates that a register is loaded by that step.
//  | indicates that a register is in use and must not be touched.
//  - indicates that a register is decommissioned by that step.
//  x indicates that a register is used as a temporary by that step.
//  V indicates that a register is an input or output to the macro.
//
#define ROUND_3_INIT() 			/*  a  b  c  d  e  f  */ \
	LOAD("a", 0)			/*  +                 */ \
	SHUF("a", "d", "e")		/*  |        +  x     */ \
	LOAD("b", 1)			/*  |  +     |        */ \
	TRAN("a", "d", "e")		/*  |  |     -  x     */ \
	LOAD("c", 2)			/*  V  V  V           */

// Define a macro that translates, shuffles and stores the input registers A, B
// and C, and preloads registers D, E and F for the next round.
// This macro can be arbitrarily daisy-chained by feeding output registers D, E
// and F back into the next round as input registers A, B and C. The macro
// carefully interleaves memory operations with data operations for optimal
// pipelined performance.

#define ROUND_3(ROUND, A,B,C,D,E,F) 	/*  A  B  C  D  E  F  */ \
	LOAD(D, (ROUND + 3))		/*  V  V  V  +        */ \
	SHUF(B, E, F)			/*  |  |  |  |  +  x  */ \
	STOR(A, (ROUND + 0))		/*  -  |  |  |  |     */ \
	TRAN(B, E, F)			/*     |  |  |  -  x  */ \
	LOAD(E, (ROUND + 4))		/*     |  |  |  +     */ \
	SHUF(C, A, F)			/*  +  |  |  |  |  x  */ \
	STOR(B, (ROUND + 1))		/*  |  -  |  |  |     */ \
	TRAN(C, A, F)			/*  -     |  |  |  x  */ \
	LOAD(F, (ROUND + 5))		/*        |  |  |  +  */ \
	SHUF(D, A, B)			/*  +  x     |  |  |  */ \
	STOR(C, (ROUND + 2))		/*  |     -  |  |  |  */ \
	TRAN(D, A, B)			/*  -  x     V  V  V  */

// Define a macro that terminates a ROUND_3 macro by taking pre-loaded
// registers D, E and F, and translating, shuffling and storing them.
#define ROUND_3_END(ROUND, A,B,C,D,E,F)	/*  A  B  C  D  E  F  */ \
	SHUF(E, A, B)			/*  +  x     V  V  V  */ \
	STOR(D, (ROUND + 3))		/*  |        -  |  |  */ \
	TRAN(E, A, B)			/*  -  x        |  |  */ \
	SHUF(F, C, D)			/*        +  x     |  */ \
	STOR(E, (ROUND + 4))		/*        |     -  |  */ \
	TRAN(F, C, D)			/*        -  x     |  */ \
	STOR(F, (ROUND + 5))		/*                 -  */

// Define a type A round. Inputs are a, b, and c, outputs are d, e, and f.
#define ROUND_3_A(ROUND) \
	ROUND_3(ROUND, "a", "b", "c", "d", "e", "f")

// Define a type B round. Inputs and outputs are swapped with regard to type A.
#define ROUND_3_B(ROUND) \
	ROUND_3(ROUND, "d", "e", "f", "a", "b", "c")

// Terminating macro for a type A round.
#define ROUND_3_A_LAST(ROUND) \
	ROUND_3_A(ROUND) \
	ROUND_3_END(ROUND, "a", "b", "c", "d", "e", "f")

// Terminating macro for a type B round.
#define ROUND_3_B_LAST(ROUND) \
	ROUND_3_B(ROUND) \
	ROUND_3_END(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
// compilers). It may be true, but the goal here is not C99 portability.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"

// Bulk AVX encoder loop, implemented as one large inline-assembly statement
// built from the ROUND_* macros above. Consumes input in multiples of 12
// bytes and produces 16 output bytes per round; s/slen/o/olen are advanced
// in place and the scalar tail is left for the generic code.
static inline void
enc_loop_avx (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// For a clearer explanation of the algorithm used by this function,
	// please refer to the plain (not inline assembly) implementation. This
	// function follows the same basic logic.

	if (*slen < 16) {
		return;
	}

	// Process blocks of 12 bytes at a time. Input is read in blocks of 16
	// bytes, so "reserve" four bytes from the input buffer to ensure that
	// we never read beyond the end of the input buffer.
	size_t rounds = (*slen - 4) / 12;

	*slen -= rounds * 12;	// 12 bytes consumed per round
	*olen += rounds * 16;	// 16 bytes produced per round

	// Number of times to go through the 36x loop.
	size_t loops = rounds / 36;

	// Number of rounds remaining after the 36x loop.
	rounds %= 36;

	// Lookup tables.
	const __m128i lut0 = _mm_set_epi8(
		10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);

	const __m128i lut1 = _mm_setr_epi8(
		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);

	// Temporary registers.
	__m128i a, b, c, d, e, f;

	__asm__ volatile (

		// If there are 36 rounds or more, enter a 36x unrolled loop of
		// interleaved encoding rounds. The rounds interleave memory
		// operations (load/store) with data operations (table lookups,
		// etc) to maximize pipeline throughput.
		" test %[loops], %[loops] \n\t"
		" jz 18f \n\t"
		" jmp 36f \n\t"
		" \n\t"
		".balign 64 \n\t"
		"36: " ROUND_3_INIT()
		" " ROUND_3_A( 0)
		" " ROUND_3_B( 3)
		" " ROUND_3_A( 6)
		" " ROUND_3_B( 9)
		" " ROUND_3_A(12)
		" " ROUND_3_B(15)
		" " ROUND_3_A(18)
		" " ROUND_3_B(21)
		" " ROUND_3_A(24)
		" " ROUND_3_B(27)
		" " ROUND_3_A_LAST(30)
		" add $(12 * 36), %[src] \n\t"
		" add $(16 * 36), %[dst] \n\t"
		" dec %[loops] \n\t"
		" jnz 36b \n\t"

		// Enter an 18x unrolled loop for rounds of 18 or more.
		"18: cmp $18, %[rounds] \n\t"
		" jl 9f \n\t"
		" " ROUND_3_INIT()
		" " ROUND_3_A(0)
		" " ROUND_3_B(3)
		" " ROUND_3_A(6)
		" " ROUND_3_B(9)
		" " ROUND_3_A_LAST(12)
		" sub $18, %[rounds] \n\t"
		" add $(12 * 18), %[src] \n\t"
		" add $(16 * 18), %[dst] \n\t"

		// Enter a 9x unrolled loop for rounds of 9 or more.
		"9: cmp $9, %[rounds] \n\t"
		" jl 6f \n\t"
		" " ROUND_3_INIT()
		" " ROUND_3_A(0)
		" " ROUND_3_B_LAST(3)
		" sub $9, %[rounds] \n\t"
		" add $(12 * 9), %[src] \n\t"
		" add $(16 * 9), %[dst] \n\t"

		// Enter a 6x unrolled loop for rounds of 6 or more.
		"6: cmp $6, %[rounds] \n\t"
		" jl 55f \n\t"
		" " ROUND_3_INIT()
		" " ROUND_3_A_LAST(0)
		" sub $6, %[rounds] \n\t"
		" add $(12 * 6), %[src] \n\t"
		" add $(16 * 6), %[dst] \n\t"

		// Dispatch the remaining rounds 0..5.
		"55: cmp $3, %[rounds] \n\t"
		" jg 45f \n\t"
		" je 3f \n\t"
		" cmp $1, %[rounds] \n\t"
		" jg 2f \n\t"
		" je 1f \n\t"
		" jmp 0f \n\t"

		"45: cmp $4, %[rounds] \n\t"
		" je 4f \n\t"

		// Block of non-interlaced encoding rounds, which can each
		// individually be jumped to. Rounds fall through to the next.
		"5: " ROUND()
		"4: " ROUND()
		"3: " ROUND()
		"2: " ROUND()
		"1: " ROUND()
		"0: \n\t"

		// Outputs (modified).
		: [rounds] "+r" (rounds),
		  [loops] "+r" (loops),
		  [src] "+r" (*s),
		  [dst] "+r" (*o),
		  [a] "=&x" (a),
		  [b] "=&x" (b),
		  [c] "=&x" (c),
		  [d] "=&x" (d),
		  [e] "=&x" (e),
		  [f] "=&x" (f)

		// Inputs (not modified).
		: [lut0] "x" (lut0),
		  [lut1] "x" (lut1),
		  [msk0] "x" (_mm_set1_epi32(0x0FC0FC00)),
		  [msk1] "x" (_mm_set1_epi32(0x04000040)),
		  [msk2] "x" (_mm_set1_epi32(0x003F03F0)),
		  [msk3] "x" (_mm_set1_epi32(0x01000010)),
		  [n51] "x" (_mm_set1_epi8(51)),
		  [n25] "x" (_mm_set1_epi8(25))

		// Clobbers.
		: "cc", "memory"
	);
}

#pragma GCC diagnostic pop
||||
|
|
@ -0,0 +1,66 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX2
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
|
||||
#else
|
||||
#pragma GCC target("avx2")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_AVX2_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_AVX2_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_AVX2_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "./dec_reshuffle.c"
|
||||
#include "./dec_loop.c"
|
||||
|
||||
#if BASE64_AVX2_USE_ASM
|
||||
# include "./enc_loop_asm.c"
|
||||
#else
|
||||
# include "./enc_translate.c"
|
||||
# include "./enc_reshuffle.c"
|
||||
# include "./enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
// Public entry point: encode a stream chunk using the AVX2 codec.
// The generic head/tail fragments are textually included and are expected to
// declare the locals (s, slen, o, olen) consumed by the bulk loop —
// TODO(review): confirm against ../generic/enc_head.c.
void
base64_stream_encode_avx2 BASE64_ENC_PARAMS
{
#if HAVE_AVX2
	#include "../generic/enc_head.c"
	enc_loop_avx2(&s, &slen, &o, &olen);
	#include "../generic/enc_tail.c"
#else
	// AVX2 support was compiled out: defer to the stub implementation.
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Public entry point: decode a stream chunk using the AVX2 codec. The return
// value comes from the included generic tail fragment (or from the stub when
// AVX2 support is compiled out).
int
base64_stream_decode_avx2 BASE64_DEC_PARAMS
{
#if HAVE_AVX2
	#include "../generic/dec_head.c"
	dec_loop_avx2(&s, &slen, &o, &olen);
	#include "../generic/dec_tail.c"

	// Last function in this translation unit: balance the clang
	// per-function target attribute pushed at the top of the file.
#if defined(__clang__)
#pragma clang attribute pop
#endif
#else
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
// Decode one 32-byte block of base64 input into 24 output bytes.
// Returns 1 on success (pointers and round count updated), 0 when the block
// contains a character outside the base64 alphabet (nothing is consumed).
static BASE64_FORCE_INLINE int
dec_loop_avx2_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
{
	// Character-class lookup tables, indexed by the low/high nibble of
	// each input byte; the two 16-byte halves are identical because
	// vpshufb operates per 128-bit lane.
	const __m256i lut_lo = _mm256_setr_epi8(
		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);

	const __m256i lut_hi = _mm256_setr_epi8(
		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);

	// Per-class deltas that map ASCII values to 6-bit symbol values.
	const __m256i lut_roll = _mm256_setr_epi8(
		0, 16, 19, 4, -65, -65, -71, -71,
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 16, 19, 4, -65, -65, -71, -71,
		0, 0, 0, 0, 0, 0, 0, 0);

	const __m256i mask_2F = _mm256_set1_epi8(0x2F);

	// Load input:
	__m256i str = _mm256_loadu_si256((__m256i *) *s);

	// See the SSSE3 decoder for an explanation of the algorithm.
	const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F);
	const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F);
	const __m256i hi         = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
	const __m256i lo         = _mm256_shuffle_epi8(lut_lo, lo_nibbles);

	// A nonzero AND of the class vectors flags an invalid input byte:
	if (!_mm256_testz_si256(lo, hi)) {
		return 0;
	}

	const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F);
	const __m256i roll  = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles));

	// Now simply add the delta values to the input:
	str = _mm256_add_epi8(str, roll);

	// Reshuffle the input to packed 12-byte output format:
	str = dec_reshuffle(str);

	// Store the output:
	_mm256_storeu_si256((__m256i *) *o, str);

	*s += 32;
	*o += 24;
	*rounds -= 1;

	return 1;
}
|
||||
|
||||
// Bulk AVX2 decoder driver. Processes as many whole 32-byte rounds as can be
// read safely, stopping early at the first invalid block. Rounds that were
// not actually completed are credited back to *slen / *olen at the end, so
// the generic scalar code can finish (or report the error on) the remainder.
static inline void
dec_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 45) {
		return;
	}

	// Process blocks of 32 bytes per round. Because 8 extra zero bytes are
	// written after the output, ensure that there will be at least 13
	// bytes of input data left to cover the gap. (11 data bytes and up to
	// two end-of-string markers.)
	size_t rounds = (*slen - 13) / 32;

	*slen -= rounds * 32;	// 32 bytes consumed per round
	*olen += rounds * 24;	// 24 bytes produced per round

	for (;;) {
		// Pick the largest unroll factor that still fits.
		size_t batch = 1;
		if (rounds >= 8) {
			batch = 8;
		} else if (rounds >= 4) {
			batch = 4;
		} else if (rounds >= 2) {
			batch = 2;
		}

		// Run the batch; an invalid block aborts it early. Note that
		// dec_loop_avx2_inner decrements `rounds` itself on success.
		size_t done = 0;
		while (done < batch && dec_loop_avx2_inner(s, o, &rounds)) {
			done++;
		}

		// Stop on failure, after a lone trailing round, or when all
		// rounds are exhausted.
		if (done < batch || batch == 1 || rounds == 0) {
			break;
		}
	}

	// Adjust for any rounds that were skipped:
	*slen += rounds * 32;
	*olen -= rounds * 24;
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
// Pack 32 decoded 6-bit values (one per input byte) into 24 contiguous
// output bytes, discarding the 8 unused bytes. Pure function of `in`.
static BASE64_FORCE_INLINE __m256i
dec_reshuffle (const __m256i in)
{
	// in, lower lane, bits, upper case are most significant bits, lower
	// case are least significant bits:
	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA

	const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
	// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
	// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
	// 0000eeee FFffffff 0000DDDD DDddEEEE
	// 0000bbbb CCcccccc 0000AAAA AAaaBBBB

	__m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
	// 00000000 JJJJJJjj KKKKkkkk LLllllll
	// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
	// 00000000 DDDDDDdd EEEEeeee FFffffff
	// 00000000 AAAAAAaa BBBBbbbb CCcccccc

	// Pack bytes together in each lane:
	out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
	// 00000000 00000000 00000000 00000000
	// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
	// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
	// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa

	// Pack lanes:
	return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
// Encode the first 24-byte block of the stream. All subsequent rounds read
// at an offset of -4 bytes (see enc_loop_avx2_inner); doing that on the very
// first round would read before the start of the buffer, so this variant
// loads at offset 0 and uses a cross-lane permute to create the same 4-byte
// shift in-register.
static BASE64_FORCE_INLINE void
enc_loop_avx2_inner_first (const uint8_t **s, uint8_t **o)
{
	// First load is done at s - 0 to not get a segfault:
	__m256i src = _mm256_loadu_si256((__m256i *) *s);

	// Shift by 4 bytes, as required by enc_reshuffle:
	src = _mm256_permutevar8x32_epi32(src, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));

	// Reshuffle, translate, store:
	src = enc_reshuffle(src);
	src = enc_translate(src);
	_mm256_storeu_si256((__m256i *) *o, src);

	// Subsequent loads will be done at s - 4, set pointer for next round:
	// advance by 20 (= 24 - 4) so the next 32-byte load starts 4 bytes
	// before the next block.
	*s += 20;
	*o += 32;
}
|
||||
|
||||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_avx2_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input:
|
||||
__m256i src = _mm256_loadu_si256((__m256i *) *s);
|
||||
|
||||
// Reshuffle, translate, store:
|
||||
src = enc_reshuffle(src);
|
||||
src = enc_translate(src);
|
||||
_mm256_storeu_si256((__m256i *) *o, src);
|
||||
|
||||
*s += 24;
|
||||
*o += 32;
|
||||
}
|
||||
|
||||
// Bulk AVX2 encoder driver. Runs the special first round (offset-0 load),
// then drains the remaining rounds in unrolled batches of 8/4/2/1.
static inline void
enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 32) {
		return;
	}

	// Process blocks of 24 bytes at a time. Because blocks are loaded 32
	// bytes at a time an offset of -4, ensure that there will be at least
	// 4 remaining bytes after the last round, so that the final read will
	// not pass beyond the bounds of the input buffer:
	size_t rounds = (*slen - 4) / 24;

	*slen -= rounds * 24;	// 24 bytes consumed per round
	*olen += rounds * 32;	// 32 bytes produced per round

	// The first round reads at offset 0 (not -4) to avoid underflowing
	// the input buffer; it leaves *s positioned 4 bytes early.
	enc_loop_avx2_inner_first(s, o);

	// Remaining rounds, processed in the largest batch that fits.
	for (size_t remaining = rounds - 1; remaining > 0; ) {
		size_t batch = 1;
		if (remaining >= 8) {
			batch = 8;
		} else if (remaining >= 4) {
			batch = 4;
		} else if (remaining >= 2) {
			batch = 2;
		}

		for (size_t i = 0; i < batch; i++) {
			enc_loop_avx2_inner(s, o);
		}
		remaining -= batch;
	}

	// Add the offset back:
	*s += 4;
}
|
||||
|
|
@ -0,0 +1,291 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
// code repetition. The preprocessor is used to template large sections of
// inline assembly that differ only in the registers used. If the code was
// written out by hand, it would become very large and hard to audit.

// Generate a block of inline assembly that loads register R0 from memory. The
// offset at which the register is loaded is set by the given round and a
// constant offset. Each AVX2 encoding round consumes 24 source bytes.
#define LOAD(R0, ROUND, OFFSET) \
	"vlddqu ("#ROUND" * 24 + "#OFFSET")(%[src]), %["R0"] \n\t"

// Generate a block of inline assembly that deinterleaves and shuffles register
// R0 using preloaded constants. Outputs in R0 and R1.
#define SHUF(R0, R1, R2) \
	"vpshufb %[lut0], %["R0"], %["R1"] \n\t" \
	"vpand %["R1"], %[msk0], %["R2"] \n\t" \
	"vpand %["R1"], %[msk2], %["R1"] \n\t" \
	"vpmulhuw %["R2"], %[msk1], %["R2"] \n\t" \
	"vpmullw %["R1"], %[msk3], %["R1"] \n\t" \
	"vpor %["R1"], %["R2"], %["R1"] \n\t"

// Generate a block of inline assembly that takes R0 and R1 and translates
// their contents to the base64 alphabet, using preloaded constants.
#define TRAN(R0, R1, R2) \
	"vpsubusb %[n51], %["R1"], %["R0"] \n\t" \
	"vpcmpgtb %[n25], %["R1"], %["R2"] \n\t" \
	"vpsubb %["R2"], %["R0"], %["R0"] \n\t" \
	"vpshufb %["R0"], %[lut1], %["R2"] \n\t" \
	"vpaddb %["R1"], %["R2"], %["R0"] \n\t"

// Generate a block of inline assembly that stores the given register R0 at an
// offset set by the given round. Each round produces 32 output bytes.
#define STOR(R0, ROUND) \
	"vmovdqu %["R0"], ("#ROUND" * 32)(%[dst]) \n\t"

// Generate a block of inline assembly that generates a single self-contained
// encoder round: fetch the data, process it, and store the result. Then update
// the source and destination pointers.
#define ROUND() \
	LOAD("a", 0, -4) \
	SHUF("a", "b", "c") \
	TRAN("a", "b", "c") \
	STOR("a", 0) \
	"add $24, %[src] \n\t" \
	"add $32, %[dst] \n\t"

// Define a macro that initiates a three-way interleaved encoding round by
// preloading registers a, b and c from memory.
// The register graph shows which registers are in use during each step, and
// is a visual aid for choosing registers for that step. Symbol index:
//
//  + indicates that a register is loaded by that step.
//  | indicates that a register is in use and must not be touched.
//  - indicates that a register is decommissioned by that step.
//  x indicates that a register is used as a temporary by that step.
//  V indicates that a register is an input or output to the macro.
//
#define ROUND_3_INIT() 			/*  a  b  c  d  e  f  */ \
	LOAD("a", 0, -4)		/*  +                 */ \
	SHUF("a", "d", "e")		/*  |        +  x     */ \
	LOAD("b", 1, -4)		/*  |  +     |        */ \
	TRAN("a", "d", "e")		/*  |  |     -  x     */ \
	LOAD("c", 2, -4)		/*  V  V  V           */

// Define a macro that translates, shuffles and stores the input registers A, B
// and C, and preloads registers D, E and F for the next round.
// This macro can be arbitrarily daisy-chained by feeding output registers D, E
// and F back into the next round as input registers A, B and C. The macro
// carefully interleaves memory operations with data operations for optimal
// pipelined performance.

#define ROUND_3(ROUND, A,B,C,D,E,F) 	/*  A  B  C  D  E  F  */ \
	LOAD(D, (ROUND + 3), -4)	/*  V  V  V  +        */ \
	SHUF(B, E, F)			/*  |  |  |  |  +  x  */ \
	STOR(A, (ROUND + 0))		/*  -  |  |  |  |     */ \
	TRAN(B, E, F)			/*     |  |  |  -  x  */ \
	LOAD(E, (ROUND + 4), -4)	/*     |  |  |  +     */ \
	SHUF(C, A, F)			/*  +  |  |  |  |  x  */ \
	STOR(B, (ROUND + 1))		/*  |  -  |  |  |     */ \
	TRAN(C, A, F)			/*  -     |  |  |  x  */ \
	LOAD(F, (ROUND + 5), -4)	/*        |  |  |  +  */ \
	SHUF(D, A, B)			/*  +  x     |  |  |  */ \
	STOR(C, (ROUND + 2))		/*  |     -  |  |  |  */ \
	TRAN(D, A, B)			/*  -  x     V  V  V  */

// Define a macro that terminates a ROUND_3 macro by taking pre-loaded
// registers D, E and F, and translating, shuffling and storing them.
#define ROUND_3_END(ROUND, A,B,C,D,E,F)	/*  A  B  C  D  E  F  */ \
	SHUF(E, A, B)			/*  +  x     V  V  V  */ \
	STOR(D, (ROUND + 3))		/*  |        -  |  |  */ \
	TRAN(E, A, B)			/*  -  x        |  |  */ \
	SHUF(F, C, D)			/*        +  x     |  */ \
	STOR(E, (ROUND + 4))		/*        |     -  |  */ \
	TRAN(F, C, D)			/*        -  x     |  */ \
	STOR(F, (ROUND + 5))		/*                 -  */

// Define a type A round. Inputs are a, b, and c, outputs are d, e, and f.
#define ROUND_3_A(ROUND) \
	ROUND_3(ROUND, "a", "b", "c", "d", "e", "f")

// Define a type B round. Inputs and outputs are swapped with regard to type A.
#define ROUND_3_B(ROUND) \
	ROUND_3(ROUND, "d", "e", "f", "a", "b", "c")

// Terminating macro for a type A round.
#define ROUND_3_A_LAST(ROUND) \
	ROUND_3_A(ROUND) \
	ROUND_3_END(ROUND, "a", "b", "c", "d", "e", "f")

// Terminating macro for a type B round.
#define ROUND_3_B_LAST(ROUND) \
	ROUND_3_B(ROUND) \
	ROUND_3_END(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
|
||||
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
|
||||
// compilers). It may be true, but the goal here is not C99 portability.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||
|
||||
static inline void
|
||||
enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
// For a clearer explanation of the algorithm used by this function,
|
||||
// please refer to the plain (not inline assembly) implementation. This
|
||||
// function follows the same basic logic.
|
||||
|
||||
if (*slen < 32) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process blocks of 24 bytes at a time. Because blocks are loaded 32
|
||||
// bytes at a time an offset of -4, ensure that there will be at least
|
||||
// 4 remaining bytes after the last round, so that the final read will
|
||||
// not pass beyond the bounds of the input buffer.
|
||||
size_t rounds = (*slen - 4) / 24;
|
||||
|
||||
*slen -= rounds * 24; // 24 bytes consumed per round
|
||||
*olen += rounds * 32; // 32 bytes produced per round
|
||||
|
||||
// Pre-decrement the number of rounds to get the number of rounds
|
||||
// *after* the first round, which is handled as a special case.
|
||||
rounds--;
|
||||
|
||||
// Number of times to go through the 36x loop.
|
||||
size_t loops = rounds / 36;
|
||||
|
||||
// Number of rounds remaining after the 36x loop.
|
||||
rounds %= 36;
|
||||
|
||||
// Lookup tables.
|
||||
const __m256i lut0 = _mm256_set_epi8(
|
||||
10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1,
|
||||
14, 15, 13, 14, 11, 12, 10, 11, 8, 9, 7, 8, 5, 6, 4, 5);
|
||||
|
||||
const __m256i lut1 = _mm256_setr_epi8(
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
|
||||
65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
|
||||
|
||||
// Temporary registers.
|
||||
__m256i a, b, c, d, e;
|
||||
|
||||
// Temporary register f doubles as the shift mask for the first round.
|
||||
__m256i f = _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6);
|
||||
|
||||
__asm__ volatile (
|
||||
|
||||
// The first loop iteration requires special handling to ensure
|
||||
// that the read, which is normally done at an offset of -4,
|
||||
// does not underflow the buffer. Load the buffer at an offset
|
||||
// of 0 and permute the input to achieve the same effect.
|
||||
LOAD("a", 0, 0)
|
||||
"vpermd %[a], %[f], %[a] \n\t"
|
||||
|
||||
// Perform the standard shuffling and translation steps.
|
||||
SHUF("a", "b", "c")
|
||||
TRAN("a", "b", "c")
|
||||
|
||||
// Store the result and increment the source and dest pointers.
|
||||
"vmovdqu %[a], (%[dst]) \n\t"
|
||||
"add $24, %[src] \n\t"
|
||||
"add $32, %[dst] \n\t"
|
||||
|
||||
// If there are 36 rounds or more, enter a 36x unrolled loop of
|
||||
// interleaved encoding rounds. The rounds interleave memory
|
||||
// operations (load/store) with data operations (table lookups,
|
||||
// etc) to maximize pipeline throughput.
|
||||
" test %[loops], %[loops] \n\t"
|
||||
" jz 18f \n\t"
|
||||
" jmp 36f \n\t"
|
||||
" \n\t"
|
||||
".balign 64 \n\t"
|
||||
"36: " ROUND_3_INIT()
|
||||
" " ROUND_3_A( 0)
|
||||
" " ROUND_3_B( 3)
|
||||
" " ROUND_3_A( 6)
|
||||
" " ROUND_3_B( 9)
|
||||
" " ROUND_3_A(12)
|
||||
" " ROUND_3_B(15)
|
||||
" " ROUND_3_A(18)
|
||||
" " ROUND_3_B(21)
|
||||
" " ROUND_3_A(24)
|
||||
" " ROUND_3_B(27)
|
||||
" " ROUND_3_A_LAST(30)
|
||||
" add $(24 * 36), %[src] \n\t"
|
||||
" add $(32 * 36), %[dst] \n\t"
|
||||
" dec %[loops] \n\t"
|
||||
" jnz 36b \n\t"
|
||||
|
||||
// Enter an 18x unrolled loop for rounds of 18 or more.
|
||||
"18: cmp $18, %[rounds] \n\t"
|
||||
" jl 9f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A(0)
|
||||
" " ROUND_3_B(3)
|
||||
" " ROUND_3_A(6)
|
||||
" " ROUND_3_B(9)
|
||||
" " ROUND_3_A_LAST(12)
|
||||
" sub $18, %[rounds] \n\t"
|
||||
" add $(24 * 18), %[src] \n\t"
|
||||
" add $(32 * 18), %[dst] \n\t"
|
||||
|
||||
// Enter a 9x unrolled loop for rounds of 9 or more.
|
||||
"9: cmp $9, %[rounds] \n\t"
|
||||
" jl 6f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A(0)
|
||||
" " ROUND_3_B_LAST(3)
|
||||
" sub $9, %[rounds] \n\t"
|
||||
" add $(24 * 9), %[src] \n\t"
|
||||
" add $(32 * 9), %[dst] \n\t"
|
||||
|
||||
// Enter a 6x unrolled loop for rounds of 6 or more.
|
||||
"6: cmp $6, %[rounds] \n\t"
|
||||
" jl 55f \n\t"
|
||||
" " ROUND_3_INIT()
|
||||
" " ROUND_3_A_LAST(0)
|
||||
" sub $6, %[rounds] \n\t"
|
||||
" add $(24 * 6), %[src] \n\t"
|
||||
" add $(32 * 6), %[dst] \n\t"
|
||||
|
||||
// Dispatch the remaining rounds 0..5.
|
||||
"55: cmp $3, %[rounds] \n\t"
|
||||
" jg 45f \n\t"
|
||||
" je 3f \n\t"
|
||||
" cmp $1, %[rounds] \n\t"
|
||||
" jg 2f \n\t"
|
||||
" je 1f \n\t"
|
||||
" jmp 0f \n\t"
|
||||
|
||||
"45: cmp $4, %[rounds] \n\t"
|
||||
" je 4f \n\t"
|
||||
|
||||
// Block of non-interlaced encoding rounds, which can each
|
||||
// individually be jumped to. Rounds fall through to the next.
|
||||
"5: " ROUND()
|
||||
"4: " ROUND()
|
||||
"3: " ROUND()
|
||||
"2: " ROUND()
|
||||
"1: " ROUND()
|
||||
"0: \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [rounds] "+r" (rounds),
|
||||
[loops] "+r" (loops),
|
||||
[src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[a] "=&x" (a),
|
||||
[b] "=&x" (b),
|
||||
[c] "=&x" (c),
|
||||
[d] "=&x" (d),
|
||||
[e] "=&x" (e),
|
||||
[f] "+x" (f)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [lut0] "x" (lut0),
|
||||
[lut1] "x" (lut1),
|
||||
[msk0] "x" (_mm256_set1_epi32(0x0FC0FC00)),
|
||||
[msk1] "x" (_mm256_set1_epi32(0x04000040)),
|
||||
[msk2] "x" (_mm256_set1_epi32(0x003F03F0)),
|
||||
[msk3] "x" (_mm256_set1_epi32(0x01000010)),
|
||||
[n51] "x" (_mm256_set1_epi8(51)),
|
||||
[n25] "x" (_mm256_set1_epi8(25))
|
||||
|
||||
// Clobbers.
|
||||
: "cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
// Spread each 24-bit input group over 32 bits so that every output byte
// holds one 6-bit value in its low bits, ready for translation to ASCII.
static BASE64_FORCE_INLINE __m256i
enc_reshuffle (const __m256i input)
{
	// Translation of the SSSE3 reshuffling algorithm to AVX2. This one
	// works with shifted (4 bytes) input in order to be able to work
	// efficiently in the two 128-bit lanes.

	// Input, bytes MSB to LSB:
	// 0 0 0 0 x w v u t s r q p o n m
	// l k j i h g f e d c b a 0 0 0 0

	const __m256i in = _mm256_shuffle_epi8(input, _mm256_set_epi8(
		10, 11,  9, 10,
		 7,  8,  6,  7,
		 4,  5,  3,  4,
		 1,  2,  0,  1,

		14, 15, 13, 14,
		11, 12, 10, 11,
		 8,  9,  7,  8,
		 5,  6,  4,  5));
	// in, bytes MSB to LSB:
	// w x v w
	// t u s t
	// q r p q
	// n o m n
	// k l j k
	// h i g h
	// e f d e
	// b c a b

	const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0FC0FC00));
	// bits, upper case are most significant bits, lower case are least
	// significant bits.
	// 0000wwww XX000000 VVVVVV00 00000000
	// 0000tttt UU000000 SSSSSS00 00000000
	// 0000qqqq RR000000 PPPPPP00 00000000
	// 0000nnnn OO000000 MMMMMM00 00000000
	// 0000kkkk LL000000 JJJJJJ00 00000000
	// 0000hhhh II000000 GGGGGG00 00000000
	// 0000eeee FF000000 DDDDDD00 00000000
	// 0000bbbb CC000000 AAAAAA00 00000000

	const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
	// 00000000 00wwwwXX 00000000 00VVVVVV
	// 00000000 00ttttUU 00000000 00SSSSSS
	// 00000000 00qqqqRR 00000000 00PPPPPP
	// 00000000 00nnnnOO 00000000 00MMMMMM
	// 00000000 00kkkkLL 00000000 00JJJJJJ
	// 00000000 00hhhhII 00000000 00GGGGGG
	// 00000000 00eeeeFF 00000000 00DDDDDD
	// 00000000 00bbbbCC 00000000 00AAAAAA

	const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003F03F0));
	// 00000000 00xxxxxx 000000vv WWWW0000
	// 00000000 00uuuuuu 000000ss TTTT0000
	// 00000000 00rrrrrr 000000pp QQQQ0000
	// 00000000 00oooooo 000000mm NNNN0000
	// 00000000 00llllll 000000jj KKKK0000
	// 00000000 00iiiiii 000000gg HHHH0000
	// 00000000 00ffffff 000000dd EEEE0000
	// 00000000 00cccccc 000000aa BBBB0000

	const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
	// 00xxxxxx 00000000 00vvWWWW 00000000
	// 00uuuuuu 00000000 00ssTTTT 00000000
	// 00rrrrrr 00000000 00ppQQQQ 00000000
	// 00oooooo 00000000 00mmNNNN 00000000
	// 00llllll 00000000 00jjKKKK 00000000
	// 00iiiiii 00000000 00ggHHHH 00000000
	// 00ffffff 00000000 00ddEEEE 00000000
	// 00cccccc 00000000 00aaBBBB 00000000

	return _mm256_or_si256(t1, t3);
	// 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
	// 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
	// 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
	// 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
// Translate 32 six-bit values (0..63) to their Base64 ASCII characters by
// adding a per-range offset selected through a byte shuffle.
static BASE64_FORCE_INLINE __m256i
enc_translate (const __m256i in)
{
	// A lookup table containing the absolute offsets for all ranges:
	const __m256i lut = _mm256_setr_epi8(
		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);

	// Translate values 0..63 to the Base64 alphabet. There are five sets:
	// #  From      To         Abs   Index  Characters
	// 0  [0..25]   [65..90]   +65   0      ABCDEFGHIJKLMNOPQRSTUVWXYZ
	// 1  [26..51]  [97..122]  +71   1      abcdefghijklmnopqrstuvwxyz
	// 2  [52..61]  [48..57]    -4   [2..11] 0123456789
	// 3  [62]      [43]       -19   12     +
	// 4  [63]      [47]       -16   13     /

	// Create LUT indices from the input. The index for range #0 is right,
	// others are 1 less than expected:
	__m256i indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

	// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
	const __m256i mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

	// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
	// now correct:
	indices = _mm256_sub_epi8(indices, mask);

	// Add offsets to input values:
	return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_AVX512
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("avx512vbmi"))), apply_to=function)
|
||||
#else
|
||||
#pragma GCC target("avx512vbmi")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "../avx2/dec_reshuffle.c"
|
||||
#include "../avx2/dec_loop.c"
|
||||
#include "enc_reshuffle_translate.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // HAVE_AVX512
|
||||
|
||||
// Public entry point: streaming Base64 encoder for the AVX512 build. The
// body is assembled from textual fragments: enc_head.c sets up the local
// state (s, slen, o, olen), the SIMD bulk loop runs, and enc_tail.c encodes
// the remainder bytewise. Falls back to the stub when compiled without
// AVX512 support.
void
base64_stream_encode_avx512 BASE64_ENC_PARAMS
{
#if HAVE_AVX512
#include "../generic/enc_head.c"
	enc_loop_avx512(&s, &slen, &o, &olen);
#include "../generic/enc_tail.c"
#else
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Reuse AVX2 decoding. Not supporting AVX512 at present
|
||||
// Public entry point: streaming Base64 decoder for the AVX512 build.
// Decoding reuses the AVX2 bulk loop; dec_head.c/dec_tail.c supply the
// prologue, bytewise remainder handling and the return statement.
int
base64_stream_decode_avx512 BASE64_DEC_PARAMS
{
#if HAVE_AVX512
#include "../generic/dec_head.c"
	dec_loop_avx2(&s, &slen, &o, &olen);
#include "../generic/dec_tail.c"
// NOTE(review): this clang pragma pop sits inside the HAVE_AVX512 branch of
// the last function, so the matching push (at the top of the file) is only
// popped on clang builds with AVX512 enabled — confirm this is intentional.
#if defined(__clang__)
#pragma clang attribute pop
#endif
#else
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_avx512_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input.
|
||||
__m512i src = _mm512_loadu_si512((__m512i *) *s);
|
||||
|
||||
// Reshuffle, translate, store.
|
||||
src = enc_reshuffle_translate(src);
|
||||
_mm512_storeu_si512((__m512i *) *o, src);
|
||||
|
||||
*s += 48;
|
||||
*o += 64;
|
||||
}
|
||||
|
||||
// Bulk AVX512 encoding loop: runs as many 48-in/64-out rounds as the input
// allows, updating the stream pointers and lengths for the caller.
static inline void
enc_loop_avx512 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// Not enough input to engage the wide loop at all.
	if (*slen < 64) {
		return;
	}

	// Each round consumes 48 input bytes, but the loads are 64 bytes
	// wide. Budget the rounds so that at least 24 input bytes remain
	// after the final round; the last 64-byte load then cannot read
	// past the end of the input buffer.
	size_t rounds = (*slen - 24) / 48;

	*slen -= rounds * 48;	// 48 bytes consumed per round
	*olen += rounds * 64;	// 64 bytes produced per round

	// Drain the work in unrolled batches of 8, 4, 2 and finally 1
	// round, to keep loop overhead low on large inputs.
	while (rounds >= 8) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		rounds -= 8;
	}
	while (rounds >= 4) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		rounds -= 4;
	}
	while (rounds >= 2) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		rounds -= 2;
	}
	if (rounds > 0) {
		enc_loop_avx512_inner(s, o);
	}
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
// AVX512 algorithm is based on permutevar and multishift. The code is based on
|
||||
// https://github.com/WojciechMula/base64simd which is under BSD-2 license.
|
||||
|
||||
// Combined reshuffle-and-translate step for the AVX512VBMI encoder: spreads
// each 24-bit input group over 32 bits, then maps the resulting 6-bit values
// directly to their Base64 ASCII characters via a permute lookup.
static BASE64_FORCE_INLINE __m512i
enc_reshuffle_translate (const __m512i input)
{
	// 32-bit input
	// [ 0  0  0  0  0  0  0  0|c1 c0 d5 d4 d3 d2 d1 d0|
	//  b3 b2 b1 b0 c5 c4 c3 c2|a5 a4 a3 a2 a1 a0 b5 b4]
	// output order  [1, 2, 0, 1]
	// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0|
	//  a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0]

	const __m512i shuffle_input = _mm512_setr_epi32(
		0x01020001,
		0x04050304,
		0x07080607,
		0x0a0b090a,
		0x0d0e0c0d,
		0x10110f10,
		0x13141213,
		0x16171516,
		0x191a1819,
		0x1c1d1b1c,
		0x1f201e1f,
		0x22232122,
		0x25262425,
		0x28292728,
		0x2b2c2a2b,
		0x2e2f2d2e);

	// Reorder bytes
	// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0|
	//  a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0]
	const __m512i in = _mm512_permutexvar_epi8(shuffle_input, input);

	// After multishift a single 32-bit lane has following layout
	// [c1 c0 d5 d4 d3 d2 d1 d0|b1 b0 c5 c4 c3 c2 c1 c0|
	//  a1 a0 b5 b4 b3 b2 b1 b0|d1 d0 a5 a4 a3 a2 a1 a0]
	// (a = [10:17], b = [4:11], c = [22:27], d = [16:21])

	// 48, 54, 36, 42, 16, 22, 4, 10
	const __m512i shifts = _mm512_set1_epi64(0x3036242a1016040alu);
	__m512i shuffled_in = _mm512_multishift_epi64_epi8(shifts, in);

	// Translate immediately after reshuffled.
	const __m512i lookup = _mm512_loadu_si512(base64_table_enc_6bit);

	// Translation 6-bit values to ASCII.
	return _mm512_permutexvar_epi8(shuffled_in, lookup);
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
static BASE64_FORCE_INLINE int
|
||||
dec_loop_generic_32_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
|
||||
{
|
||||
const uint32_t str
|
||||
= base64_table_dec_32bit_d0[(*s)[0]]
|
||||
| base64_table_dec_32bit_d1[(*s)[1]]
|
||||
| base64_table_dec_32bit_d2[(*s)[2]]
|
||||
| base64_table_dec_32bit_d3[(*s)[3]];
|
||||
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
|
||||
// LUTs for little-endian set MSB in case of invalid character:
|
||||
if (str & UINT32_C(0x80000000)) {
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
// LUTs for big-endian set LSB in case of invalid character:
|
||||
if (str & UINT32_C(1)) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
// Store the output:
|
||||
memcpy(*o, &str, sizeof (str));
|
||||
|
||||
*s += 4;
|
||||
*o += 3;
|
||||
*rounds -= 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Bulk scalar decoding loop: decodes 4-byte groups until the input runs low
// or an invalid character appears, at which point the skipped rounds are
// handed back so the bytewise tail can do error reporting.
static inline void
dec_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 8) {
		return;
	}

	// Process blocks of 4 bytes per round. Because one extra zero byte
	// is written after the output, ensure that there will be at least 4
	// bytes of input data left to cover the gap. (Two data bytes and up
	// to two end-of-string markers.)
	size_t rounds = (*slen - 4) / 4;

	*slen -= rounds * 4;	// 4 bytes consumed per round
	*olen += rounds * 3;	// 3 bytes produced per round

	// Unrolled batches of 8, 4, 2 and 1 rounds; bail out to the
	// adjustment below on the first invalid input character.
	while (rounds >= 8) {
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
	}
	while (rounds >= 4) {
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
	}
	while (rounds >= 2) {
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
		if (!dec_loop_generic_32_inner(s, o, &rounds)) goto done;
	}
	if (rounds > 0) {
		dec_loop_generic_32_inner(s, o, &rounds);
	}

done:
	// Adjust for any rounds that were skipped:
	*slen += rounds * 4;
	*olen -= rounds * 3;
}
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_generic_32_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
uint32_t src;
|
||||
|
||||
// Load input:
|
||||
memcpy(&src, *s, sizeof (src));
|
||||
|
||||
// Reorder to 32-bit big-endian, if not already in that format. The
|
||||
// workset must be in big-endian, otherwise the shifted bits do not
|
||||
// carry over properly among adjacent bytes:
|
||||
src = BASE64_HTOBE32(src);
|
||||
|
||||
// Two indices for the 12-bit lookup table:
|
||||
const size_t index0 = (src >> 20) & 0xFFFU;
|
||||
const size_t index1 = (src >> 8) & 0xFFFU;
|
||||
|
||||
// Table lookup and store:
|
||||
memcpy(*o + 0, base64_table_enc_12bit + index0, 2);
|
||||
memcpy(*o + 2, base64_table_enc_12bit + index1, 2);
|
||||
|
||||
*s += 3;
|
||||
*o += 4;
|
||||
}
|
||||
|
||||
// Bulk scalar (32-bit) encoding loop: runs as many 3-in/4-out rounds as the
// input allows, updating the stream pointers and lengths for the caller.
static inline void
enc_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 4) {
		return;
	}

	// Process blocks of 3 bytes at a time. Because blocks are loaded 4
	// bytes at a time, ensure that there will be at least one remaining
	// byte after the last round, so that the final read will not pass
	// beyond the bounds of the input buffer:
	size_t rounds = (*slen - 1) / 3;

	*slen -= rounds * 3;	// 3 bytes consumed per round
	*olen += rounds * 4;	// 4 bytes produced per round

	// Unrolled batches of 8, 4, 2 and finally 1 round:
	while (rounds >= 8) {
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		rounds -= 8;
	}
	while (rounds >= 4) {
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		rounds -= 4;
	}
	while (rounds >= 2) {
		enc_loop_generic_32_inner(s, o);
		enc_loop_generic_32_inner(s, o);
		rounds -= 2;
	}
	if (rounds > 0) {
		enc_loop_generic_32_inner(s, o);
	}
}
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_generic_64_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
uint64_t src;
|
||||
|
||||
// Load input:
|
||||
memcpy(&src, *s, sizeof (src));
|
||||
|
||||
// Reorder to 64-bit big-endian, if not already in that format. The
|
||||
// workset must be in big-endian, otherwise the shifted bits do not
|
||||
// carry over properly among adjacent bytes:
|
||||
src = BASE64_HTOBE64(src);
|
||||
|
||||
// Four indices for the 12-bit lookup table:
|
||||
const size_t index0 = (src >> 52) & 0xFFFU;
|
||||
const size_t index1 = (src >> 40) & 0xFFFU;
|
||||
const size_t index2 = (src >> 28) & 0xFFFU;
|
||||
const size_t index3 = (src >> 16) & 0xFFFU;
|
||||
|
||||
// Table lookup and store:
|
||||
memcpy(*o + 0, base64_table_enc_12bit + index0, 2);
|
||||
memcpy(*o + 2, base64_table_enc_12bit + index1, 2);
|
||||
memcpy(*o + 4, base64_table_enc_12bit + index2, 2);
|
||||
memcpy(*o + 6, base64_table_enc_12bit + index3, 2);
|
||||
|
||||
*s += 6;
|
||||
*o += 8;
|
||||
}
|
||||
|
||||
// Bulk scalar (64-bit) encoding loop: runs as many 6-in/8-out rounds as the
// input allows, updating the stream pointers and lengths for the caller.
static inline void
enc_loop_generic_64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 8) {
		return;
	}

	// Process blocks of 6 bytes at a time. Because blocks are loaded 8
	// bytes at a time, ensure that there will be at least 2 remaining
	// bytes after the last round, so that the final read will not pass
	// beyond the bounds of the input buffer:
	size_t rounds = (*slen - 2) / 6;

	*slen -= rounds * 6;	// 6 bytes consumed per round
	*olen += rounds * 8;	// 8 bytes produced per round

	// Unrolled batches of 8, 4, 2 and finally 1 round:
	while (rounds >= 8) {
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		rounds -= 8;
	}
	while (rounds >= 4) {
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		rounds -= 4;
	}
	while (rounds >= 2) {
		enc_loop_generic_64_inner(s, o);
		enc_loop_generic_64_inner(s, o);
		rounds -= 2;
	}
	if (rounds > 0) {
		enc_loop_generic_64_inner(s, o);
	}
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if BASE64_WORDSIZE == 32
|
||||
# include "32/enc_loop.c"
|
||||
#elif BASE64_WORDSIZE == 64
|
||||
# include "64/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#if BASE64_WORDSIZE >= 32
|
||||
# include "32/dec_loop.c"
|
||||
#endif
|
||||
|
||||
// Public entry point: plain (scalar) streaming Base64 encoder. Picks the
// word-size-appropriate bulk loop; enc_head.c sets up the local state
// (s, slen, o, olen) and enc_tail.c encodes the remainder bytewise.
void
base64_stream_encode_plain BASE64_ENC_PARAMS
{
#include "enc_head.c"
#if BASE64_WORDSIZE == 32
	enc_loop_generic_32(&s, &slen, &o, &olen);
#elif BASE64_WORDSIZE == 64
	enc_loop_generic_64(&s, &slen, &o, &olen);
#endif
#include "enc_tail.c"
}
|
||||
|
||||
// Public entry point: plain (scalar) streaming Base64 decoder. dec_head.c
// sets up the local state, the bulk loop decodes 4-byte groups, and
// dec_tail.c handles the remainder and supplies the return statement.
int
base64_stream_decode_plain BASE64_DEC_PARAMS
{
#include "dec_head.c"
#if BASE64_WORDSIZE >= 32
	dec_loop_generic_32(&s, &slen, &o, &olen);
#endif
#include "dec_tail.c"
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
// Shared prologue for the Base64 decoders; textually #included into each
// codec's decode function, so the identifiers below (src, out, state,
// srclen, outlen) come from the enclosing function's parameters.
int ret = 0;
const uint8_t *s = (const uint8_t *) src;
uint8_t *o = (uint8_t *) out;
uint8_t q;

// Use local temporaries to avoid cache thrashing:
size_t olen = 0;
size_t slen = srclen;
struct base64_state st;
st.eof = state->eof;
st.bytes = state->bytes;
st.carry = state->carry;

// If we previously saw an EOF or an invalid character, bail out:
if (st.eof) {
	*outlen = 0;
	ret = 0;
	// If there was a trailing '=' to check, check it:
	if (slen && (st.eof == BASE64_AEOF)) {
		state->bytes = 0;
		state->eof = BASE64_EOF;
		ret = ((base64_table_dec_8bit[*s++] == 254) && (slen == 1)) ? 1 : 0;
	}
	return ret;
}

// Turn four 6-bit numbers into three bytes:
// out[0] = 11111122
// out[1] = 22223333
// out[2] = 33444444

// Duff's device again: the switch jumps into the middle of the for-loop
// depending on how many characters were carried over from the previous
// call. The loop body and epilogue are supplied by dec_tail.c.
switch (st.bytes)
{
	for (;;)
	{
	case 0:
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
		// Shared epilogue for the Base64 decoders: body of the Duff's
		// device switch/for loop opened in dec_head.c, followed by the
		// write-back of decoder state. Textually #included into each
		// codec's decode function.

		// Byte 0 of a quartet: top six bits of out[0].
		if (slen-- == 0) {
			ret = 1;
			break;
		}
		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
			st.eof = BASE64_EOF;
			// Treat character '=' as invalid for byte 0:
			break;
		}
		st.carry = q << 2;
		st.bytes++;

		// Deliberate fallthrough:
		BASE64_FALLTHROUGH

	case 1:	if (slen-- == 0) {
			ret = 1;
			break;
		}
		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
			st.eof = BASE64_EOF;
			// Treat character '=' as invalid for byte 1:
			break;
		}
		*o++ = st.carry | (q >> 4);
		st.carry = q << 4;
		st.bytes++;
		olen++;

		// Deliberate fallthrough:
		BASE64_FALLTHROUGH

	case 2:	if (slen-- == 0) {
			ret = 1;
			break;
		}
		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
			st.bytes++;
			// When q == 254, the input char is '='.
			// Check if next byte is also '=':
			if (q == 254) {
				if (slen-- != 0) {
					st.bytes = 0;
					// EOF:
					st.eof = BASE64_EOF;
					q = base64_table_dec_8bit[*s++];
					ret = ((q == 254) && (slen == 0)) ? 1 : 0;
					break;
				}
				else {
					// Almost EOF
					st.eof = BASE64_AEOF;
					ret = 1;
					break;
				}
			}
			// If we get here, there was an error:
			break;
		}
		*o++ = st.carry | (q >> 2);
		st.carry = q << 6;
		st.bytes++;
		olen++;

		// Deliberate fallthrough:
		BASE64_FALLTHROUGH

	case 3:	if (slen-- == 0) {
			ret = 1;
			break;
		}
		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
			st.bytes = 0;
			st.eof = BASE64_EOF;
			// When q == 254, the input char is '='. Return 1 and EOF.
			// When q == 255, the input char is invalid. Return 0 and EOF.
			ret = ((q == 254) && (slen == 0)) ? 1 : 0;
			break;
		}
		*o++ = st.carry | q;
		st.carry = 0;
		st.bytes = 0;
		olen++;
	}
}

// Write the local decoder state back out and report the result:
state->eof = st.eof;
state->bytes = st.bytes;
state->carry = st.carry;
*outlen = olen;
return ret;
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
// Shared prologue for the Base64 encoders; textually #included into each
// codec's encode function, so the identifiers below (src, out, state,
// srclen) come from the enclosing function's parameters.

// Assume that *out is large enough to contain the output.
// Theoretically it should be 4/3 the length of src.
const uint8_t *s = (const uint8_t *) src;
uint8_t *o = (uint8_t *) out;

// Use local temporaries to avoid cache thrashing:
size_t olen = 0;
size_t slen = srclen;
struct base64_state st;
st.bytes = state->bytes;
st.carry = state->carry;

// Turn three bytes into four 6-bit numbers:
// in[0] = 00111111
// in[1] = 00112222
// in[2] = 00222233
// in[3] = 00333333

// Duff's device, a for() loop inside a switch() statement. Legal!
// The loop body and epilogue are supplied by enc_tail.c.
switch (st.bytes)
{
	for (;;)
	{
	case 0:
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
		// Shared epilogue for the Base64 encoders: body of the Duff's
		// device switch/for loop opened in enc_head.c, followed by the
		// write-back of encoder state. Textually #included into each
		// codec's encode function.

		// Byte 0 of a triplet: emit its top six bits, carry the rest.
		if (slen-- == 0) {
			break;
		}
		*o++ = base64_table_enc_6bit[*s >> 2];
		st.carry = (*s++ << 4) & 0x30;
		st.bytes++;
		olen += 1;

		// Deliberate fallthrough:
		BASE64_FALLTHROUGH

	case 1:	if (slen-- == 0) {
			break;
		}
		*o++ = base64_table_enc_6bit[st.carry | (*s >> 4)];
		st.carry = (*s++ << 2) & 0x3C;
		st.bytes++;
		olen += 1;

		// Deliberate fallthrough:
		BASE64_FALLTHROUGH

	case 2:	if (slen-- == 0) {
			break;
		}
		*o++ = base64_table_enc_6bit[st.carry | (*s >> 6)];
		*o++ = base64_table_enc_6bit[*s++ & 0x3F];
		st.bytes = 0;
		olen += 2;
	}
}
// Write the local encoder state back out:
state->bytes = st.bytes;
state->carry = st.carry;
*outlen = olen;
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#ifdef __arm__
|
||||
# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
|
||||
# define BASE64_USE_NEON32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_USE_NEON32
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers.
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define BASE64_NEON32_USE_ASM
|
||||
#endif
|
||||
|
||||
static BASE64_FORCE_INLINE uint8x16_t
vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
{
	// NEON32 only supports 64-bit wide lookups in 128-bit tables. Emulate
	// the NEON64 `vqtbl1q_u8` intrinsic to do 128-bit wide lookups.
	uint8x8x2_t lut2;
	uint8x8x2_t result;

	// Split the 128-bit table into the two-half form vtbl2_u8 expects:
	lut2.val[0] = vget_low_u8(lut);
	lut2.val[1] = vget_high_u8(lut);

	// Look up the low and high halves of the index vector separately:
	result.val[0] = vtbl2_u8(lut2, vget_low_u8(indices));
	result.val[1] = vtbl2_u8(lut2, vget_high_u8(indices));

	return vcombine_u8(result.val[0], result.val[1]);
}
|
||||
|
||||
#include "../generic/32/dec_loop.c"
|
||||
#include "../generic/32/enc_loop.c"
|
||||
#include "dec_loop.c"
|
||||
#include "enc_reshuffle.c"
|
||||
#include "enc_translate.c"
|
||||
#include "enc_loop.c"
|
||||
|
||||
#endif // BASE64_USE_NEON32
|
||||
|
||||
// Stride size is so large on these NEON 32-bit functions
|
||||
// (48 bytes encode, 32 bytes decode) that we inline the
|
||||
// uint32 codec to stay performant on smaller inputs.
|
||||
|
||||
// Public entry point: NEON32-accelerated streaming Base64 encoder. The NEON
// bulk loop has a large 48-byte stride, so the generic 32-bit loop runs
// afterwards to pick up the mid-sized remainder before the bytewise tail.
// Falls back to the stub when compiled without NEON32 support.
void
base64_stream_encode_neon32 BASE64_ENC_PARAMS
{
#ifdef BASE64_USE_NEON32
#include "../generic/enc_head.c"
	enc_loop_neon32(&s, &slen, &o, &olen);
	enc_loop_generic_32(&s, &slen, &o, &olen);
#include "../generic/enc_tail.c"
#else
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Public entry point: NEON32-accelerated streaming Base64 decoder, with the
// generic 32-bit loop as a mid-sized fallback before the bytewise tail.
// dec_tail.c supplies the return statement for the NEON branch.
int
base64_stream_decode_neon32 BASE64_DEC_PARAMS
{
#ifdef BASE64_USE_NEON32
#include "../generic/dec_head.c"
	dec_loop_neon32(&s, &slen, &o, &olen);
	dec_loop_generic_32(&s, &slen, &o, &olen);
#include "../generic/dec_tail.c"
#else
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
// Return nonzero if any byte of the 16-byte vector is nonzero; used by the
// decoder to detect whether a block contained any invalid characters.
static BASE64_FORCE_INLINE int
is_nonzero (const uint8x16_t v)
{
	uint64_t u64;
	const uint64x2_t v64 = vreinterpretq_u64_u8(v);
	// Saturating narrow: any nonzero 64-bit lane becomes a nonzero
	// 32-bit lane, collapsing the vector to 64 bits total.
	const uint32x2_t v32 = vqmovn_u64(v64);

	vst1_u64(&u64, vreinterpret_u64_u32(v32));
	return u64 != 0;
}
|
||||
|
||||
// Map per-byte range-classification indices to ASCII offset deltas; adding
// the delta to an input character yields its 6-bit Base64 value. (The
// negative entries are stored as their unsigned 8-bit equivalents.)
static BASE64_FORCE_INLINE uint8x16_t
delta_lookup (const uint8x16_t v)
{
	const uint8x8_t lut = {
		0, 16, 19, 4, (uint8_t) -65, (uint8_t) -65, (uint8_t) -71, (uint8_t) -71,
	};

	// vtbl1_u8 does 64-bit wide lookups, so run both halves separately:
	return vcombine_u8(
		vtbl1_u8(lut, vget_low_u8(v)),
		vtbl1_u8(lut, vget_high_u8(v)));
}
|
||||
|
||||
// Decode one 16-byte lane in place, translating Base64 ASCII characters to
// their 6-bit values. The returned mask is nonzero wherever the input byte
// was not a valid Base64 character (the caller breaks out on any nonzero).
static BASE64_FORCE_INLINE uint8x16_t
dec_loop_neon32_lane (uint8x16_t *lane)
{
	// See the SSSE3 decoder for an explanation of the algorithm.
	const uint8x16_t lut_lo = {
		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A
	};

	const uint8x16_t lut_hi = {
		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
	};

	const uint8x16_t mask_0F = vdupq_n_u8(0x0F);
	const uint8x16_t mask_2F = vdupq_n_u8(0x2F);

	// Classify each byte by its high and low nibble, with '/' (0x2F)
	// handled as a special case:
	const uint8x16_t hi_nibbles = vshrq_n_u8(*lane, 4);
	const uint8x16_t lo_nibbles = vandq_u8(*lane, mask_0F);
	const uint8x16_t eq_2F = vceqq_u8(*lane, mask_2F);

	const uint8x16_t hi = vqtbl1q_u8(lut_hi, hi_nibbles);
	const uint8x16_t lo = vqtbl1q_u8(lut_lo, lo_nibbles);

	// Now simply add the delta values to the input:
	*lane = vaddq_u8(*lane, delta_lookup(vaddq_u8(eq_2F, hi_nibbles)));

	// Return the validity mask:
	return vandq_u8(lo, hi);
}
|
||||
|
||||
// NEON32 bulk decoder: consumes input in 64-byte blocks (producing 48 output
// bytes each) until the input runs low or an invalid character is found, in
// which case the remaining rounds are handed back for the scalar fallback.
static inline void
dec_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 64) {
		return;
	}

	// Process blocks of 64 bytes per round. Unlike the SSE codecs, no
	// extra trailing zero bytes are written, so it is not necessary to
	// reserve extra input bytes:
	size_t rounds = *slen / 64;

	*slen -= rounds * 64;	// 64 bytes consumed per round
	*olen += rounds * 48;	// 48 bytes produced per round

	do {
		uint8x16x3_t dec;

		// Load 64 bytes and deinterleave:
		uint8x16x4_t str = vld4q_u8(*s);

		// Decode each lane, collect a mask of invalid inputs:
		const uint8x16_t classified
			= dec_loop_neon32_lane(&str.val[0])
			| dec_loop_neon32_lane(&str.val[1])
			| dec_loop_neon32_lane(&str.val[2])
			| dec_loop_neon32_lane(&str.val[3]);

		// Check for invalid input: if any of the delta values are
		// zero, fall back on bytewise code to do error checking and
		// reporting:
		if (is_nonzero(classified)) {
			break;
		}

		// Compress four bytes into three:
		dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4));
		dec.val[1] = vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2));
		dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]);

		// Interleave and store decoded result:
		vst3q_u8(*o, dec);

		*s += 64;
		*o += 48;

	} while (--rounds > 0);

	// Adjust for any rounds that were skipped:
	*slen += rounds * 64;
	*olen -= rounds * 48;
}
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
#ifdef BASE64_NEON32_USE_ASM
|
||||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_neon32_inner_asm (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// This function duplicates the functionality of enc_loop_neon32_inner,
|
||||
// but entirely with inline assembly. This gives a significant speedup
|
||||
// over using NEON intrinsics, which do not always generate very good
|
||||
// code. The logic of the assembly is directly lifted from the
|
||||
// intrinsics version, so it can be used as a guide to this code.
|
||||
|
||||
// Temporary registers, used as scratch space.
|
||||
uint8x16_t tmp0, tmp1, tmp2, tmp3;
|
||||
uint8x16_t mask0, mask1, mask2, mask3;
|
||||
|
||||
// A lookup table containing the absolute offsets for all ranges.
|
||||
const uint8x16_t lut = {
|
||||
65U, 71U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
237U, 240U, 0U, 0U
|
||||
};
|
||||
|
||||
// Numeric constants.
|
||||
const uint8x16_t n51 = vdupq_n_u8(51);
|
||||
const uint8x16_t n25 = vdupq_n_u8(25);
|
||||
const uint8x16_t n63 = vdupq_n_u8(63);
|
||||
|
||||
__asm__ (
|
||||
|
||||
// Load 48 bytes and deinterleave. The bytes are loaded to
|
||||
// hard-coded registers q12, q13 and q14, to ensure that they
|
||||
// are contiguous. Increment the source pointer.
|
||||
"vld3.8 {d24, d26, d28}, [%[src]]! \n\t"
|
||||
"vld3.8 {d25, d27, d29}, [%[src]]! \n\t"
|
||||
|
||||
// Reshuffle the bytes using temporaries.
|
||||
"vshr.u8 %q[t0], q12, #2 \n\t"
|
||||
"vshr.u8 %q[t1], q13, #4 \n\t"
|
||||
"vshr.u8 %q[t2], q14, #6 \n\t"
|
||||
"vsli.8 %q[t1], q12, #4 \n\t"
|
||||
"vsli.8 %q[t2], q13, #2 \n\t"
|
||||
"vand.u8 %q[t1], %q[t1], %q[n63] \n\t"
|
||||
"vand.u8 %q[t2], %q[t2], %q[n63] \n\t"
|
||||
"vand.u8 %q[t3], q14, %q[n63] \n\t"
|
||||
|
||||
// t0..t3 are the reshuffled inputs. Create LUT indices.
|
||||
"vqsub.u8 q12, %q[t0], %q[n51] \n\t"
|
||||
"vqsub.u8 q13, %q[t1], %q[n51] \n\t"
|
||||
"vqsub.u8 q14, %q[t2], %q[n51] \n\t"
|
||||
"vqsub.u8 q15, %q[t3], %q[n51] \n\t"
|
||||
|
||||
// Create the mask for range #0.
|
||||
"vcgt.u8 %q[m0], %q[t0], %q[n25] \n\t"
|
||||
"vcgt.u8 %q[m1], %q[t1], %q[n25] \n\t"
|
||||
"vcgt.u8 %q[m2], %q[t2], %q[n25] \n\t"
|
||||
"vcgt.u8 %q[m3], %q[t3], %q[n25] \n\t"
|
||||
|
||||
// Subtract -1 to correct the LUT indices.
|
||||
"vsub.u8 q12, %q[m0] \n\t"
|
||||
"vsub.u8 q13, %q[m1] \n\t"
|
||||
"vsub.u8 q14, %q[m2] \n\t"
|
||||
"vsub.u8 q15, %q[m3] \n\t"
|
||||
|
||||
// Lookup the delta values.
|
||||
"vtbl.u8 d24, {%q[lut]}, d24 \n\t"
|
||||
"vtbl.u8 d25, {%q[lut]}, d25 \n\t"
|
||||
"vtbl.u8 d26, {%q[lut]}, d26 \n\t"
|
||||
"vtbl.u8 d27, {%q[lut]}, d27 \n\t"
|
||||
"vtbl.u8 d28, {%q[lut]}, d28 \n\t"
|
||||
"vtbl.u8 d29, {%q[lut]}, d29 \n\t"
|
||||
"vtbl.u8 d30, {%q[lut]}, d30 \n\t"
|
||||
"vtbl.u8 d31, {%q[lut]}, d31 \n\t"
|
||||
|
||||
// Add the delta values.
|
||||
"vadd.u8 q12, %q[t0] \n\t"
|
||||
"vadd.u8 q13, %q[t1] \n\t"
|
||||
"vadd.u8 q14, %q[t2] \n\t"
|
||||
"vadd.u8 q15, %q[t3] \n\t"
|
||||
|
||||
// Store 64 bytes and interleave. Increment the dest pointer.
|
||||
"vst4.8 {d24, d26, d28, d30}, [%[dst]]! \n\t"
|
||||
"vst4.8 {d25, d27, d29, d31}, [%[dst]]! \n\t"
|
||||
|
||||
// Outputs (modified).
|
||||
: [src] "+r" (*s),
|
||||
[dst] "+r" (*o),
|
||||
[t0] "=&w" (tmp0),
|
||||
[t1] "=&w" (tmp1),
|
||||
[t2] "=&w" (tmp2),
|
||||
[t3] "=&w" (tmp3),
|
||||
[m0] "=&w" (mask0),
|
||||
[m1] "=&w" (mask1),
|
||||
[m2] "=&w" (mask2),
|
||||
[m3] "=&w" (mask3)
|
||||
|
||||
// Inputs (not modified).
|
||||
: [lut] "w" (lut),
|
||||
[n25] "w" (n25),
|
||||
[n51] "w" (n51),
|
||||
[n63] "w" (n63)
|
||||
|
||||
// Clobbers.
|
||||
: "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
|
||||
"cc", "memory"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Encode one 48-byte input block into 64 Base64 characters and advance
// both pointers. Dispatches to the hand-written asm version when the
// compiler supports it, otherwise uses the intrinsics pipeline.
static BASE64_FORCE_INLINE void
enc_loop_neon32_inner (const uint8_t **s, uint8_t **o)
{
#ifdef BASE64_NEON32_USE_ASM
	enc_loop_neon32_inner_asm(s, o);
#else
	// Load 48 bytes and deinterleave:
	uint8x16x3_t src = vld3q_u8(*s);

	// Reshuffle: spread each 3-byte group over four 6-bit values:
	uint8x16x4_t out = enc_reshuffle(src);

	// Translate reshuffled bytes to the Base64 alphabet:
	out = enc_translate(out);

	// Interleave and store output:
	vst4q_u8(*o, out);

	*s += 48;
	*o += 64;
#endif
}
|
||||
|
||||
static inline void
|
||||
enc_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
size_t rounds = *slen / 48;
|
||||
|
||||
*slen -= rounds * 48; // 48 bytes consumed per round
|
||||
*olen += rounds * 64; // 64 bytes produced per round
|
||||
|
||||
while (rounds > 0) {
|
||||
if (rounds >= 8) {
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
rounds -= 8;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 4) {
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
rounds -= 4;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 2) {
|
||||
enc_loop_neon32_inner(s, o);
|
||||
enc_loop_neon32_inner(s, o);
|
||||
rounds -= 2;
|
||||
continue;
|
||||
}
|
||||
enc_loop_neon32_inner(s, o);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
static BASE64_FORCE_INLINE uint8x16x4_t
|
||||
enc_reshuffle (uint8x16x3_t in)
|
||||
{
|
||||
uint8x16x4_t out;
|
||||
|
||||
// Input:
|
||||
// in[0] = a7 a6 a5 a4 a3 a2 a1 a0
|
||||
// in[1] = b7 b6 b5 b4 b3 b2 b1 b0
|
||||
// in[2] = c7 c6 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Output:
|
||||
// out[0] = 00 00 a7 a6 a5 a4 a3 a2
|
||||
// out[1] = 00 00 a1 a0 b7 b6 b5 b4
|
||||
// out[2] = 00 00 b3 b2 b1 b0 c7 c6
|
||||
// out[3] = 00 00 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Move the input bits to where they need to be in the outputs. Except
|
||||
// for the first output, the high two bits are not cleared.
|
||||
out.val[0] = vshrq_n_u8(in.val[0], 2);
|
||||
out.val[1] = vshrq_n_u8(in.val[1], 4);
|
||||
out.val[2] = vshrq_n_u8(in.val[2], 6);
|
||||
out.val[1] = vsliq_n_u8(out.val[1], in.val[0], 4);
|
||||
out.val[2] = vsliq_n_u8(out.val[2], in.val[1], 2);
|
||||
|
||||
// Clear the high two bits in the second, third and fourth output.
|
||||
out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
|
||||
out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
|
||||
out.val[3] = vandq_u8(in.val[2], vdupq_n_u8(0x3F));
|
||||
|
||||
return out;
|
||||
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
static BASE64_FORCE_INLINE uint8x16x4_t
|
||||
enc_translate (const uint8x16x4_t in)
|
||||
{
|
||||
// A lookup table containing the absolute offsets for all ranges:
|
||||
const uint8x16_t lut = {
|
||||
65U, 71U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
252U, 252U, 252U, 252U,
|
||||
237U, 240U, 0U, 0U
|
||||
};
|
||||
|
||||
const uint8x16_t offset = vdupq_n_u8(51);
|
||||
|
||||
uint8x16x4_t indices, mask, delta, out;
|
||||
|
||||
// Translate values 0..63 to the Base64 alphabet. There are five sets:
|
||||
// # From To Abs Index Characters
|
||||
// 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
|
||||
// 2 [52..61] [48..57] -4 [2..11] 0123456789
|
||||
// 3 [62] [43] -19 12 +
|
||||
// 4 [63] [47] -16 13 /
|
||||
|
||||
// Create LUT indices from input:
|
||||
// the index for range #0 is right, others are 1 less than expected:
|
||||
indices.val[0] = vqsubq_u8(in.val[0], offset);
|
||||
indices.val[1] = vqsubq_u8(in.val[1], offset);
|
||||
indices.val[2] = vqsubq_u8(in.val[2], offset);
|
||||
indices.val[3] = vqsubq_u8(in.val[3], offset);
|
||||
|
||||
// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
|
||||
mask.val[0] = vcgtq_u8(in.val[0], vdupq_n_u8(25));
|
||||
mask.val[1] = vcgtq_u8(in.val[1], vdupq_n_u8(25));
|
||||
mask.val[2] = vcgtq_u8(in.val[2], vdupq_n_u8(25));
|
||||
mask.val[3] = vcgtq_u8(in.val[3], vdupq_n_u8(25));
|
||||
|
||||
// Subtract -1, so add 1 to indices for range #[1..4], All indices are
|
||||
// now correct:
|
||||
indices.val[0] = vsubq_u8(indices.val[0], mask.val[0]);
|
||||
indices.val[1] = vsubq_u8(indices.val[1], mask.val[1]);
|
||||
indices.val[2] = vsubq_u8(indices.val[2], mask.val[2]);
|
||||
indices.val[3] = vsubq_u8(indices.val[3], mask.val[3]);
|
||||
|
||||
// Lookup delta values:
|
||||
delta.val[0] = vqtbl1q_u8(lut, indices.val[0]);
|
||||
delta.val[1] = vqtbl1q_u8(lut, indices.val[1]);
|
||||
delta.val[2] = vqtbl1q_u8(lut, indices.val[2]);
|
||||
delta.val[3] = vqtbl1q_u8(lut, indices.val[3]);
|
||||
|
||||
// Add delta values:
|
||||
out.val[0] = vaddq_u8(in.val[0], delta.val[0]);
|
||||
out.val[1] = vaddq_u8(in.val[1], delta.val[1]);
|
||||
out.val[2] = vaddq_u8(in.val[2], delta.val[2]);
|
||||
out.val[3] = vaddq_u8(in.val[3], delta.val[3]);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_NEON64
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers.
|
||||
#if !defined(__wasm__) && (defined(__GNUC__) || defined(__clang__))
|
||||
#define BASE64_NEON64_USE_ASM
|
||||
#endif
|
||||
|
||||
// Load a 64-byte table into a uint8x16x4_t. With the asm path, the table
// is forced into the contiguous register quad v8..v11, which the encoder's
// inline assembly relies on; otherwise a plain vld1q_u8_x4 is used.
static BASE64_FORCE_INLINE uint8x16x4_t
load_64byte_table (const uint8_t *p)
{
#ifdef BASE64_NEON64_USE_ASM

	// Force the table to be loaded into contiguous registers. GCC will not
	// normally allocate contiguous registers for a `uint8x16x4_t'. These
	// registers are chosen to not conflict with the ones in the enc loop.
	register uint8x16_t t0 __asm__ ("v8");
	register uint8x16_t t1 __asm__ ("v9");
	register uint8x16_t t2 __asm__ ("v10");
	register uint8x16_t t3 __asm__ ("v11");

	__asm__ (
		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
		: [src] "+r" (p),
		  [t0] "=w" (t0),
		  [t1] "=w" (t1),
		  [t2] "=w" (t2),
		  [t3] "=w" (t3)
	);

	return (uint8x16x4_t) {
		.val[0] = t0,
		.val[1] = t1,
		.val[2] = t2,
		.val[3] = t3,
	};
#else
	return vld1q_u8_x4(p);
#endif
}
|
||||
|
||||
#include "../generic/32/dec_loop.c"
|
||||
#include "../generic/64/enc_loop.c"
|
||||
#include "dec_loop.c"
|
||||
|
||||
#ifdef BASE64_NEON64_USE_ASM
|
||||
# include "enc_loop_asm.c"
|
||||
#else
|
||||
# include "enc_reshuffle.c"
|
||||
# include "enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_NEON64
|
||||
|
||||
// Stride size is so large on these NEON 64-bit functions
// (48 bytes encode, 64 bytes decode) that we inline the
// uint64 codec to stay performant on smaller inputs.

// Public NEON64 streaming encoder entry point. The generic head/tail
// fragments (included inline) handle stream state and leftover bytes;
// the SIMD loop and the generic 64-bit loop handle the bulk.
void
base64_stream_encode_neon64 BASE64_ENC_PARAMS
{
#if HAVE_NEON64
	#include "../generic/enc_head.c"
	enc_loop_neon64(&s, &slen, &o, &olen);
	enc_loop_generic_64(&s, &slen, &o, &olen);
	#include "../generic/enc_tail.c"
#else
	// NEON64 not compiled in; stub aborts or falls through per codec policy.
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Public NEON64 streaming decoder entry point. Returns the decoder status
// produced by the included generic head/tail fragments.
int
base64_stream_decode_neon64 BASE64_DEC_PARAMS
{
#if HAVE_NEON64
	#include "../generic/dec_head.c"
	dec_loop_neon64(&s, &slen, &o, &olen);
	dec_loop_generic_32(&s, &slen, &o, &olen);
	#include "../generic/dec_tail.c"
#else
	// NEON64 not compiled in; delegate to the stub implementation.
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
// The input consists of five valid character sets in the Base64 alphabet,
// which we need to map back to the 6-bit values they represent.
// There are three ranges, two singles, and then there's the rest.
//
// #  From       To        LUT  Characters
// 1  [0..42]    [255]      #1  invalid input
// 2  [43]       [62]       #1  +
// 3  [44..46]   [255]      #1  invalid input
// 4  [47]       [63]       #1  /
// 5  [48..57]   [52..61]   #1  0..9
// 6  [58..63]   [255]      #1  invalid input
// 7  [64]       [255]      #2  invalid input
// 8  [65..90]   [0..25]    #2  A..Z
// 9  [91..96]   [255]      #2  invalid input
// 10 [97..122]  [26..51]   #2  a..z
// 11 [123..126] [255]      #2  invalid input
// (12) Everything else => invalid input

// The first LUT will use the VTBL instruction (out of range indices are set to
// 0 in destination). Covers input bytes [0..63].
static const uint8_t dec_lut1[] = {
	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,  62U, 255U, 255U, 255U,  63U,
	 52U,  53U,  54U,  55U,  56U,  57U,  58U,  59U,  60U,  61U, 255U, 255U, 255U, 255U, 255U, 255U,
};

// The second LUT will use the VTBX instruction (out of range indices will be
// unchanged in destination). Input [64..126] will be mapped to index [1..63]
// in this LUT. Index 0 means that value comes from LUT #1.
static const uint8_t dec_lut2[] = {
	  0U, 255U,   0U,   1U,   2U,   3U,   4U,   5U,   6U,   7U,   8U,   9U,  10U,  11U,  12U,  13U,
	 14U,  15U,  16U,  17U,  18U,  19U,  20U,  21U,  22U,  23U,  24U,  25U, 255U, 255U, 255U, 255U,
	255U, 255U,  26U,  27U,  28U,  29U,  30U,  31U,  32U,  33U,  34U,  35U,  36U,  37U,  38U,  39U,
	 40U,  41U,  42U,  43U,  44U,  45U,  46U,  47U,  48U,  49U,  50U,  51U, 255U, 255U, 255U, 255U,
};

// All input values in range for the first look-up will be 0U in the second
// look-up result. All input values out of range for the first look-up will be
// 0U in the first look-up result. Thus, the two results can be ORed without
// conflicts.
//
// Invalid characters that are in the valid range for either look-up will be
// set to 255U in the combined result. Other invalid characters will just be
// passed through with the second look-up result (using the VTBX instruction).
// Since the second LUT is 64 bytes, those passed-through values are guaranteed
// to have a value greater than 63U. Therefore, valid characters will be mapped
// to the valid [0..63] range and all invalid characters will be mapped to
// values greater than 63.
|
||||
|
||||
// Bulk NEON64 Base64 decoder: 64 input bytes -> 48 decoded bytes per round,
// using the two-table (VTBL/VTBX) scheme described above. Breaks out of the
// loop on the first block containing an invalid character, restoring the
// skipped work to *slen/*olen so the scalar fallback can report the error.
static inline void
dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	if (*slen < 64) {
		return;
	}

	// Process blocks of 64 bytes per round. Unlike the SSE codecs, no
	// extra trailing zero bytes are written, so it is not necessary to
	// reserve extra input bytes:
	size_t rounds = *slen / 64;

	*slen -= rounds * 64;	// 64 bytes consumed per round
	*olen += rounds * 48;	// 48 bytes produced per round

	const uint8x16x4_t tbl_dec1 = load_64byte_table(dec_lut1);
	const uint8x16x4_t tbl_dec2 = load_64byte_table(dec_lut2);

	do {
		const uint8x16_t offset = vdupq_n_u8(63U);
		uint8x16x4_t dec1, dec2;
		uint8x16x3_t dec;

		// Load 64 bytes and deinterleave:
		uint8x16x4_t str = vld4q_u8((uint8_t *) *s);

		// Get indices for second LUT (input 64..126 maps to 1..63):
		dec2.val[0] = vqsubq_u8(str.val[0], offset);
		dec2.val[1] = vqsubq_u8(str.val[1], offset);
		dec2.val[2] = vqsubq_u8(str.val[2], offset);
		dec2.val[3] = vqsubq_u8(str.val[3], offset);

		// Get values from first LUT (covers input 0..63):
		dec1.val[0] = vqtbl4q_u8(tbl_dec1, str.val[0]);
		dec1.val[1] = vqtbl4q_u8(tbl_dec1, str.val[1]);
		dec1.val[2] = vqtbl4q_u8(tbl_dec1, str.val[2]);
		dec1.val[3] = vqtbl4q_u8(tbl_dec1, str.val[3]);

		// Get values from second LUT (VTBX passes out-of-range
		// indices through unchanged):
		dec2.val[0] = vqtbx4q_u8(dec2.val[0], tbl_dec2, dec2.val[0]);
		dec2.val[1] = vqtbx4q_u8(dec2.val[1], tbl_dec2, dec2.val[1]);
		dec2.val[2] = vqtbx4q_u8(dec2.val[2], tbl_dec2, dec2.val[2]);
		dec2.val[3] = vqtbx4q_u8(dec2.val[3], tbl_dec2, dec2.val[3]);

		// Get final values: the two results can be ORed without
		// conflicts (see comment block above):
		str.val[0] = vorrq_u8(dec1.val[0], dec2.val[0]);
		str.val[1] = vorrq_u8(dec1.val[1], dec2.val[1]);
		str.val[2] = vorrq_u8(dec1.val[2], dec2.val[2]);
		str.val[3] = vorrq_u8(dec1.val[3], dec2.val[3]);

		// Check for invalid input, any value larger than 63:
		const uint8x16_t classified
			= vorrq_u8(
				vorrq_u8(vcgtq_u8(str.val[0], vdupq_n_u8(63)), vcgtq_u8(str.val[1], vdupq_n_u8(63))),
				vorrq_u8(vcgtq_u8(str.val[2], vdupq_n_u8(63)), vcgtq_u8(str.val[3], vdupq_n_u8(63)))
			);

		// Check that all bits are zero:
		if (vmaxvq_u8(classified) != 0U) {
			break;
		}

		// Compress four 6-bit values into three bytes:
		dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4));
		dec.val[1] = vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2));
		dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]);

		// Interleave and store decoded result:
		vst3q_u8((uint8_t *) *o, dec);

		*s += 64;
		*o += 48;

	} while (--rounds > 0);

	// Adjust for any rounds that were skipped:
	*slen += rounds * 64;
	*olen -= rounds * 48;
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
|
||||
{
|
||||
// Load 48 bytes and deinterleave:
|
||||
uint8x16x3_t src = vld3q_u8(*s);
|
||||
|
||||
// Divide bits of three input bytes over four output bytes:
|
||||
uint8x16x4_t out = enc_reshuffle(src);
|
||||
|
||||
// The bits have now been shifted to the right locations;
|
||||
// translate their values 0..63 to the Base64 alphabet.
|
||||
// Use a 64-byte table lookup:
|
||||
out.val[0] = vqtbl4q_u8(tbl_enc, out.val[0]);
|
||||
out.val[1] = vqtbl4q_u8(tbl_enc, out.val[1]);
|
||||
out.val[2] = vqtbl4q_u8(tbl_enc, out.val[2]);
|
||||
out.val[3] = vqtbl4q_u8(tbl_enc, out.val[3]);
|
||||
|
||||
// Interleave and store output:
|
||||
vst4q_u8(*o, out);
|
||||
|
||||
*s += 48;
|
||||
*o += 64;
|
||||
}
|
||||
|
||||
static inline void
|
||||
enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
|
||||
{
|
||||
size_t rounds = *slen / 48;
|
||||
|
||||
*slen -= rounds * 48; // 48 bytes consumed per round
|
||||
*olen += rounds * 64; // 64 bytes produced per round
|
||||
|
||||
// Load the encoding table:
|
||||
const uint8x16x4_t tbl_enc = load_64byte_table(base64_table_enc_6bit);
|
||||
|
||||
while (rounds > 0) {
|
||||
if (rounds >= 8) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 8;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 4) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 4;
|
||||
continue;
|
||||
}
|
||||
if (rounds >= 2) {
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
rounds -= 2;
|
||||
continue;
|
||||
}
|
||||
enc_loop_neon64_inner(s, o, tbl_enc);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
// code repetition. The preprocessor is used to template large sections of
// inline assembly that differ only in the registers used. If the code was
// written out by hand, it would become very large and hard to audit.

// Generate a block of inline assembly that loads three user-defined registers
// A, B, C from memory and deinterleaves them, post-incrementing the src
// pointer. The register set should be sequential.
#define LOAD(A, B, C) \
	"ld3 {"A".16b, "B".16b, "C".16b}, [%[src]], #48 \n\t"

// Generate a block of inline assembly that takes three deinterleaved registers
// and shuffles the bytes. The output is in temporary registers t0..t3.
#define SHUF(A, B, C) \
	"ushr %[t0].16b, "A".16b, #2 \n\t" \
	"ushr %[t1].16b, "B".16b, #4 \n\t" \
	"ushr %[t2].16b, "C".16b, #6 \n\t" \
	"sli %[t1].16b, "A".16b, #4 \n\t" \
	"sli %[t2].16b, "B".16b, #2 \n\t" \
	"and %[t1].16b, %[t1].16b, %[n63].16b \n\t" \
	"and %[t2].16b, %[t2].16b, %[n63].16b \n\t" \
	"and %[t3].16b, "C".16b, %[n63].16b \n\t"

// Generate a block of inline assembly that takes temporary registers t0..t3
// and translates them to the base64 alphabet, using a table loaded into
// v8..v11. The output is in user-defined registers A..D.
#define TRAN(A, B, C, D) \
	"tbl "A".16b, {v8.16b-v11.16b}, %[t0].16b \n\t" \
	"tbl "B".16b, {v8.16b-v11.16b}, %[t1].16b \n\t" \
	"tbl "C".16b, {v8.16b-v11.16b}, %[t2].16b \n\t" \
	"tbl "D".16b, {v8.16b-v11.16b}, %[t3].16b \n\t"

// Generate a block of inline assembly that interleaves four registers and
// stores them, post-incrementing the destination pointer.
#define STOR(A, B, C, D) \
	"st4 {"A".16b, "B".16b, "C".16b, "D".16b}, [%[dst]], #64 \n\t"

// Generate a block of inline assembly that generates a single self-contained
// encoder round: fetch the data, process it, and store the result.
#define ROUND() \
	LOAD("v12", "v13", "v14") \
	SHUF("v12", "v13", "v14") \
	TRAN("v12", "v13", "v14", "v15") \
	STOR("v12", "v13", "v14", "v15")

// Generate a block of assembly that generates a type A interleaved encoder
// round. It uses registers that were loaded by the previous type B round, and
// in turn loads registers for the next type B round.
#define ROUND_A() \
	SHUF("v2", "v3", "v4") \
	LOAD("v12", "v13", "v14") \
	TRAN("v2", "v3", "v4", "v5") \
	STOR("v2", "v3", "v4", "v5")

// Type B interleaved encoder round. Same as type A, but register sets swapped.
#define ROUND_B() \
	SHUF("v12", "v13", "v14") \
	LOAD("v2", "v3", "v4") \
	TRAN("v12", "v13", "v14", "v15") \
	STOR("v12", "v13", "v14", "v15")

// The first type A round needs to load its own registers.
#define ROUND_A_FIRST() \
	LOAD("v2", "v3", "v4") \
	ROUND_A()

// The last type B round omits the load for the next step.
#define ROUND_B_LAST() \
	SHUF("v12", "v13", "v14") \
	TRAN("v12", "v13", "v14", "v15") \
	STOR("v12", "v13", "v14", "v15")

// Suppress clang's warning that the literal string in the asm statement is
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
// compilers). It may be true, but the goal here is not C99 portability.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"

// Bulk NEON64 Base64 encoder written entirely in templated inline assembly.
// 48 input bytes -> 64 output bytes per round; behavior matches the
// intrinsics version of enc_loop_neon64.
static inline void
enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	size_t rounds = *slen / 48;

	if (rounds == 0) {
		return;
	}

	*slen -= rounds * 48;	// 48 bytes consumed per round.
	*olen += rounds * 64;	// 64 bytes produced per round.

	// Number of times to go through the 8x loop.
	size_t loops = rounds / 8;

	// Number of rounds remaining after the 8x loop.
	rounds %= 8;

	// Temporary registers, used as scratch space.
	uint8x16_t tmp0, tmp1, tmp2, tmp3;

	__asm__ volatile (

		// Load the encoding table into v8..v11.
		"     ld1 {v8.16b-v11.16b}, [%[tbl]] \n\t"

		// If there are eight rounds or more, enter an 8x unrolled loop
		// of interleaved encoding rounds. The rounds interleave memory
		// operations (load/store) with data operations to maximize
		// pipeline throughput.
		"     cbz %[loops], 4f \n\t"

		// The SIMD instructions do not touch the flags.
		"88:  subs %[loops], %[loops], #1 \n\t"
		"     " ROUND_A_FIRST()
		"     " ROUND_B()
		"     " ROUND_A()
		"     " ROUND_B()
		"     " ROUND_A()
		"     " ROUND_B()
		"     " ROUND_A()
		"     " ROUND_B_LAST()
		"     b.ne 88b \n\t"

		// Enter a 4x unrolled loop for rounds of 4 or more.
		"4:   cmp  %[rounds], #4 \n\t"
		"     b.lt 30f \n\t"
		"     " ROUND_A_FIRST()
		"     " ROUND_B()
		"     " ROUND_A()
		"     " ROUND_B_LAST()
		"     sub  %[rounds], %[rounds], #4 \n\t"

		// Dispatch the remaining rounds 0..3.
		"30:  cbz  %[rounds], 0f \n\t"
		"     cmp  %[rounds], #2 \n\t"
		"     b.eq 2f \n\t"
		"     b.lt 1f \n\t"

		// Block of non-interlaced encoding rounds, which can each
		// individually be jumped to. Rounds fall through to the next.
		"3:   " ROUND()
		"2:   " ROUND()
		"1:   " ROUND()
		"0:   \n\t"

		// Outputs (modified).
		: [loops] "+r"  (loops),
		  [src]   "+r"  (*s),
		  [dst]   "+r"  (*o),
		  [t0]    "=&w" (tmp0),
		  [t1]    "=&w" (tmp1),
		  [t2]    "=&w" (tmp2),
		  [t3]    "=&w" (tmp3)

		// Inputs (not modified).
		: [rounds] "r" (rounds),
		  [tbl]    "r" (base64_table_enc_6bit),
		  [n63]    "w" (vdupq_n_u8(63))

		// Clobbers.
		: "v2",  "v3",  "v4",  "v5",
		  "v8",  "v9",  "v10", "v11",
		  "v12", "v13", "v14", "v15",
		  "cc", "memory"
	);
}

#pragma GCC diagnostic pop
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
static BASE64_FORCE_INLINE uint8x16x4_t
|
||||
enc_reshuffle (const uint8x16x3_t in)
|
||||
{
|
||||
uint8x16x4_t out;
|
||||
|
||||
// Input:
|
||||
// in[0] = a7 a6 a5 a4 a3 a2 a1 a0
|
||||
// in[1] = b7 b6 b5 b4 b3 b2 b1 b0
|
||||
// in[2] = c7 c6 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Output:
|
||||
// out[0] = 00 00 a7 a6 a5 a4 a3 a2
|
||||
// out[1] = 00 00 a1 a0 b7 b6 b5 b4
|
||||
// out[2] = 00 00 b3 b2 b1 b0 c7 c6
|
||||
// out[3] = 00 00 c5 c4 c3 c2 c1 c0
|
||||
|
||||
// Move the input bits to where they need to be in the outputs. Except
|
||||
// for the first output, the high two bits are not cleared.
|
||||
out.val[0] = vshrq_n_u8(in.val[0], 2);
|
||||
out.val[1] = vshrq_n_u8(in.val[1], 4);
|
||||
out.val[2] = vshrq_n_u8(in.val[2], 6);
|
||||
out.val[1] = vsliq_n_u8(out.val[1], in.val[0], 4);
|
||||
out.val[2] = vsliq_n_u8(out.val[2], in.val[1], 2);
|
||||
|
||||
// Clear the high two bits in the second, third and fourth output.
|
||||
out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
|
||||
out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
|
||||
out.val[3] = vandq_u8(in.val[2], vdupq_n_u8(0x3F));
|
||||
|
||||
return out;
|
||||
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSE41
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("sse4.1"))), apply_to=function)
|
||||
#else
|
||||
#pragma GCC target("sse4.1")
|
||||
#endif
|
||||
#include <smmintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_SSE41_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_SSE41_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_SSE41_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
|
||||
#if BASE64_SSE41_USE_ASM
|
||||
# include "../ssse3/enc_loop_asm.c"
|
||||
#else
|
||||
# include "../ssse3/enc_translate.c"
|
||||
# include "../ssse3/enc_reshuffle.c"
|
||||
# include "../ssse3/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SSE41
|
||||
|
||||
// Public SSE4.1 streaming encoder entry point. The SSE4.1 build reuses the
// SSSE3 encoder loop (no 4.1-specific instructions are needed); head/tail
// fragments handle stream state and leftovers.
void
base64_stream_encode_sse41 BASE64_ENC_PARAMS
{
#if HAVE_SSE41
	#include "../generic/enc_head.c"
	enc_loop_ssse3(&s, &slen, &o, &olen);
	#include "../generic/enc_tail.c"
#else
	// SSE4.1 not compiled in; delegate to the stub implementation.
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Public SSE4.1 streaming decoder entry point. Reuses the SSSE3 decoder
// loop; returns the status produced by the generic head/tail fragments.
// The clang target-attribute pragma pushed at the top of this file is
// popped here, after the last function it applies to.
int
base64_stream_decode_sse41 BASE64_DEC_PARAMS
{
#if HAVE_SSE41
	#include "../generic/dec_head.c"
	dec_loop_ssse3(&s, &slen, &o, &olen);
	#include "../generic/dec_tail.c"
#if defined(__clang__)
	#pragma clang attribute pop
#endif
#else
	// SSE4.1 not compiled in; delegate to the stub implementation.
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSE42
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("sse4.2"))), apply_to=function)
|
||||
#else
|
||||
#pragma GCC target("sse4.2")
|
||||
#endif
|
||||
#include <nmmintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
#ifndef BASE64_SSE42_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_SSE42_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_SSE42_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "../ssse3/dec_reshuffle.c"
|
||||
#include "../ssse3/dec_loop.c"
|
||||
|
||||
#if BASE64_SSE42_USE_ASM
|
||||
# include "../ssse3/enc_loop_asm.c"
|
||||
#else
|
||||
# include "../ssse3/enc_translate.c"
|
||||
# include "../ssse3/enc_reshuffle.c"
|
||||
# include "../ssse3/enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SSE42
|
||||
|
||||
// Public SSE4.2 streaming encoder entry point. The SSE4.2 build reuses the
// SSSE3 encoder loop (no 4.2-specific instructions are needed); head/tail
// fragments handle stream state and leftovers.
void
base64_stream_encode_sse42 BASE64_ENC_PARAMS
{
#if HAVE_SSE42
	#include "../generic/enc_head.c"
	enc_loop_ssse3(&s, &slen, &o, &olen);
	#include "../generic/enc_tail.c"
#else
	// SSE4.2 not compiled in; delegate to the stub implementation.
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Public SSE4.2 streaming decoder entry point. Reuses the SSSE3 decoder
// loop; returns the status produced by the generic head/tail fragments.
// The clang target-attribute pragma pushed at the top of this file is
// popped here, after the last function it applies to.
int
base64_stream_decode_sse42 BASE64_DEC_PARAMS
{
#if HAVE_SSE42
	#include "../generic/dec_head.c"
	dec_loop_ssse3(&s, &slen, &o, &olen);
	#include "../generic/dec_tail.c"
#if defined(__clang__)
	#pragma clang attribute pop
#endif
#else
	// SSE4.2 not compiled in; delegate to the stub implementation.
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "../../tables/tables.h"
|
||||
#include "../../codecs.h"
|
||||
#include "config.h"
|
||||
#include "../../env.h"
|
||||
|
||||
#if HAVE_SSSE3
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function)
|
||||
#else
|
||||
#pragma GCC target("ssse3")
|
||||
#endif
|
||||
#include <tmmintrin.h>
|
||||
|
||||
// Only enable inline assembly on supported compilers and on 64-bit CPUs.
|
||||
// 32-bit CPUs with SSSE3 support, such as low-end Atoms, only have eight XMM
|
||||
// registers, which is not enough to run the inline assembly.
|
||||
#ifndef BASE64_SSSE3_USE_ASM
|
||||
# if (defined(__GNUC__) || defined(__clang__)) && BASE64_WORDSIZE == 64
|
||||
# define BASE64_SSSE3_USE_ASM 1
|
||||
# else
|
||||
# define BASE64_SSSE3_USE_ASM 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "dec_reshuffle.c"
|
||||
#include "dec_loop.c"
|
||||
|
||||
#if BASE64_SSSE3_USE_ASM
|
||||
# include "enc_loop_asm.c"
|
||||
#else
|
||||
# include "enc_reshuffle.c"
|
||||
# include "enc_translate.c"
|
||||
# include "enc_loop.c"
|
||||
#endif
|
||||
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
// Streaming Base64 encoder entry point for the SSSE3 build of this
// translation unit. Forwards to the generic stub when SSSE3 support was
// compiled out.
void
base64_stream_encode_ssse3 BASE64_ENC_PARAMS
{
#if HAVE_SSSE3
	// Shared prologue: unpacks the codec state into the locals
	// (s, slen, o, olen) used by the vector loop below.
#include "../generic/enc_head.c"
	// Bulk of the input is handled 12 bytes per round by the SIMD loop.
	enc_loop_ssse3(&s, &slen, &o, &olen);
	// Shared epilogue: encodes the remaining tail bytes and writes the
	// updated state back.
#include "../generic/enc_tail.c"
#else
	base64_enc_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
||||
// Streaming Base64 decoder entry point for the SSSE3 build of this
// translation unit. Return value follows the convention of the shared
// generic/dec_tail.c code. Falls back to the generic stub when SSSE3
// support was compiled out.
int
base64_stream_decode_ssse3 BASE64_DEC_PARAMS
{
#if HAVE_SSSE3
	// Shared prologue: unpacks the codec state into locals.
#include "../generic/dec_head.c"
	// Bulk of the input is handled 16 bytes per round by the SIMD loop;
	// it bails out early on invalid characters so the tail code can
	// report the error position correctly.
	dec_loop_ssse3(&s, &slen, &o, &olen);
	// Shared epilogue: bytewise decode of the tail with error checking.
#include "../generic/dec_tail.c"
// Last function in the file: pop the per-function clang target
// attribute pushed at the top (push and pop both guarded by HAVE_SSSE3).
#if defined(__clang__)
#pragma clang attribute pop
#endif
#else
	return base64_dec_stub(state, src, srclen, out, outlen);
#endif
}
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
// The input consists of six character sets in the Base64 alphabet, which we
|
||||
// need to map back to the 6-bit values they represent. There are three ranges,
|
||||
// two singles, and then there's the rest.
|
||||
//
|
||||
// # From To Add Characters
|
||||
// 1 [43] [62] +19 +
|
||||
// 2 [47] [63] +16 /
|
||||
// 3 [48..57] [52..61] +4 0..9
|
||||
// 4 [65..90] [0..25] -65 A..Z
|
||||
// 5 [97..122] [26..51] -71 a..z
|
||||
// (6) Everything else => invalid input
|
||||
//
|
||||
// We will use lookup tables for character validation and offset computation.
|
||||
// Remember that 0x2X and 0x0X are the same index for _mm_shuffle_epi8, this
|
||||
// allows to mask with 0x2F instead of 0x0F and thus save one constant
|
||||
// declaration (register and/or memory access).
|
||||
//
|
||||
// For offsets:
|
||||
// Perfect hash for lut = ((src >> 4) & 0x2F) + ((src == 0x2F) ? 0xFF : 0x00)
|
||||
// 0000 = garbage
|
||||
// 0001 = /
|
||||
// 0010 = +
|
||||
// 0011 = 0-9
|
||||
// 0100 = A-Z
|
||||
// 0101 = A-Z
|
||||
// 0110 = a-z
|
||||
// 0111 = a-z
|
||||
// 1000 >= garbage
|
||||
//
|
||||
// For validation, here's the table.
|
||||
// A character is valid if and only if the AND of the 2 lookups equals 0:
|
||||
//
|
||||
// hi \ lo 0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
|
||||
// LUT 0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A
|
||||
//
|
||||
// 0000 0x10 char NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
|
||||
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
//
|
||||
// 0001 0x10 char DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
|
||||
// andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
//
|
||||
// 0010 0x01 char ! " # $ % & ' ( ) * + , - . /
|
||||
// andlut 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00
|
||||
//
|
||||
// 0011 0x02 char 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02
|
||||
//
|
||||
// 0100 0x04 char @ A B C D E F G H I J K L M N O
|
||||
// andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
//
|
||||
// 0101 0x08 char P Q R S T U V W X Y Z [ \ ] ^ _
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
|
||||
//
|
||||
// 0110 0x04 char ` a b c d e f g h i j k l m n o
|
||||
// andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
// 0111 0x08 char p q r s t u v w x y z { | } ~
|
||||
// andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
|
||||
//
|
||||
// 1000 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1001 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1010 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1011 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1100 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1101 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1110 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
// 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
|
||||
|
||||
// Decode one 16-byte block of Base64 characters into 12 output bytes.
// Returns 1 on success (pointers and round counter advanced), or 0 when the
// block contains a character outside the Base64 alphabet, in which case
// nothing is consumed and the caller falls back to the bytewise decoder.
// The lookup-table scheme is documented in the large comment above.
static BASE64_FORCE_INLINE int
dec_loop_ssse3_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
{
	const __m128i lut_lo = _mm_setr_epi8(
		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);

	const __m128i lut_hi = _mm_setr_epi8(
		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);

	const __m128i lut_roll = _mm_setr_epi8(
		0, 16, 19, 4, -65, -65, -71, -71,
		0, 0, 0, 0, 0, 0, 0, 0);

	const __m128i mask_2F = _mm_set1_epi8(0x2F);

	// Load input:
	__m128i str = _mm_loadu_si128((__m128i *) *s);

	// Table lookups:
	const __m128i hi_nibbles = _mm_and_si128(_mm_srli_epi32(str, 4), mask_2F);
	const __m128i lo_nibbles = _mm_and_si128(str, mask_2F);
	const __m128i hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);
	const __m128i lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);

	// Check for invalid input: if any "and" values from lo and hi are not
	// zero, fall back on bytewise code to do error checking and reporting:
	if (_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_setzero_si128())) != 0) {
		return 0;
	}

	// eq_2F is 0xFF for '/' bytes; adding it (i.e. subtracting 1) selects
	// the dedicated roll entry for '/', per the perfect hash above.
	const __m128i eq_2F = _mm_cmpeq_epi8(str, mask_2F);
	const __m128i roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2F, hi_nibbles));

	// Now simply add the delta values to the input:
	str = _mm_add_epi8(str, roll);

	// Reshuffle the input to packed 12-byte output format:
	str = dec_reshuffle(str);

	// Store the output (writes 16 bytes; the last 4 are scratch that the
	// caller guarantees will be overwritten later):
	_mm_storeu_si128((__m128i *) *o, str);

	*s += 16;
	*o += 12;
	*rounds -= 1;

	return 1;
}
|
||||
|
||||
// Vector decode loop: consumes the bulk of the input 16 bytes per round,
// producing 12 output bytes per round. Stops early when a round encounters
// an invalid character, leaving the remaining input for the bytewise
// fallback decoder.
static inline void
dec_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// Too short for even one vector round plus the safety margin below:
	if (*slen < 24) {
		return;
	}

	// Each round reads 16 bytes but its store writes 4 scratch bytes past
	// the 12 real output bytes. Keeping 8 input bytes in reserve (6 data
	// bytes plus up to two end-of-string markers) guarantees the scratch
	// bytes are overwritten by later output.
	size_t todo = (*slen - 8) / 16;

	// Optimistically account for every round up front; corrected below
	// if the loop bails out early.
	*slen -= todo * 16; // 16 bytes consumed per round
	*olen += todo * 12; // 12 bytes produced per round

	// Drain the rounds in unrolled batches of 8, 4, 2 and finally 1,
	// stopping as soon as an inner round reports invalid input.
	int ok = 1;
	while (ok && todo > 0) {
		size_t batch = (todo >= 8) ? 8
		             : (todo >= 4) ? 4
		             : (todo >= 2) ? 2
		             : 1;
		for (size_t i = 0; ok && i < batch; i++) {
			ok = dec_loop_ssse3_inner(s, o, &todo);
		}
		if (batch == 1) {
			break;
		}
	}

	// Give back the rounds that were not completed:
	*slen += todo * 16;
	*olen -= todo * 12;
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
// Pack 16 decoded 6-bit values (one per byte, high two bits zero) into the
// 12 contiguous output bytes they represent, using two multiply-add steps
// and a final byte shuffle. The top 4 output bytes are left as zero scratch.
static BASE64_FORCE_INLINE __m128i
dec_reshuffle (const __m128i in)
{
	// in, bits, upper case are most significant bits, lower case are least significant bits
	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA

	// Merge adjacent byte pairs: multiplier 0x0140 = (1 << 6) | ... shifts
	// the even byte left by 6 and adds the odd byte.
	const __m128i merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
	// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
	// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
	// 0000eeee FFffffff 0000DDDD DDddEEEE
	// 0000bbbb CCcccccc 0000AAAA AAaaBBBB

	// Merge adjacent 16-bit pairs the same way with a 12-bit shift:
	const __m128i out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
	// 00000000 JJJJJJjj KKKKkkkk LLllllll
	// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
	// 00000000 DDDDDDdd EEEEeeee FFffffff
	// 00000000 AAAAAAaa BBBBbbbb CCcccccc

	// Pack bytes together, dropping the zero byte of every 32-bit lane;
	// -1 indices zero the four trailing scratch bytes:
	return _mm_shuffle_epi8(out, _mm_setr_epi8(
		 2,  1,  0,
		 6,  5,  4,
		10,  9,  8,
		14, 13, 12,
		-1, -1, -1, -1));
	// 00000000 00000000 00000000 00000000
	// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
	// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
	// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
static BASE64_FORCE_INLINE void
|
||||
enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
|
||||
{
|
||||
// Load input:
|
||||
__m128i str = _mm_loadu_si128((__m128i *) *s);
|
||||
|
||||
// Reshuffle:
|
||||
str = enc_reshuffle(str);
|
||||
|
||||
// Translate reshuffled bytes to the Base64 alphabet:
|
||||
str = enc_translate(str);
|
||||
|
||||
// Store:
|
||||
_mm_storeu_si128((__m128i *) *o, str);
|
||||
|
||||
*s += 12;
|
||||
*o += 16;
|
||||
}
|
||||
|
||||
// Vector encode loop: consumes the bulk of the input 12 bytes per round,
// producing 16 output bytes per round. Leaves the final few bytes for the
// generic tail encoder.
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// A single round loads 16 bytes, so require at least that much input:
	if (*slen < 16) {
		return;
	}

	// Each round consumes only 12 of the 16 bytes it loads; keep 4 bytes
	// in reserve so the last load never reads past the input buffer.
	size_t rounds = (*slen - 4) / 12;

	*slen -= rounds * 12; // 12 bytes consumed per round
	*olen += rounds * 16; // 16 bytes produced per round

	// Drain the rounds in progressively smaller unrolled batches. The
	// inner round cannot fail, so exactly `rounds` rounds always run.
	while (rounds >= 8) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		rounds -= 8;
	}
	while (rounds >= 4) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		rounds -= 4;
	}
	while (rounds >= 2) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		rounds -= 2;
	}
	if (rounds == 1) {
		enc_loop_ssse3_inner(s, o);
	}
}
|
||||
|
|
@ -0,0 +1,268 @@
|
|||
// Apologies in advance for combining the preprocessor with inline assembly,
|
||||
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
|
||||
// code repetition. The preprocessor is used to template large sections of
|
||||
// inline assembly that differ only in the registers used. If the code was
|
||||
// written out by hand, it would become very large and hard to audit.
|
||||
|
||||
// Generate a block of inline assembly that loads register R0 from memory. The
|
||||
// offset at which the register is loaded is set by the given round.
|
||||
#define LOAD(R0, ROUND) \
|
||||
"lddqu ("#ROUND" * 12)(%[src]), %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that deinterleaves and shuffles register
|
||||
// R0 using preloaded constants. Outputs in R0 and R1.
|
||||
#define SHUF(R0, R1) \
|
||||
"pshufb %[lut0], %["R0"] \n\t" \
|
||||
"movdqa %["R0"], %["R1"] \n\t" \
|
||||
"pand %[msk0], %["R0"] \n\t" \
|
||||
"pand %[msk2], %["R1"] \n\t" \
|
||||
"pmulhuw %[msk1], %["R0"] \n\t" \
|
||||
"pmullw %[msk3], %["R1"] \n\t" \
|
||||
"por %["R1"], %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that takes R0 and R1 and translates
|
||||
// their contents to the base64 alphabet, using preloaded constants.
|
||||
#define TRAN(R0, R1, R2) \
|
||||
"movdqa %["R0"], %["R1"] \n\t" \
|
||||
"movdqa %["R0"], %["R2"] \n\t" \
|
||||
"psubusb %[n51], %["R1"] \n\t" \
|
||||
"pcmpgtb %[n25], %["R2"] \n\t" \
|
||||
"psubb %["R2"], %["R1"] \n\t" \
|
||||
"movdqa %[lut1], %["R2"] \n\t" \
|
||||
"pshufb %["R1"], %["R2"] \n\t" \
|
||||
"paddb %["R2"], %["R0"] \n\t"
|
||||
|
||||
// Generate a block of inline assembly that stores the given register R0 at an
|
||||
// offset set by the given round.
|
||||
#define STOR(R0, ROUND) \
|
||||
"movdqu %["R0"], ("#ROUND" * 16)(%[dst]) \n\t"
|
||||
|
||||
// Generate a block of inline assembly that generates a single self-contained
|
||||
// encoder round: fetch the data, process it, and store the result. Then update
|
||||
// the source and destination pointers.
|
||||
#define ROUND() \
|
||||
LOAD("a", 0) \
|
||||
SHUF("a", "b") \
|
||||
TRAN("a", "b", "c") \
|
||||
STOR("a", 0) \
|
||||
"add $12, %[src] \n\t" \
|
||||
"add $16, %[dst] \n\t"
|
||||
|
||||
// Define a macro that initiates a three-way interleaved encoding round by
|
||||
// preloading registers a, b and c from memory.
|
||||
// The register graph shows which registers are in use during each step, and
|
||||
// is a visual aid for choosing registers for that step. Symbol index:
|
||||
//
|
||||
// + indicates that a register is loaded by that step.
|
||||
// | indicates that a register is in use and must not be touched.
|
||||
// - indicates that a register is decommissioned by that step.
|
||||
// x indicates that a register is used as a temporary by that step.
|
||||
// V indicates that a register is an input or output to the macro.
|
||||
//
|
||||
#define ROUND_3_INIT() /* a b c d e f */ \
|
||||
LOAD("a", 0) /* + */ \
|
||||
SHUF("a", "d") /* | + */ \
|
||||
LOAD("b", 1) /* | + | */ \
|
||||
TRAN("a", "d", "e") /* | | - x */ \
|
||||
LOAD("c", 2) /* V V V */
|
||||
|
||||
// Define a macro that translates, shuffles and stores the input registers A, B
|
||||
// and C, and preloads registers D, E and F for the next round.
|
||||
// This macro can be arbitrarily daisy-chained by feeding output registers D, E
|
||||
// and F back into the next round as input registers A, B and C. The macro
|
||||
// carefully interleaves memory operations with data operations for optimal
|
||||
// pipelined performance.
|
||||
|
||||
#define ROUND_3(ROUND, A,B,C,D,E,F) /* A B C D E F */ \
|
||||
LOAD(D, (ROUND + 3)) /* V V V + */ \
|
||||
SHUF(B, E) /* | | | | + */ \
|
||||
STOR(A, (ROUND + 0)) /* - | | | | */ \
|
||||
TRAN(B, E, F) /* | | | - x */ \
|
||||
LOAD(E, (ROUND + 4)) /* | | | + */ \
|
||||
SHUF(C, A) /* + | | | | */ \
|
||||
STOR(B, (ROUND + 1)) /* | - | | | */ \
|
||||
TRAN(C, A, F) /* - | | | x */ \
|
||||
LOAD(F, (ROUND + 5)) /* | | | + */ \
|
||||
SHUF(D, A) /* + | | | | */ \
|
||||
STOR(C, (ROUND + 2)) /* | - | | | */ \
|
||||
TRAN(D, A, B) /* - x V V V */
|
||||
|
||||
// Define a macro that terminates a ROUND_3 macro by taking pre-loaded
|
||||
// registers D, E and F, and translating, shuffling and storing them.
|
||||
#define ROUND_3_END(ROUND, A,B,C,D,E,F) /* A B C D E F */ \
|
||||
SHUF(E, A) /* + V V V */ \
|
||||
STOR(D, (ROUND + 3)) /* | - | | */ \
|
||||
TRAN(E, A, B) /* - x | | */ \
|
||||
SHUF(F, C) /* + | | */ \
|
||||
STOR(E, (ROUND + 4)) /* | - | */ \
|
||||
TRAN(F, C, D) /* - x | */ \
|
||||
STOR(F, (ROUND + 5)) /* - */
|
||||
|
||||
// Define a type A round. Inputs are a, b, and c, outputs are d, e, and f.
|
||||
#define ROUND_3_A(ROUND) \
|
||||
ROUND_3(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Define a type B round. Inputs and outputs are swapped with regard to type A.
|
||||
#define ROUND_3_B(ROUND) \
|
||||
ROUND_3(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Terminating macro for a type A round.
|
||||
#define ROUND_3_A_LAST(ROUND) \
|
||||
ROUND_3_A(ROUND) \
|
||||
ROUND_3_END(ROUND, "a", "b", "c", "d", "e", "f")
|
||||
|
||||
// Terminating macro for a type B round.
|
||||
#define ROUND_3_B_LAST(ROUND) \
|
||||
ROUND_3_B(ROUND) \
|
||||
ROUND_3_END(ROUND, "d", "e", "f", "a", "b", "c")
|
||||
|
||||
// Suppress clang's warning that the literal string in the asm statement is
|
||||
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
|
||||
// compilers). It may be true, but the goal here is not C99 portability.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||
|
||||
// Hand-written inline-assembly version of the SSSE3 encode loop. Consumes
// 12 input bytes and produces 16 output bytes per round, dispatching the
// rounds through 36x / 18x / 9x / 6x software-pipelined unrolled loops
// built from the ROUND_3* macros above, then mopping up 0..5 leftover
// rounds with plain ROUND()s. Pointer updates happen through the "+r"
// constraints on [src] and [dst].
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// For a clearer explanation of the algorithm used by this function,
	// please refer to the plain (not inline assembly) implementation. This
	// function follows the same basic logic.

	if (*slen < 16) {
		return;
	}

	// Process blocks of 12 bytes at a time. Input is read in blocks of 16
	// bytes, so "reserve" four bytes from the input buffer to ensure that
	// we never read beyond the end of the input buffer.
	size_t rounds = (*slen - 4) / 12;

	*slen -= rounds * 12;	// 12 bytes consumed per round
	*olen += rounds * 16;	// 16 bytes produced per round

	// Number of times to go through the 36x loop.
	size_t loops = rounds / 36;

	// Number of rounds remaining after the 36x loop.
	rounds %= 36;

	// Lookup tables.
	const __m128i lut0 = _mm_set_epi8(
		10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);

	const __m128i lut1 = _mm_setr_epi8(
		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);

	// Temporary registers.
	__m128i a, b, c, d, e, f;

	__asm__ volatile (

		// If there are 36 rounds or more, enter a 36x unrolled loop of
		// interleaved encoding rounds. The rounds interleave memory
		// operations (load/store) with data operations (table lookups,
		// etc) to maximize pipeline throughput.
		"    test %[loops], %[loops] \n\t"
		"    jz   18f                \n\t"
		"    jmp  36f                \n\t"
		"                            \n\t"
		".balign 64                  \n\t"
		"36:  " ROUND_3_INIT()
		"     " ROUND_3_A( 0)
		"     " ROUND_3_B( 3)
		"     " ROUND_3_A( 6)
		"     " ROUND_3_B( 9)
		"     " ROUND_3_A(12)
		"     " ROUND_3_B(15)
		"     " ROUND_3_A(18)
		"     " ROUND_3_B(21)
		"     " ROUND_3_A(24)
		"     " ROUND_3_B(27)
		"     " ROUND_3_A_LAST(30)
		"     add $(12 * 36), %[src] \n\t"
		"     add $(16 * 36), %[dst] \n\t"
		"     dec %[loops]           \n\t"
		"     jnz 36b                \n\t"

		// Enter an 18x unrolled loop for rounds of 18 or more.
		"18:  cmp $18, %[rounds]     \n\t"
		"     jl  9f                 \n\t"
		"     " ROUND_3_INIT()
		"     " ROUND_3_A(0)
		"     " ROUND_3_B(3)
		"     " ROUND_3_A(6)
		"     " ROUND_3_B(9)
		"     " ROUND_3_A_LAST(12)
		"     sub $18, %[rounds]     \n\t"
		"     add $(12 * 18), %[src] \n\t"
		"     add $(16 * 18), %[dst] \n\t"

		// Enter a 9x unrolled loop for rounds of 9 or more.
		"9:   cmp $9, %[rounds]      \n\t"
		"     jl  6f                 \n\t"
		"     " ROUND_3_INIT()
		"     " ROUND_3_A(0)
		"     " ROUND_3_B_LAST(3)
		"     sub $9, %[rounds]      \n\t"
		"     add $(12 * 9), %[src]  \n\t"
		"     add $(16 * 9), %[dst]  \n\t"

		// Enter a 6x unrolled loop for rounds of 6 or more.
		"6:   cmp $6, %[rounds]      \n\t"
		"     jl  55f                \n\t"
		"     " ROUND_3_INIT()
		"     " ROUND_3_A_LAST(0)
		"     sub $6, %[rounds]      \n\t"
		"     add $(12 * 6), %[src]  \n\t"
		"     add $(16 * 6), %[dst]  \n\t"

		// Dispatch the remaining rounds 0..5.
		"55:  cmp $3, %[rounds]      \n\t"
		"     jg  45f                \n\t"
		"     je  3f                 \n\t"
		"     cmp $1, %[rounds]      \n\t"
		"     jg  2f                 \n\t"
		"     je  1f                 \n\t"
		"     jmp 0f                 \n\t"

		"45:  cmp $4, %[rounds]      \n\t"
		"     je  4f                 \n\t"

		// Block of non-interlaced encoding rounds, which can each
		// individually be jumped to. Rounds fall through to the next.
		"5:   " ROUND()
		"4:   " ROUND()
		"3:   " ROUND()
		"2:   " ROUND()
		"1:   " ROUND()
		"0:                          \n\t"

		// Outputs (modified).
		: [rounds] "+r" (rounds),
		  [loops]  "+r" (loops),
		  [src]    "+r" (*s),
		  [dst]    "+r" (*o),
		  [a]      "=&x" (a),
		  [b]      "=&x" (b),
		  [c]      "=&x" (c),
		  [d]      "=&x" (d),
		  [e]      "=&x" (e),
		  [f]      "=&x" (f)

		// Inputs (not modified).
		: [lut0] "x" (lut0),
		  [lut1] "x" (lut1),
		  [msk0] "x" (_mm_set1_epi32(0x0FC0FC00)),
		  [msk1] "x" (_mm_set1_epi32(0x04000040)),
		  [msk2] "x" (_mm_set1_epi32(0x003F03F0)),
		  [msk3] "x" (_mm_set1_epi32(0x01000010)),
		  [n51]  "x" (_mm_set1_epi8(51)),
		  [n25]  "x" (_mm_set1_epi8(25))

		// Clobbers.
		: "cc", "memory"
	);
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
// Spread the first 12 input bytes across all 16 byte lanes so that each
// lane holds one 6-bit group (high two bits zero), ready for translation
// to ASCII. Uses a byte shuffle plus mask/multiply pairs to shift the two
// group halves into place.
static BASE64_FORCE_INLINE __m128i
enc_reshuffle (__m128i in)
{
	// Input, bytes MSB to LSB:
	// 0 0 0 0 l k j i h g f e d c b a

	// Duplicate each 3-byte group into 4 lanes (one lane per output group):
	in = _mm_shuffle_epi8(in, _mm_set_epi8(
		10, 11,  9, 10,
		 7,  8,  6,  7,
		 4,  5,  3,  4,
		 1,  2,  0,  1));
	// in, bytes MSB to LSB:
	// k l j k
	// h i g h
	// e f d e
	// b c a b

	const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0FC0FC00));
	// bits, upper case are most significant bits, lower case are least significant bits
	// 0000kkkk LL000000 JJJJJJ00 00000000
	// 0000hhhh II000000 GGGGGG00 00000000
	// 0000eeee FF000000 DDDDDD00 00000000
	// 0000bbbb CC000000 AAAAAA00 00000000

	// High-multiply shifts each masked field right into the low bits:
	const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
	// 00000000 00kkkkLL 00000000 00JJJJJJ
	// 00000000 00hhhhII 00000000 00GGGGGG
	// 00000000 00eeeeFF 00000000 00DDDDDD
	// 00000000 00bbbbCC 00000000 00AAAAAA

	const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003F03F0));
	// 00000000 00llllll 000000jj KKKK0000
	// 00000000 00iiiiii 000000gg HHHH0000
	// 00000000 00ffffff 000000dd EEEE0000
	// 00000000 00cccccc 000000aa BBBB0000

	// Low-multiply shifts each masked field left into position:
	const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
	// 00llllll 00000000 00jjKKKK 00000000
	// 00iiiiii 00000000 00ggHHHH 00000000
	// 00ffffff 00000000 00ddEEEE 00000000
	// 00cccccc 00000000 00aaBBBB 00000000

	// The two halves are disjoint, so OR merges them:
	return _mm_or_si128(t1, t3);
	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
// Map sixteen 6-bit values (0..63, one per byte lane) to their ASCII
// Base64 characters by adding a per-range offset selected from a small
// lookup table.
static BASE64_FORCE_INLINE __m128i
enc_translate (const __m128i in)
{
	// A lookup table containing the absolute offsets for all ranges:
	const __m128i lut = _mm_setr_epi8(
		 65,  71, -4, -4,
		 -4,  -4, -4, -4,
		 -4,  -4, -4, -4,
		-19, -16,  0,  0
	);

	// Translate values 0..63 to the Base64 alphabet. There are five sets:
	// #  From      To         Abs    Index  Characters
	// 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
	// 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
	// 2  [52..61]  [48..57]    -4  [2..11]  0123456789
	// 3  [62]      [43]       -19       12  +
	// 4  [63]      [47]       -16       13  /

	// Create LUT indices from the input. The index for range #0 is right,
	// others are 1 less than expected (saturating subtract clamps range
	// #0 to index 0):
	__m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51));

	// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
	__m128i mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

	// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
	// now correct:
	indices = _mm_sub_epi8(indices, mask);

	// Add offsets to input values:
	return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
}
|
||||
|
|
@ -0,0 +1,314 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "codecs.h"
|
||||
#include "config.h"
|
||||
#include "env.h"
|
||||
|
||||
#if (__x86_64__ || __i386__ || _M_X86 || _M_X64)
|
||||
#define BASE64_X86
|
||||
#if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2 || HAVE_AVX512)
|
||||
#define BASE64_X86_SIMD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef BASE64_X86
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
|
||||
{ \
|
||||
int info[4]; \
|
||||
__cpuidex(info, __level, __count); \
|
||||
__eax = info[0]; \
|
||||
__ebx = info[1]; \
|
||||
__ecx = info[2]; \
|
||||
__edx = info[3]; \
|
||||
}
|
||||
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
|
||||
__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
|
||||
#else
|
||||
#include <cpuid.h>
|
||||
#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
|
||||
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
|
||||
// Read the 64-bit extended control register selected by `index` via the
// XGETBV instruction. Used below to check whether the OS saves/restores
// AVX (YMM/ZMM) register state on context switch. Only called after CPUID
// reports XSAVE support, since XGETBV is not available on older CPUs.
static inline uint64_t _xgetbv (uint32_t index)
{
	uint32_t eax, edx;
	__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
	return ((uint64_t)edx << 32) | eax;
}
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef bit_AVX512vl
|
||||
#define bit_AVX512vl (1 << 31)
|
||||
#endif
|
||||
#ifndef bit_AVX512vbmi
|
||||
#define bit_AVX512vbmi (1 << 1)
|
||||
#endif
|
||||
#ifndef bit_AVX2
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#endif
|
||||
#ifndef bit_SSSE3
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#endif
|
||||
#ifndef bit_SSE41
|
||||
#define bit_SSE41 (1 << 19)
|
||||
#endif
|
||||
#ifndef bit_SSE42
|
||||
#define bit_SSE42 (1 << 20)
|
||||
#endif
|
||||
#ifndef bit_AVX
|
||||
#define bit_AVX (1 << 28)
|
||||
#endif
|
||||
|
||||
#define bit_XSAVE_XRSTORE (1 << 27)
|
||||
|
||||
#ifndef _XCR_XFEATURE_ENABLED_MASK
|
||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||
#endif
|
||||
|
||||
#define bit_XMM (1 << 1)
|
||||
#define bit_YMM (1 << 2)
|
||||
#define bit_OPMASK (1 << 5)
|
||||
#define bit_ZMM (1 << 6)
|
||||
#define bit_HIGH_ZMM (1 << 7)
|
||||
|
||||
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS (bit_XMM | bit_YMM)
|
||||
|
||||
#define _AVX_512_ENABLED_BY_OS (bit_XMM | bit_YMM | bit_OPMASK | bit_ZMM | bit_HIGH_ZMM)
|
||||
|
||||
#endif
|
||||
|
||||
// Function declarations:
|
||||
#define BASE64_CODEC_FUNCS(arch) \
|
||||
extern void base64_stream_encode_ ## arch BASE64_ENC_PARAMS; \
|
||||
extern int base64_stream_decode_ ## arch BASE64_DEC_PARAMS;
|
||||
|
||||
BASE64_CODEC_FUNCS(avx512)
|
||||
BASE64_CODEC_FUNCS(avx2)
|
||||
BASE64_CODEC_FUNCS(neon32)
|
||||
BASE64_CODEC_FUNCS(neon64)
|
||||
BASE64_CODEC_FUNCS(plain)
|
||||
BASE64_CODEC_FUNCS(ssse3)
|
||||
BASE64_CODEC_FUNCS(sse41)
|
||||
BASE64_CODEC_FUNCS(sse42)
|
||||
BASE64_CODEC_FUNCS(avx)
|
||||
|
||||
static bool
|
||||
codec_choose_forced (struct codec *codec, int flags)
|
||||
{
|
||||
// If the user wants to use a certain codec,
|
||||
// always allow it, even if the codec is a no-op.
|
||||
// For testing purposes.
|
||||
|
||||
if (!(flags & 0xFFFF)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flags & BASE64_FORCE_AVX2) {
|
||||
codec->enc = base64_stream_encode_avx2;
|
||||
codec->dec = base64_stream_decode_avx2;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_NEON32) {
|
||||
codec->enc = base64_stream_encode_neon32;
|
||||
codec->dec = base64_stream_decode_neon32;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_NEON64) {
|
||||
codec->enc = base64_stream_encode_neon64;
|
||||
codec->dec = base64_stream_decode_neon64;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_PLAIN) {
|
||||
codec->enc = base64_stream_encode_plain;
|
||||
codec->dec = base64_stream_decode_plain;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSSE3) {
|
||||
codec->enc = base64_stream_encode_ssse3;
|
||||
codec->dec = base64_stream_decode_ssse3;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSE41) {
|
||||
codec->enc = base64_stream_encode_sse41;
|
||||
codec->dec = base64_stream_decode_sse41;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_SSE42) {
|
||||
codec->enc = base64_stream_encode_sse42;
|
||||
codec->dec = base64_stream_decode_sse42;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_AVX) {
|
||||
codec->enc = base64_stream_encode_avx;
|
||||
codec->dec = base64_stream_decode_avx;
|
||||
return true;
|
||||
}
|
||||
if (flags & BASE64_FORCE_AVX512) {
|
||||
codec->enc = base64_stream_encode_avx512;
|
||||
codec->dec = base64_stream_decode_avx512;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Select a NEON codec on ARM builds. Returns true when a codec was
// installed, false otherwise. The selection is purely compile-time: NEON
// presence cannot be probed portably from userland, so only builds
// configured with NEON support get a NEON codec.
static bool
codec_choose_arm (struct codec *codec)
{
#if HAVE_NEON64 || ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32)

	// Unfortunately there is no portable way to check for NEON
	// support at runtime from userland in the same way that x86
	// has cpuid, so just stick to the compile-time configuration:

#if HAVE_NEON64
	// AArch64 always has NEON; prefer the 64-bit implementation.
	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
#else
	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
#endif

	return true;

#else
	(void)codec;
	return false;
#endif
}
|
||||
|
||||
// Runtime CPU detection for x86: probe CPUID (and, for AVX variants,
// XGETBV) and install the fastest codec that both the CPU and the OS
// support, preferring AVX-512 > AVX2 > AVX > SSE4.2 > SSE4.1 > SSSE3.
// Returns true when a SIMD codec was installed, false to fall back to the
// plain codec. Compiles to a no-op when no x86 SIMD codec was built in.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;

#ifdef _MSC_VER
	// MSVC has no __get_cpuid_max; query leaf 0 directly for the
	// highest supported CPUID level.
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
#else
	max_level = __get_cpuid_max(0, NULL);
#endif

#if HAVE_AVX512 || HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2/AVX512 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// Note that XGETBV is only available on 686 or later CPUs, so the
	// instruction needs to be conditionally run.
	if (max_level >= 1) {
		__cpuid_count(1, 0, eax, ebx, ecx, edx);
		if (ecx & bit_XSAVE_XRSTORE) {
			uint64_t xcr_mask;
			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) { // check multiple bits at once
#if HAVE_AVX512
				// AVX-512 additionally needs opmask/ZMM state
				// enabled by the OS and the VL+VBMI extensions.
				if (max_level >= 7 && ((xcr_mask & _AVX_512_ENABLED_BY_OS) == _AVX_512_ENABLED_BY_OS)) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if ((ebx & bit_AVX512vl) && (ecx & bit_AVX512vbmi)) {
						codec->enc = base64_stream_encode_avx512;
						codec->dec = base64_stream_decode_avx512;
						return true;
					}
				}
#endif
#if HAVE_AVX2
				if (max_level >= 7) {
					__cpuid_count(7, 0, eax, ebx, ecx, edx);
					if (ebx & bit_AVX2) {
						codec->enc = base64_stream_encode_avx2;
						codec->dec = base64_stream_decode_avx2;
						return true;
					}
				}
#endif
#if HAVE_AVX
				// Re-read leaf 1: ecx/edx may have been
				// clobbered by the leaf-7 queries above.
				__cpuid_count(1, 0, eax, ebx, ecx, edx);
				if (ecx & bit_AVX) {
					codec->enc = base64_stream_encode_avx;
					codec->dec = base64_stream_decode_avx;
					return true;
				}
#endif
			}
		}
	}
#endif

#if HAVE_SSE42
	// Check for SSE42 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE42) {
			codec->enc = base64_stream_encode_sse42;
			codec->dec = base64_stream_decode_sse42;
			return true;
		}
	}
#endif

#if HAVE_SSE41
	// Check for SSE41 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSE41) {
			codec->enc = base64_stream_encode_sse41;
			codec->dec = base64_stream_decode_sse41;
			return true;
		}
	}
#endif

#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (max_level >= 1) {
		__cpuid(1, eax, ebx, ecx, edx);
		if (ecx & bit_SSSE3) {
			codec->enc = base64_stream_encode_ssse3;
			codec->dec = base64_stream_decode_ssse3;
			return true;
		}
	}
#endif

#else
	(void)codec;
#endif

	return false;
}
|
||||
|
||||
void
|
||||
codec_choose (struct codec *codec, int flags)
|
||||
{
|
||||
// User forced a codec:
|
||||
if (codec_choose_forced(codec, flags)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Runtime feature detection:
|
||||
if (codec_choose_arm(codec)) {
|
||||
return;
|
||||
}
|
||||
if (codec_choose_x86(codec)) {
|
||||
return;
|
||||
}
|
||||
codec->enc = base64_stream_encode_plain;
|
||||
codec->dec = base64_stream_decode_plain;
|
||||
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#include "libbase64.h"

// Internal shared definitions for the codec implementations: the common
// encoder/decoder signatures, stub implementations for codecs that are not
// compiled in, and the codec dispatch table type.

// Function parameters for encoding functions:
#define BASE64_ENC_PARAMS \
	( struct base64_state *state \
	, const char *src \
	, size_t srclen \
	, char *out \
	, size_t *outlen \
	)

// Function parameters for decoding functions:
#define BASE64_DEC_PARAMS \
	( struct base64_state *state \
	, const char *src \
	, size_t srclen \
	, char *out \
	, size_t *outlen \
	)

// This function is used as a stub when a certain encoder is not compiled in.
// It discards the inputs and returns zero output bytes.
static inline void
base64_enc_stub BASE64_ENC_PARAMS
{
	(void) state;
	(void) src;
	(void) srclen;
	(void) out;

	*outlen = 0;
}

// This function is used as a stub when a certain decoder is not compiled in.
// It discards the inputs and returns an invalid decoding result.
static inline int
base64_dec_stub BASE64_DEC_PARAMS
{
	(void) state;
	(void) src;
	(void) srclen;
	(void) out;
	(void) outlen;

	return -1;
}

// Pointer types matching the encoder/decoder entry-point signatures above:
typedef void (* base64_enc_fn) BASE64_ENC_PARAMS;
typedef int (* base64_dec_fn) BASE64_DEC_PARAMS;

// A codec is a matched pair of encode and decode implementations:
struct codec
{
	base64_enc_fn enc;
	base64_dec_fn dec;
};

// Fills `codec` with the best codec for this CPU, or the one forced by flags:
extern void codec_choose (struct codec *, int flags);
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef BASE64_CONFIG_H
#define BASE64_CONFIG_H

// Compile-time codec availability. Which SIMD codecs get built is decided
// here from predefined compiler macros; codec_choose() then narrows the
// compiled-in set down at runtime.

// x86-64 targets (Apple excluded here — presumably to keep universal/fat
// builds portable; TODO confirm):
#if !defined(__APPLE__) && ((defined(__x86_64__) && defined(__LP64__)) || defined(_M_X64))
#define HAVE_SSSE3 1
#define HAVE_SSE41 1
#define HAVE_SSE42 1
#define HAVE_AVX 1
#define HAVE_AVX2 1
#define HAVE_AVX512 0
// Apple Silicon: aarch64 always has NEON.
#elif (defined(__APPLE__) && defined(__aarch64__))
#define HAVE_NEON64 1
// WebAssembly with SIMD128: the NEON codecs are used, with the flavor chosen
// by the Emscripten major version:
#elif (defined(__wasm__) && defined(__wasm_simd128__))
#include "emscripten/version.h"
#if __EMSCRIPTEN_major__ == 3
#define HAVE_NEON32 1
#elif __EMSCRIPTEN_major__ > 3
#define HAVE_NEON64 1
#endif
#endif

#endif // BASE64_CONFIG_H
|
||||
84
venv/lib/python3.11/site-packages/mypyc/lib-rt/base64/env.h
Normal file
84
venv/lib/python3.11/site-packages/mypyc/lib-rt/base64/env.h
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
#ifndef BASE64_ENV_H
#define BASE64_ENV_H

#include <stdint.h>

// This header file contains macro definitions that describe certain aspects of
// the compile-time environment. Compatibility and portability macros go here.

// Define machine endianness. This is for GCC:
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
# define BASE64_LITTLE_ENDIAN 1
#else
# define BASE64_LITTLE_ENDIAN 0
#endif

// This is for Clang:
// (When both the GCC and Clang macros are defined on a little-endian target,
// both expansions agree, so the redefinition is benign.)
#ifdef __LITTLE_ENDIAN__
# define BASE64_LITTLE_ENDIAN 1
#endif

#ifdef __BIG_ENDIAN__
# define BASE64_LITTLE_ENDIAN 0
#endif

// MSVC++ needs intrin.h for _byteswap_uint64 (issue #68):
#if BASE64_LITTLE_ENDIAN && defined(_MSC_VER)
# include <intrin.h>
#endif

// Endian conversion functions:
#if BASE64_LITTLE_ENDIAN
# ifdef _MSC_VER
// Microsoft Visual C++:
# define BASE64_HTOBE32(x) _byteswap_ulong(x)
# define BASE64_HTOBE64(x) _byteswap_uint64(x)
# else
// GCC and Clang:
# define BASE64_HTOBE32(x) __builtin_bswap32(x)
# define BASE64_HTOBE64(x) __builtin_bswap64(x)
# endif
#else
// No conversion needed:
# define BASE64_HTOBE32(x) (x)
# define BASE64_HTOBE64(x) (x)
#endif

// Detect word size:
#if defined (__x86_64__)
// This also works for the x32 ABI, which has a 64-bit word size.
# define BASE64_WORDSIZE 64
#elif SIZE_MAX == UINT32_MAX
# define BASE64_WORDSIZE 32
#elif SIZE_MAX == UINT64_MAX
# define BASE64_WORDSIZE 64
#else
# error BASE64_WORDSIZE_NOT_DEFINED
#endif

// End-of-file definitions.
// Almost end-of-file when waiting for the last '=' character:
#define BASE64_AEOF 1
// End-of-file when stream end has been reached or invalid input provided:
#define BASE64_EOF 2

// GCC 7 defaults to issuing a warning for fallthrough in switch statements,
// unless the fallthrough cases are marked with an attribute. As we use
// fallthrough deliberately, define an alias for the attribute:
#if __GNUC__ >= 7
# define BASE64_FALLTHROUGH __attribute__((fallthrough));
#else
# define BASE64_FALLTHROUGH
#endif

// Declare macros to ensure that functions that are intended to be inlined, are
// actually inlined, even when no optimization is applied. A lot of inner loop
// code is factored into separate functions for reasons of readability, but
// that code should always be inlined (and optimized) in the main loop.
#ifdef _MSC_VER
# define BASE64_FORCE_INLINE __forceinline
#else
# define BASE64_FORCE_INLINE inline __attribute__((always_inline))
#endif

#endif // BASE64_ENV_H
|
||||
164
venv/lib/python3.11/site-packages/mypyc/lib-rt/base64/lib.c
Normal file
164
venv/lib/python3.11/site-packages/mypyc/lib-rt/base64/lib.c
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "libbase64.h"
|
||||
#include "tables/tables.h"
|
||||
#include "codecs.h"
|
||||
#include "env.h"
|
||||
|
||||
// These static function pointers are initialized once when the library is
|
||||
// first used, and remain in use for the remaining lifetime of the program.
|
||||
// The idea being that CPU features don't change at runtime.
|
||||
static struct codec codec = { NULL, NULL };
|
||||
|
||||
void
|
||||
base64_stream_encode_init (struct base64_state *state, int flags)
|
||||
{
|
||||
// If any of the codec flags are set, redo choice:
|
||||
if (codec.enc == NULL || flags & 0xFF) {
|
||||
codec_choose(&codec, flags);
|
||||
}
|
||||
state->eof = 0;
|
||||
state->bytes = 0;
|
||||
state->carry = 0;
|
||||
state->flags = flags;
|
||||
}
|
||||
|
||||
// Encode `srclen` bytes from `src` into `out` through the globally selected
// codec; the number of bytes written is stored in *outlen. Requires a prior
// call to base64_stream_encode_init() (which guarantees codec.enc is set).
void
base64_stream_encode
	( struct base64_state	*state
	, const char		*src
	, size_t		 srclen
	, char			*out
	, size_t		*outlen
	)
{
	codec.enc(state, src, srclen, out, outlen);
}
|
||||
|
||||
void
|
||||
base64_stream_encode_final
|
||||
( struct base64_state *state
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
)
|
||||
{
|
||||
uint8_t *o = (uint8_t *)out;
|
||||
|
||||
if (state->bytes == 1) {
|
||||
*o++ = base64_table_enc_6bit[state->carry];
|
||||
*o++ = '=';
|
||||
*o++ = '=';
|
||||
*outlen = 3;
|
||||
return;
|
||||
}
|
||||
if (state->bytes == 2) {
|
||||
*o++ = base64_table_enc_6bit[state->carry];
|
||||
*o++ = '=';
|
||||
*outlen = 2;
|
||||
return;
|
||||
}
|
||||
*outlen = 0;
|
||||
}
|
||||
|
||||
void
|
||||
base64_stream_decode_init (struct base64_state *state, int flags)
|
||||
{
|
||||
// If any of the codec flags are set, redo choice:
|
||||
if (codec.dec == NULL || flags & 0xFFFF) {
|
||||
codec_choose(&codec, flags);
|
||||
}
|
||||
state->eof = 0;
|
||||
state->bytes = 0;
|
||||
state->carry = 0;
|
||||
state->flags = flags;
|
||||
}
|
||||
|
||||
// Decode `srclen` bytes from `src` into `out` through the globally selected
// codec; bytes written are reported in *outlen. Returns the codec's result
// (per libbase64.h: 1 on success, 0 on invalid input, -1 if the codec is not
// compiled in). Requires a prior base64_stream_decode_init() call.
int
base64_stream_decode
	( struct base64_state	*state
	, const char		*src
	, size_t		 srclen
	, char			*out
	, size_t		*outlen
	)
{
	return codec.dec(state, src, srclen, out, outlen);
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
|
||||
// Due to the overhead of initializing OpenMP and creating a team of
|
||||
// threads, we require the data length to be larger than a threshold:
|
||||
#define OMP_THRESHOLD 20000
|
||||
|
||||
// Conditionally include OpenMP-accelerated codec implementations:
|
||||
#include "lib_openmp.c"
|
||||
#endif
|
||||
|
||||
void
|
||||
base64_encode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
size_t s;
|
||||
size_t t;
|
||||
struct base64_state state;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (srclen >= OMP_THRESHOLD) {
|
||||
base64_encode_openmp(src, srclen, out, outlen, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_encode_init(&state, flags);
|
||||
|
||||
// Feed the whole string to the stream reader:
|
||||
base64_stream_encode(&state, src, srclen, out, &s);
|
||||
|
||||
// Finalize the stream by writing trailer if any:
|
||||
base64_stream_encode_final(&state, out + s, &t);
|
||||
|
||||
// Final output length is stream length plus tail:
|
||||
*outlen = s + t;
|
||||
}
|
||||
|
||||
int
|
||||
base64_decode
|
||||
( const char *src
|
||||
, size_t srclen
|
||||
, char *out
|
||||
, size_t *outlen
|
||||
, int flags
|
||||
)
|
||||
{
|
||||
int ret;
|
||||
struct base64_state state;
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (srclen >= OMP_THRESHOLD) {
|
||||
return base64_decode_openmp(src, srclen, out, outlen, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Init the stream reader:
|
||||
base64_stream_decode_init(&state, flags);
|
||||
|
||||
// Feed the whole string to the stream reader:
|
||||
ret = base64_stream_decode(&state, src, srclen, out, outlen);
|
||||
|
||||
// If when decoding a whole block, we're still waiting for input then fail:
|
||||
if (ret && (state.bytes == 0)) {
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
// This code makes some assumptions on the implementation of
|
||||
// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
|
||||
// Basically these assumptions boil down to that when breaking the src into
|
||||
// parts, out parts can be written without side effects.
|
||||
// This is met when:
|
||||
// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
|
||||
// 2) the shared variables src and out are not read or written outside of the
|
||||
// bounds of their parts, i.e. when base64_stream_encode() reads a multiple
|
||||
// of 3 bytes, it must write no more then a multiple of 4 bytes, not even
|
||||
// temporarily;
|
||||
// 3) the state flag can be discarded after base64_stream_encode() and
|
||||
// base64_stream_decode() on the parts.
|
||||
|
||||
// OpenMP-parallel encoder: splits the input into per-thread chunks that are
// each a multiple of 3 bytes (so every chunk encodes independently to a
// multiple of 4 output bytes), then encodes the remainder serially.
static inline void
base64_encode_openmp
	( const char	*src
	, size_t	 srclen
	, char		*out
	, size_t	*outlen
	, int		 flags
	)
{
	size_t s;
	size_t t;
	size_t sum = 0, len, last_len;
	struct base64_state state, initial_state;
	int num_threads, i;

	// Request a number of threads but not necessarily get them:
	#pragma omp parallel
	{
		// Get the number of threads used from one thread only,
		// as num_threads is a shared var:
		#pragma omp single
		{
			num_threads = omp_get_num_threads();

			// Split the input string into num_threads parts, each
			// part a multiple of 3 bytes. The remaining bytes will
			// be done later:
			len = srclen / (num_threads * 3);
			len *= 3;
			last_len = srclen - num_threads * len;

			// Init the stream reader:
			base64_stream_encode_init(&state, flags);
			initial_state = state;
		}

		// Single has an implicit barrier for all threads to wait here
		// for the above to complete:
		#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
		for (i = 0; i < num_threads; i++)
		{
			// Feed each part of the string to the stream reader:
			base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
			sum += s;
		}
	}

	// As encoding should never fail and we encode an exact multiple
	// of 3 bytes, we can discard state:
	state = initial_state;

	// Encode the remaining bytes:
	base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);

	// Finalize the stream by writing trailer if any:
	base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);

	// Final output length is stream length plus tail:
	sum += s + t;
	*outlen = sum;
}
|
||||
|
||||
// OpenMP-parallel decoder: splits the input into per-thread chunks that are
// each a multiple of 4 bytes (so every chunk decodes independently), then
// decodes the remainder serially. Per-thread results are folded into `result`
// so codec-unavailable (-1 each) and decode errors can be distinguished.
static inline int
base64_decode_openmp
	( const char	*src
	, size_t	 srclen
	, char		*out
	, size_t	*outlen
	, int		 flags
	)
{
	int num_threads, result = 0, i;
	size_t sum = 0, len, last_len, s;
	struct base64_state state, initial_state;

	// Request a number of threads but not necessarily get them:
	#pragma omp parallel
	{
		// Get the number of threads used from one thread only,
		// as num_threads is a shared var:
		#pragma omp single
		{
			num_threads = omp_get_num_threads();

			// Split the input string into num_threads parts, each
			// part a multiple of 4 bytes. The remaining bytes will
			// be done later:
			len = srclen / (num_threads * 4);
			len *= 4;
			last_len = srclen - num_threads * len;

			// Init the stream reader:
			base64_stream_decode_init(&state, flags);

			initial_state = state;
		}

		// Single has an implicit barrier to wait here for the above to
		// complete:
		#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
		for (i = 0; i < num_threads; i++)
		{
			int this_result;

			// Feed each part of the string to the stream reader:
			this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
			sum += s;
			result += this_result;
		}
	}

	// If `result' equals `-num_threads', then all threads returned -1,
	// indicating that the requested codec is not available:
	if (result == -num_threads) {
		return -1;
	}

	// If `result' does not equal `num_threads', then at least one of the
	// threads hit a decode error:
	if (result != num_threads) {
		return 0;
	}

	// So far so good, now decode whatever remains in the buffer. Reuse the
	// initial state, since we are at a 4-byte boundary:
	state = initial_state;
	result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
	sum += s;
	*outlen = sum;

	// If when decoding a whole block, we're still waiting for input then fail:
	if (result && (state.bytes == 0)) {
		return result;
	}
	return 0;
}
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
#ifndef LIBBASE64_H
#define LIBBASE64_H

#include <stddef.h>	/* size_t */


// Symbol visibility/export plumbing for shared vs. static builds:
#if defined(_WIN32) || defined(__CYGWIN__)
#define BASE64_SYMBOL_IMPORT __declspec(dllimport)
#define BASE64_SYMBOL_EXPORT __declspec(dllexport)
#define BASE64_SYMBOL_PRIVATE

#elif __GNUC__ >= 4
#define BASE64_SYMBOL_IMPORT __attribute__ ((visibility ("default")))
#define BASE64_SYMBOL_EXPORT __attribute__ ((visibility ("default")))
#define BASE64_SYMBOL_PRIVATE __attribute__ ((visibility ("hidden")))

#else
#define BASE64_SYMBOL_IMPORT
#define BASE64_SYMBOL_EXPORT
#define BASE64_SYMBOL_PRIVATE
#endif

#if defined(BASE64_STATIC_DEFINE)
#define BASE64_EXPORT
#define BASE64_NO_EXPORT

#else
#if defined(BASE64_EXPORTS) // defined if we are building the shared library
#define BASE64_EXPORT BASE64_SYMBOL_EXPORT

#else
#define BASE64_EXPORT BASE64_SYMBOL_IMPORT
#endif

#define BASE64_NO_EXPORT BASE64_SYMBOL_PRIVATE
#endif


#ifdef __cplusplus
extern "C" {
#endif

/* These are the flags that can be passed in the `flags` argument. The values
 * below force the use of a given codec, even if that codec is a no-op in the
 * current build. Used in testing. Set to 0 for the default behavior, which is
 * runtime feature detection on x86, a compile-time fixed codec on ARM, and
 * the plain codec on other platforms: */
#define BASE64_FORCE_AVX2 (1 << 0)
#define BASE64_FORCE_NEON32 (1 << 1)
#define BASE64_FORCE_NEON64 (1 << 2)
#define BASE64_FORCE_PLAIN (1 << 3)
#define BASE64_FORCE_SSSE3 (1 << 4)
#define BASE64_FORCE_SSE41 (1 << 5)
#define BASE64_FORCE_SSE42 (1 << 6)
#define BASE64_FORCE_AVX (1 << 7)
#define BASE64_FORCE_AVX512 (1 << 8)

// Streaming codec state. `bytes` counts leftover input bytes of a partial
// group and `carry` holds their pending bits; `eof` tracks end-of-stream
// status (values presumably BASE64_AEOF/BASE64_EOF from env.h — confirm
// against the codec implementations).
struct base64_state {
	int eof;
	int bytes;
	int flags;
	unsigned char carry;
};

/* Wrapper function to encode a plain string of given length. Output is written
 * to *out without trailing zero. Output length in bytes is written to *outlen.
 * The buffer in `out` has been allocated by the caller and is at least 4/3 the
 * size of the input. See above for `flags`; set to 0 for default operation: */
void BASE64_EXPORT base64_encode
	( const char	*src
	, size_t	 srclen
	, char		*out
	, size_t	*outlen
	, int		 flags
	) ;

/* Call this before calling base64_stream_encode() to init the state. See above
 * for `flags`; set to 0 for default operation: */
void BASE64_EXPORT base64_stream_encode_init
	( struct base64_state	*state
	, int			 flags
	) ;

/* Encodes the block of data of given length at `src`, into the buffer at
 * `out`. Caller is responsible for allocating a large enough out-buffer; it
 * must be at least 4/3 the size of the in-buffer, but take some margin. Places
 * the number of new bytes written into `outlen` (which is set to zero when the
 * function starts). Does not zero-terminate or finalize the output. */
void BASE64_EXPORT base64_stream_encode
	( struct base64_state	*state
	, const char		*src
	, size_t		 srclen
	, char			*out
	, size_t		*outlen
	) ;

/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
 * Adds the required end-of-stream markers if appropriate. `outlen` is modified
 * and will contain the number of new bytes written at `out` (which will quite
 * often be zero). */
void BASE64_EXPORT base64_stream_encode_final
	( struct base64_state	*state
	, char			*out
	, size_t		*outlen
	) ;

/* Wrapper function to decode a plain string of given length. Output is written
 * to *out without trailing zero. Output length in bytes is written to *outlen.
 * The buffer in `out` has been allocated by the caller and is at least 3/4 the
 * size of the input. See above for `flags`, set to 0 for default operation: */
int BASE64_EXPORT base64_decode
	( const char	*src
	, size_t	 srclen
	, char		*out
	, size_t	*outlen
	, int		 flags
	) ;

/* Call this before calling base64_stream_decode() to init the state. See above
 * for `flags`; set to 0 for default operation: */
void BASE64_EXPORT base64_stream_decode_init
	( struct base64_state	*state
	, int			 flags
	) ;

/* Decodes the block of data of given length at `src`, into the buffer at
 * `out`. Caller is responsible for allocating a large enough out-buffer; it
 * must be at least 3/4 the size of the in-buffer, but take some margin. Places
 * the number of new bytes written into `outlen` (which is set to zero when the
 * function starts). Does not zero-terminate the output. Returns 1 if all is
 * well, and 0 if a decoding error was found, such as an invalid character.
 * Returns -1 if the chosen codec is not included in the current build. Used by
 * the test harness to check whether a codec is available for testing. */
int BASE64_EXPORT base64_stream_decode
	( struct base64_state	*state
	, const char		*src
	, size_t		 srclen
	, char			*out
	, size_t		*outlen
	) ;

#ifdef __cplusplus
}
#endif

#endif /* LIBBASE64_H */
|
||||
|
|
@ -0,0 +1,387 @@
|
|||
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdbool.h>

// We are providing (not importing) the libbase64 symbols in this extension:
#define BASE64_EXPORTS

#include "librt_base64.h"
#include "libbase64.h"
#include "pythoncapi_compat.h"

// Slow-path decoder used when the fast decode rejects the input: strips
// non-alphabet characters for stdlib-compatible lenient decoding. Defined
// further down in this file.
static PyObject *
b64decode_handle_invalid_input(
    PyObject *out_bytes, char *outbuf, size_t max_out, const char *src, size_t srclen, bool freesrc);

// Largest input we accept for encoding, chosen so the 4/3-sized output
// length computation cannot overflow Py_ssize_t:
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

// Encode results up to this size are built in a stack buffer to avoid a
// heap allocation on the common small-input path:
#define STACK_BUFFER_SIZE 1024
|
||||
// In-place translation from the standard base64 alphabet to the URL-safe
// one: '+' becomes '-' and '/' becomes '_'; all other bytes pass through.
// The simple branch shape keeps the loop auto-vectorizable.
static void
convert_encoded_to_urlsafe(char *buf, size_t len) {
    for (size_t idx = 0; idx < len; idx++) {
        char cur = buf[idx];
        buf[idx] = (cur == '+') ? '-' : (cur == '/') ? '_' : cur;
    }
}
|
||||
|
||||
// Copy `len` bytes from `src` to `buf`, translating the URL-safe base64
// alphabet back to the standard one: '-' becomes '+' and '_' becomes '/'.
// The simple branch shape keeps the loop auto-vectorizable.
static void
convert_urlsafe_to_encoded(const char *src, size_t len, char *buf) {
    for (size_t idx = 0; idx < len; idx++) {
        char cur = src[idx];
        buf[idx] = (cur == '-') ? '+' : (cur == '_') ? '/' : cur;
    }
}
|
||||
|
||||
static PyObject *
|
||||
b64encode_internal(PyObject *obj, bool urlsafe) {
|
||||
unsigned char *ascii_data;
|
||||
char *bin_data;
|
||||
int leftbits = 0;
|
||||
unsigned char this_ch;
|
||||
unsigned int leftchar = 0;
|
||||
Py_ssize_t bin_len, out_len;
|
||||
PyBytesWriter *writer;
|
||||
int newline = 0; // TODO
|
||||
|
||||
if (!PyBytes_Check(obj)) {
|
||||
PyErr_SetString(PyExc_TypeError, "base64() expects a bytes object");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bin_data = PyBytes_AS_STRING(obj);
|
||||
bin_len = PyBytes_GET_SIZE(obj);
|
||||
assert(bin_len >= 0);
|
||||
|
||||
if (bin_len > BASE64_MAXBIN) {
|
||||
PyErr_SetString(PyExc_ValueError, "Too much data for base64 line");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Py_ssize_t buflen = 4 * bin_len / 3 + 4;
|
||||
char *buf;
|
||||
char stack_buf[STACK_BUFFER_SIZE];
|
||||
if (buflen <= STACK_BUFFER_SIZE) {
|
||||
buf = stack_buf;
|
||||
} else {
|
||||
buf = PyMem_Malloc(buflen);
|
||||
if (buf == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
}
|
||||
size_t actual_len;
|
||||
base64_encode(bin_data, bin_len, buf, &actual_len, 0);
|
||||
|
||||
if (urlsafe) {
|
||||
convert_encoded_to_urlsafe(buf, actual_len);
|
||||
}
|
||||
|
||||
PyObject *res = PyBytes_FromStringAndSize(buf, actual_len);
|
||||
if (buflen > STACK_BUFFER_SIZE)
|
||||
PyMem_Free(buf);
|
||||
return res;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
b64encode(PyObject *self, PyObject *const *args, size_t nargs) {
|
||||
if (nargs != 1) {
|
||||
PyErr_SetString(PyExc_TypeError, "b64encode() takes exactly one argument");
|
||||
return 0;
|
||||
}
|
||||
return b64encode_internal(args[0], false);
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
urlsafe_b64encode(PyObject *self, PyObject *const *args, size_t nargs) {
|
||||
if (nargs != 1) {
|
||||
PyErr_SetString(PyExc_TypeError, "urlsafe_b64encode() takes exactly one argument");
|
||||
return 0;
|
||||
}
|
||||
return b64encode_internal(args[0], true);
|
||||
}
|
||||
|
||||
// True iff `c` belongs to the standard base64 alphabet; '=' also counts
// when `allow_padding` is set.
static inline int
is_valid_base64_char(char c, bool allow_padding) {
    if (c >= 'A' && c <= 'Z') {
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    if (c >= '0' && c <= '9') {
        return 1;
    }
    if (c == '+' || c == '/') {
        return 1;
    }
    return allow_padding && c == '=';
}
|
||||
|
||||
// Decode a base64 bytes object or ASCII string to bytes, or NULL with an
// exception set. When `urlsafe` is true, the input is first translated from
// the URL-safe alphabet into a temporary heap copy.
// Ownership note: from the urlsafe branch onward, `src` points to heap
// memory iff `urlsafe` is true, and every exit path must free it (the slow
// path delegates that responsibility via its `freesrc` parameter).
static PyObject *
b64decode_internal(PyObject *arg, bool urlsafe) {
    const char *src;
    Py_ssize_t srclen_ssz;

    // Get input pointer and length
    if (PyBytes_Check(arg)) {
        src = PyBytes_AS_STRING(arg);
        srclen_ssz = PyBytes_GET_SIZE(arg);
    } else if (PyUnicode_Check(arg)) {
        // Only 1-byte (ASCII) unicode storage can be decoded in place:
        if (!PyUnicode_IS_ASCII(arg)) {
            PyErr_SetString(PyExc_ValueError,
                "string argument should contain only ASCII characters");
            return NULL;
        }
        src = (const char *)PyUnicode_1BYTE_DATA(arg);
        srclen_ssz = PyUnicode_GET_LENGTH(arg);
    } else {
        PyErr_SetString(PyExc_TypeError,
            "argument should be a bytes-like object or ASCII string");
        return NULL;
    }

    // Fast-path: empty input
    if (srclen_ssz == 0) {
        return PyBytes_FromStringAndSize(NULL, 0);
    }

    if (urlsafe) {
        // Translate '-'/'_' back to '+'/'/' into a heap copy we now own:
        char *new_src = PyMem_Malloc(srclen_ssz + 1);
        if (new_src == NULL) {
            return PyErr_NoMemory();
        }
        convert_urlsafe_to_encoded(src, srclen_ssz, new_src);
        src = new_src;
    }

    // Quickly ignore invalid characters at the end. Other invalid characters
    // are also accepted, but they need a slow path.
    while (srclen_ssz > 0 && !is_valid_base64_char(src[srclen_ssz - 1], true)) {
        srclen_ssz--;
    }

    // Compute an output capacity that's at least 3/4 of input, without overflow:
    // ceil(3/4 * N) == N - floor(N/4)
    size_t srclen = (size_t)srclen_ssz;
    size_t max_out = srclen - (srclen / 4);
    if (max_out == 0) {
        max_out = 1; // defensive (srclen > 0 implies >= 1 anyway)
    }
    if (max_out > (size_t)PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError, "input too large");
        return NULL;
    }

    // Allocate output bytes (uninitialized) of the max capacity
    PyObject *out_bytes = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)max_out);
    if (out_bytes == NULL) {
        if (urlsafe) {
            PyMem_Free((void *)src);
        }
        return NULL; // Propagate memory error
    }

    char *outbuf = PyBytes_AS_STRING(out_bytes);
    size_t outlen = max_out;

    // Fast path: strict decode of the (possibly trimmed) input.
    // Returns 1 on success, 0 on invalid input, -1 if no codec is built in.
    int ret = base64_decode(src, srclen, outbuf, &outlen, 0);

    if (ret != 1) {
        if (ret == 0) {
            // Slow path: handle non-base64 input.
            // Note: takes ownership of out_bytes, and of src when urlsafe.
            return b64decode_handle_invalid_input(out_bytes, outbuf, max_out, src, srclen, urlsafe);
        }
        Py_DECREF(out_bytes);
        if (urlsafe) {
            PyMem_Free((void *)src);
        }
        if (ret == -1) {
            PyErr_SetString(PyExc_NotImplementedError, "base64 codec not available in this build");
        } else {
            PyErr_SetString(PyExc_RuntimeError, "base64_decode failed");
        }
        return NULL;
    }

    if (urlsafe) {
        PyMem_Free((void *)src);
    }

    // Sanity-check contract (decoder must not overflow our buffer)
    if (outlen > max_out) {
        Py_DECREF(out_bytes);
        PyErr_SetString(PyExc_RuntimeError, "decoder wrote past output buffer");
        return NULL;
    }

    // Shrink in place to the actual decoded length
    if (_PyBytes_Resize(&out_bytes, (Py_ssize_t)outlen) < 0) {
        // _PyBytes_Resize sets an exception and may free the old object
        return NULL;
    }
    return out_bytes;
}
|
||||
|
||||
// Process non-base64 input by ignoring non-base64 characters, for compatibility
|
||||
// with stdlib b64decode.
|
||||
// Process non-base64 input by ignoring non-base64 characters, for compatibility
// with stdlib b64decode: copy only alphabet characters (plus required '='
// padding) into a scratch buffer and decode that instead.
// Takes ownership of `out_bytes` (the preallocated output object of capacity
// `max_out`, with data pointer `outbuf`) and, when `freesrc` is true, of the
// heap buffer `src`.
// Bug fix: the error-reporting branches were `if (ret == 0) ... if (ret == -1)
// ... else ...`, so a decode error (ret == 0) first set ValueError and then
// unconditionally overwrote it with RuntimeError; the chain is now else-if.
static PyObject *
b64decode_handle_invalid_input(
    PyObject *out_bytes, char *outbuf, size_t max_out, const char *src, size_t srclen, bool freesrc)
{
    // Copy input to a temporary buffer, with non-base64 characters and extra suffix
    // characters removed
    size_t newbuf_len = 0;
    char *newbuf = PyMem_Malloc(srclen);
    if (newbuf == NULL) {
        Py_DECREF(out_bytes);
        if (freesrc) {
            PyMem_Free((void *)src);
        }
        return PyErr_NoMemory();
    }

    // Copy base64 characters and some padding to the new buffer
    for (size_t i = 0; i < srclen; i++) {
        char c = src[i];
        if (is_valid_base64_char(c, false)) {
            newbuf[newbuf_len++] = c;
        } else if (c == '=') {
            // Copy a necessary amount of padding
            int remainder = newbuf_len % 4;
            if (remainder == 0) {
                // No padding needed
                break;
            }
            int numpad = 4 - remainder;
            // Check that there is at least the required amount padding (CPython ignores
            // extra padding)
            while (numpad > 0) {
                if (i == srclen || src[i] != '=') {
                    break;
                }
                newbuf[newbuf_len++] = '=';
                i++;
                numpad--;
                // Skip non-base64 alphabet characters within padding
                while (i < srclen && !is_valid_base64_char(src[i], true)) {
                    i++;
                }
            }
            break;
        }
    }

    // Stdlib always performs a non-strict padding check
    if (newbuf_len % 4 != 0) {
        if (freesrc) {
            PyMem_Free((void *)src);
        }
        Py_DECREF(out_bytes);
        PyMem_Free(newbuf);
        PyErr_SetString(PyExc_ValueError, "Incorrect padding");
        return NULL;
    }

    // Retry the strict decoder on the cleaned-up buffer:
    size_t outlen = max_out;
    int ret = base64_decode(newbuf, newbuf_len, outbuf, &outlen, 0);
    PyMem_Free(newbuf);
    if (freesrc) {
        PyMem_Free((void *)src);
    }

    if (ret != 1) {
        Py_DECREF(out_bytes);
        if (ret == 0) {
            PyErr_SetString(PyExc_ValueError, "Only base64 data is allowed");
        } else if (ret == -1) {
            PyErr_SetString(PyExc_NotImplementedError, "base64 codec not available in this build");
        } else {
            PyErr_SetString(PyExc_RuntimeError, "base64_decode failed");
        }
        return NULL;
    }

    // Shrink in place to the actual decoded length
    if (_PyBytes_Resize(&out_bytes, (Py_ssize_t)outlen) < 0) {
        // _PyBytes_Resize sets an exception and may free the old object
        return NULL;
    }
    return out_bytes;
}
|
||||
|
||||
// Python entry point: librt.base64.b64decode(data) -> bytes.
// METH_FASTCALL wrapper; delegates to b64decode_internal with the
// standard (non-urlsafe) alphabet.
static PyObject*
b64decode(PyObject *self, PyObject *const *args, size_t nargs) {
    if (nargs != 1) {
        PyErr_SetString(PyExc_TypeError, "b64decode() takes exactly one argument");
        // Use NULL (not 0) as the null pointer constant for a PyObject* return.
        return NULL;
    }
    return b64decode_internal(args[0], false);
}
|
||||
|
||||
// Python entry point: librt.base64.urlsafe_b64decode(data) -> bytes.
// METH_FASTCALL wrapper; delegates to b64decode_internal with the
// URL/filesystem-safe alphabet ('-' and '_' instead of '+' and '/').
static PyObject*
urlsafe_b64decode(PyObject *self, PyObject *const *args, size_t nargs) {
    if (nargs != 1) {
        PyErr_SetString(PyExc_TypeError, "urlsafe_b64decode() takes exactly one argument");
        // Use NULL (not 0) as the null pointer constant for a PyObject* return.
        return NULL;
    }
    return b64decode_internal(args[0], true);
}
|
||||
|
||||
// Method table for the librt.base64 module.  All entries use
// METH_FASTCALL (arguments delivered as a C array, no tuple allocation).
static PyMethodDef librt_base64_module_methods[] = {
    {"b64encode", (PyCFunction)b64encode, METH_FASTCALL, PyDoc_STR("Encode bytes object using Base64.")},
    {"b64decode", (PyCFunction)b64decode, METH_FASTCALL, PyDoc_STR("Decode a Base64 encoded bytes object or ASCII string.")},
    {"urlsafe_b64encode", (PyCFunction)urlsafe_b64encode, METH_FASTCALL, PyDoc_STR("Encode bytes object using URL and file system safe Base64 alphabet.")},
    {"urlsafe_b64decode", (PyCFunction)urlsafe_b64decode, METH_FASTCALL, PyDoc_STR("Decode bytes or ASCII string using URL and file system safe Base64 alphabet.")},
    {NULL, NULL, 0, NULL}  /* sentinel */
};
|
||||
|
||||
static int
|
||||
base64_abi_version(void) {
|
||||
return LIBRT_BASE64_ABI_VERSION;
|
||||
}
|
||||
|
||||
static int
|
||||
base64_api_version(void) {
|
||||
return LIBRT_BASE64_API_VERSION;
|
||||
}
|
||||
|
||||
// Py_mod_exec slot: publish the module's C API as a capsule named
// "librt.base64._C_API".  Returns 0 on success, -1 with an exception set.
static int
librt_base64_module_exec(PyObject *m)
{
    // Export mypy internal C API, be careful with the order!
    // The index of each entry must match the LibRTBase64_* accessor
    // macros in the importing header (0 = ABI version, 1 = API version,
    // 2 = encode, 3 = decode).  `static` so the table outlives this call.
    static void *base64_api[LIBRT_BASE64_API_LEN] = {
        (void *)base64_abi_version,
        (void *)base64_api_version,
        (void *)b64encode_internal,
        (void *)b64decode_internal,
    };
    PyObject *c_api_object = PyCapsule_New((void *)base64_api, "librt.base64._C_API", NULL);
    // NOTE(review): if PyCapsule_New fails, NULL is passed to PyModule_Add;
    // presumably that reports the pending error and returns -1 — confirm
    // against the CPython PyModule_Add documentation.
    if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
        return -1;
    }
    return 0;
}
|
||||
|
||||
// Multi-phase initialization slots (PEP 489): run librt_base64_module_exec
// when the module object is created, and declare the module GIL-free on
// free-threaded builds that define Py_MOD_GIL_NOT_USED.
static PyModuleDef_Slot librt_base64_module_slots[] = {
    {Py_mod_exec, librt_base64_module_exec},
#ifdef Py_MOD_GIL_NOT_USED
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
#endif
    {0, NULL}  /* sentinel */
};
|
||||
|
||||
// Module definition.  m_size = 0: the module keeps no per-interpreter
// state of its own.
static PyModuleDef librt_base64_module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "base64",
    .m_doc = "Fast base64 encoding and decoding optimized for mypyc",
    .m_size = 0,
    .m_methods = librt_base64_module_methods,
    .m_slots = librt_base64_module_slots,
};
|
||||
|
||||
// Extension entry point.  Returns the module definition for multi-phase
// initialization (PEP 489); actual setup happens in the Py_mod_exec slot.
PyMODINIT_FUNC
PyInit_base64(void)
{
    return PyModuleDef_Init(&librt_base64_module);
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
#ifndef LIBRT_BASE64_H
#define LIBRT_BASE64_H

#include <Python.h>

// Version handshake constants, compared in import_librt_base64() against
// the values reported by the loaded librt.base64 module.
#define LIBRT_BASE64_ABI_VERSION 1
#define LIBRT_BASE64_API_VERSION 2
#define LIBRT_BASE64_API_LEN 4

// Per-translation-unit copy of the function-pointer table fetched from
// the "librt.base64._C_API" capsule (capsule-based C API import pattern).
static void *LibRTBase64_API[LIBRT_BASE64_API_LEN];

// Typed accessors into the table.  Index order must match the exporting
// module's base64_api[] initializer.
#define LibRTBase64_ABIVersion (*(int (*)(void)) LibRTBase64_API[0])
#define LibRTBase64_APIVersion (*(int (*)(void)) LibRTBase64_API[1])
#define LibRTBase64_b64encode_internal (*(PyObject* (*)(PyObject *source, bool urlsafe)) LibRTBase64_API[2])
#define LibRTBase64_b64decode_internal (*(PyObject* (*)(PyObject *source, bool urlsafe)) LibRTBase64_API[3])
|
||||
|
||||
static int
|
||||
import_librt_base64(void)
|
||||
{
|
||||
PyObject *mod = PyImport_ImportModule("librt.base64");
|
||||
if (mod == NULL)
|
||||
return -1;
|
||||
Py_DECREF(mod); // we import just for the side effect of making the below work.
|
||||
void *capsule = PyCapsule_Import("librt.base64._C_API", 0);
|
||||
if (capsule == NULL)
|
||||
return -1;
|
||||
memcpy(LibRTBase64_API, capsule, sizeof(LibRTBase64_API));
|
||||
if (LibRTBase64_ABIVersion() != LIBRT_BASE64_ABI_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err), "ABI version conflict for librt.base64, expected %d, found %d",
|
||||
LIBRT_BASE64_ABI_VERSION,
|
||||
LibRTBase64_ABIVersion()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
if (LibRTBase64_APIVersion() < LIBRT_BASE64_API_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err),
|
||||
"API version conflict for librt.base64, expected %d or newer, found %d (hint: upgrade librt)",
|
||||
LIBRT_BASE64_API_VERSION,
|
||||
LibRTBase64_APIVersion()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // LIBRT_BASE64_H
|
||||
|
|
@ -0,0 +1,393 @@
|
|||
#include <stdint.h>
|
||||
#define CHAR62 '+'
|
||||
#define CHAR63 '/'
|
||||
#define CHARPAD '='
|
||||
|
||||
|
||||
#if BASE64_LITTLE_ENDIAN
|
||||
|
||||
|
||||
/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */
|
||||
|
||||
// Decode lookup for byte 0 of each 4-character base64 group
// (little-endian layout).  Indexed by input byte; 0xffffffff marks
// characters outside the base64 alphabet.  The 6-bit value is stored
// pre-shifted ('A' -> 0x00, 'B' -> 0x04, ...) — presumably so the four
// per-position words can be OR-ed into one output word by the scalar
// decode loop; confirm against dec_loop.
const uint32_t base64_table_dec_32bit_d0[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x000000f8, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000fc,
0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4,
0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018,
0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030,
0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048,
0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060,
0x00000064, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078,
0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090,
0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8,
0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0,
0x000000c4, 0x000000c8, 0x000000cc, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
// Decode lookup for byte 1 of each 4-character base64 group
// (little-endian layout); 0xffffffff marks invalid input bytes.
const uint32_t base64_table_dec_32bit_d1[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x0000e003, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000f003,
0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003,
0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000,
0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000,
0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001,
0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001,
0x00009001, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001,
0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002,
0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002,
0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003,
0x00001003, 0x00002003, 0x00003003, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
// Decode lookup for byte 2 of each 4-character base64 group
// (little-endian layout); 0xffffffff marks invalid input bytes.
const uint32_t base64_table_dec_32bit_d2[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x00800f00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00c00f00,
0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00,
0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100,
0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300,
0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400,
0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600,
0x00400600, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700,
0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900,
0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00,
0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00,
0x00400c00, 0x00800c00, 0x00c00c00, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
// Decode lookup for byte 3 of each 4-character base64 group
// (little-endian layout); 0xffffffff marks invalid input bytes.
const uint32_t base64_table_dec_32bit_d3[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x003e0000, 0xffffffff, 0xffffffff, 0xffffffff, 0x003f0000,
0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000,
0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000,
0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000,
0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000,
0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000,
0x00190000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000,
0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000,
0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000,
0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000,
0x00310000, 0x00320000, 0x00330000, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */
|
||||
|
||||
// Decode lookup for byte 0 of each 4-character base64 group (big-endian
// layout); 0xffffffff marks invalid input bytes.
const uint32_t base64_table_dec_32bit_d0[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xf8000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xfc000000,
0xd0000000, 0xd4000000, 0xd8000000, 0xdc000000, 0xe0000000, 0xe4000000,
0xe8000000, 0xec000000, 0xf0000000, 0xf4000000, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x04000000, 0x08000000, 0x0c000000, 0x10000000, 0x14000000, 0x18000000,
0x1c000000, 0x20000000, 0x24000000, 0x28000000, 0x2c000000, 0x30000000,
0x34000000, 0x38000000, 0x3c000000, 0x40000000, 0x44000000, 0x48000000,
0x4c000000, 0x50000000, 0x54000000, 0x58000000, 0x5c000000, 0x60000000,
0x64000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x68000000, 0x6c000000, 0x70000000, 0x74000000, 0x78000000,
0x7c000000, 0x80000000, 0x84000000, 0x88000000, 0x8c000000, 0x90000000,
0x94000000, 0x98000000, 0x9c000000, 0xa0000000, 0xa4000000, 0xa8000000,
0xac000000, 0xb0000000, 0xb4000000, 0xb8000000, 0xbc000000, 0xc0000000,
0xc4000000, 0xc8000000, 0xcc000000, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
// Decode lookup for byte 1 of each 4-character base64 group (big-endian
// layout); 0xffffffff marks invalid input bytes.
const uint32_t base64_table_dec_32bit_d1[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x03e00000, 0xffffffff, 0xffffffff, 0xffffffff, 0x03f00000,
0x03400000, 0x03500000, 0x03600000, 0x03700000, 0x03800000, 0x03900000,
0x03a00000, 0x03b00000, 0x03c00000, 0x03d00000, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x00100000, 0x00200000, 0x00300000, 0x00400000, 0x00500000, 0x00600000,
0x00700000, 0x00800000, 0x00900000, 0x00a00000, 0x00b00000, 0x00c00000,
0x00d00000, 0x00e00000, 0x00f00000, 0x01000000, 0x01100000, 0x01200000,
0x01300000, 0x01400000, 0x01500000, 0x01600000, 0x01700000, 0x01800000,
0x01900000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x01a00000, 0x01b00000, 0x01c00000, 0x01d00000, 0x01e00000,
0x01f00000, 0x02000000, 0x02100000, 0x02200000, 0x02300000, 0x02400000,
0x02500000, 0x02600000, 0x02700000, 0x02800000, 0x02900000, 0x02a00000,
0x02b00000, 0x02c00000, 0x02d00000, 0x02e00000, 0x02f00000, 0x03000000,
0x03100000, 0x03200000, 0x03300000, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
// Decode lookup for byte 2 of each 4-character base64 group (big-endian
// layout); 0xffffffff marks invalid input bytes.
const uint32_t base64_table_dec_32bit_d2[256] = {
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x000f8000, 0xffffffff, 0xffffffff, 0xffffffff, 0x000fc000,
0x000d0000, 0x000d4000, 0x000d8000, 0x000dc000, 0x000e0000, 0x000e4000,
0x000e8000, 0x000ec000, 0x000f0000, 0x000f4000, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
0x00004000, 0x00008000, 0x0000c000, 0x00010000, 0x00014000, 0x00018000,
0x0001c000, 0x00020000, 0x00024000, 0x00028000, 0x0002c000, 0x00030000,
0x00034000, 0x00038000, 0x0003c000, 0x00040000, 0x00044000, 0x00048000,
0x0004c000, 0x00050000, 0x00054000, 0x00058000, 0x0005c000, 0x00060000,
0x00064000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0x00068000, 0x0006c000, 0x00070000, 0x00074000, 0x00078000,
0x0007c000, 0x00080000, 0x00084000, 0x00088000, 0x0008c000, 0x00090000,
0x00094000, 0x00098000, 0x0009c000, 0x000a0000, 0x000a4000, 0x000a8000,
0x000ac000, 0x000b0000, 0x000b4000, 0x000b8000, 0x000bc000, 0x000c0000,
0x000c4000, 0x000c8000, 0x000cc000, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
// Decode lookup table for the *third* character of a 4-character base64
// group. Valid base64 characters map to their 6-bit value pre-shifted
// left by 8 bits (e.g. 'A' (65) -> 0x00000000, '+' (43) -> 0x00003e00);
// every other byte maps to 0xffffffff, which flags invalid input.
const uint32_t base64_table_dec_32bit_d3[256] = {
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0x00003e00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00003f00,
	0x00003400, 0x00003500, 0x00003600, 0x00003700, 0x00003800, 0x00003900,
	0x00003a00, 0x00003b00, 0x00003c00, 0x00003d00, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
	0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600,
	0x00000700, 0x00000800, 0x00000900, 0x00000a00, 0x00000b00, 0x00000c00,
	0x00000d00, 0x00000e00, 0x00000f00, 0x00001000, 0x00001100, 0x00001200,
	0x00001300, 0x00001400, 0x00001500, 0x00001600, 0x00001700, 0x00001800,
	0x00001900, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0x00001a00, 0x00001b00, 0x00001c00, 0x00001d00, 0x00001e00,
	0x00001f00, 0x00002000, 0x00002100, 0x00002200, 0x00002300, 0x00002400,
	0x00002500, 0x00002600, 0x00002700, 0x00002800, 0x00002900, 0x00002a00,
	0x00002b00, 0x00002c00, 0x00002d00, 0x00002e00, 0x00002f00, 0x00003000,
	0x00003100, 0x00003200, 0x00003300, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
|
||||
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,40 @@
|
|||
#include "tables.h"

// Encode table: maps each 6-bit value (0..63) to its base64 character.
const uint8_t
base64_table_enc_6bit[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

// In the lookup table below, note that the value for '=' (character 61) is
// 254, not 255. This character is used for in-band signaling of the end of
// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
// and + / are mapped to their "decoded" values. The other bytes all map to
// the value 255, which flags them as "invalid input".

const uint8_t
base64_table_dec_8bit[] =
{
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63, // 32..47
	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 254, 255, 255, // 48..63
	255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14, // 64..79
	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255, // 80..95
	255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40, // 96..111
	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255, // 112..127
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 144..159
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 160..175
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 176..191
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 192..207
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 208..223
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 224..239
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 240..255
};

// Wider tables used only by the 32/64-bit generic codecs.
#if BASE64_WORDSIZE >= 32
# include "table_dec_32bit.h"
# include "table_enc_12bit.h"
#endif
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#ifndef BASE64_TABLES_H
#define BASE64_TABLES_H

#include <stdint.h>

#include "../env.h"

// These tables are used by all codecs for fallback plain encoding/decoding:
extern const uint8_t base64_table_enc_6bit[];
extern const uint8_t base64_table_dec_8bit[];

// These tables are used for the 32-bit and 64-bit generic decoders:
// one 256-entry table per input-byte position within a 4-character group.
#if BASE64_WORDSIZE >= 32
extern const uint32_t base64_table_dec_32bit_d0[];
extern const uint32_t base64_table_dec_32bit_d1[];
extern const uint32_t base64_table_dec_32bit_d2[];
extern const uint32_t base64_table_dec_32bit_d3[];

// This table is used by the 32 and 64-bit generic encoders:
extern const uint16_t base64_table_enc_12bit[];
#endif

#endif // BASE64_TABLES_H
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
# This file must have the same content for mypyc/build_setup.py and lib-rt/build_setup.py,
# it exists to work around absence of support for per-file compile flags in setuptools.
# The version in mypyc/ is the source of truth, and should be copied to lib-rt if modified.
#
# Mechanism: it monkey-patches distutils' CCompiler.spawn so that extra
# per-architecture compiler flags (e.g. /arch:AVX2) are appended to the
# command line only when the compiled source lives under base64/arch/...

import os
import platform
import sys

try:
    # Import setuptools so that it monkey-patch overrides distutils
    import setuptools  # noqa: F401
except ImportError:
    pass

if sys.version_info >= (3, 12):
    # From setuptools' monkeypatch
    from distutils import ccompiler  # type: ignore[import-not-found]
else:
    from distutils import ccompiler

# Per-compiler-type map of path component -> extra compile flags.
# Only MSVC needs explicit flags here; GCC/Clang arch flags are assumed to
# be handled elsewhere.
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
    "msvc": {
        "base64/arch/sse42": ["/arch:SSE4.2"],
        "base64/arch/avx2": ["/arch:AVX2"],
        "base64/arch/avx": ["/arch:AVX"],
    }
}

# Keep a reference to the original spawn so the wrapper below can delegate.
# (No name mangling happens here: this is module scope, not a class body.)
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn  # type: ignore[attr-defined]
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
PYODIDE = "PYODIDE" in os.environ
NO_EXTRA_FLAGS = "MYPYC_NO_EXTRA_FLAGS" in os.environ


def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    # Wrapper around CCompiler.spawn that injects per-file compile flags.
    new_cmd = list(cmd)
    if PYODIDE:
        for argument in reversed(new_cmd):
            if not str(argument).endswith(".c"):
                continue
            if "base64/arch/" in str(argument):
                new_cmd.extend(["-msimd128"])
    elif not NO_EXTRA_FLAGS:
        compiler_type: str = self.compiler_type
        extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type, None)
        if X86_64 and extra_options is not None:
            # filenames are closer to the end of command line
            for argument in reversed(new_cmd):
                # Check if the matching argument contains a source filename.
                if not str(argument).endswith(".c"):
                    continue

                for path in extra_options.keys():
                    if path in str(argument):
                        if compiler_type == "bcpp":
                            compiler = new_cmd.pop()
                            # Borland accepts a source file name at the end,
                            # insert the options before it
                            new_cmd.extend(extra_options[path])
                            new_cmd.append(compiler)
                        else:
                            new_cmd.extend(extra_options[path])

                        # path component is found, no need to search any further
                        break
    self.__spawn(new_cmd, **kwargs)


ccompiler.CCompiler.spawn = spawn  # type: ignore[method-assign]
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
#include "bytearray_extra_ops.h"

// Construct a new, empty bytearray object.
// Returns a new reference, or NULL with an exception set on failure.
PyObject *CPyByteArray_New(void) {
    return PyByteArray_FromStringAndSize(NULL, 0);
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef MYPYC_BYTEARRAY_EXTRA_OPS_H
|
||||
#define MYPYC_BYTEARRAY_EXTRA_OPS_H
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// Construct empty bytearray
|
||||
PyObject *CPyByteArray_New(void);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
#include "bytes_extra_ops.h"

// Implementation of bytes.translate(table) with a fast path for exact
// bytes arguments.
//
// Returns a new reference, or NULL with an exception set on error. If the
// translation leaves every byte unchanged, the original object is returned
// (with an extra reference) instead of the freshly built copy.
PyObject *CPyBytes_Translate(PyObject *bytes, PyObject *table) {
    // Fast path: exact bytes object with exact bytes table
    if (PyBytes_CheckExact(bytes) && PyBytes_CheckExact(table)) {
        Py_ssize_t table_len = PyBytes_GET_SIZE(table);
        if (table_len != 256) {
            PyErr_SetString(PyExc_ValueError,
                            "translation table must be 256 characters long");
            return NULL;
        }

        Py_ssize_t len = PyBytes_GET_SIZE(bytes);
        const char *input = PyBytes_AS_STRING(bytes);
        const char *trans_table = PyBytes_AS_STRING(table);

        PyObject *result = PyBytes_FromStringAndSize(NULL, len);
        if (result == NULL) {
            return NULL;
        }

        char *output = PyBytes_AS_STRING(result);
        bool changed = false;

        // Without a loop unrolling hint performance can be worse than CPython
        CPY_UNROLL_LOOP(4)
        for (Py_ssize_t i = len; --i >= 0;) {
            char c = *input++;
            // Cast to unsigned char so bytes >= 0x80 index the table
            // correctly even when plain 'char' is signed.
            if ((*output++ = trans_table[(unsigned char)c]) != c)
                changed = true;
        }

        // If nothing changed, discard result and return the original object
        if (!changed) {
            Py_DECREF(result);
            Py_INCREF(bytes);
            return bytes;
        }

        return result;
    }

    // Fallback to Python method call for non-exact types or non-standard tables
    return PyObject_CallMethodOneArg(bytes, mypyc_interned_str.translate, table);
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
#ifndef MYPYC_BYTES_EXTRA_OPS_H
#define MYPYC_BYTES_EXTRA_OPS_H

#include <Python.h>
#include <stdint.h>
#include "CPy.h"

// Optimized bytes translate operation
PyObject *CPyBytes_Translate(PyObject *bytes, PyObject *table);

// Optimized bytes.__getitem__ operations

// If index is negative, convert to non-negative index (no range checking)
static inline int64_t CPyBytes_AdjustIndex(PyObject *obj, int64_t index) {
    if (index < 0) {
        return index + Py_SIZE(obj);
    }
    return index;
}

// Check if index is in valid range [0, len)
static inline bool CPyBytes_RangeCheck(PyObject *obj, int64_t index) {
    return index >= 0 && index < Py_SIZE(obj);
}

// Get byte at index (no bounds checking) - returns as CPyTagged
// (the << 1 shift produces a tagged short integer).
static inline CPyTagged CPyBytes_GetItemUnsafe(PyObject *obj, int64_t index) {
    return ((CPyTagged)(uint8_t)(PyBytes_AS_STRING(obj))[index]) << 1;
}

#endif
|
||||
218
venv/lib/python3.11/site-packages/mypyc/lib-rt/bytes_ops.c
Normal file
218
venv/lib/python3.11/site-packages/mypyc/lib-rt/bytes_ops.c
Normal file
|
|
@ -0,0 +1,218 @@
|
|||
// Bytes primitive operations
//
// These are registered in mypyc.primitives.bytes_ops.

#include <Python.h>
#include "CPy.h"

// Returns -1 on error, 0 on inequality, 1 on equality.
//
// Falls back to PyObject_RichCompareBool.
int CPyBytes_Compare(PyObject *left, PyObject *right) {
    if (PyBytes_CheckExact(left) && PyBytes_CheckExact(right)) {
        // Identical object: trivially equal.
        if (left == right) {
            return 1;
        }

        // Adapted from cpython internal implementation of bytes_compare.
        Py_ssize_t len = Py_SIZE(left);
        if (Py_SIZE(right) != len) {
            return 0;
        }
        PyBytesObject *left_b = (PyBytesObject *)left;
        PyBytesObject *right_b = (PyBytesObject *)right;
        // Cheap first-byte check before the full memcmp.
        if (left_b->ob_sval[0] != right_b->ob_sval[0]) {
            return 0;
        }

        return memcmp(left_b->ob_sval, right_b->ob_sval, len) == 0;
    }
    return PyObject_RichCompareBool(left, right, Py_EQ);
}
|
||||
|
||||
// bytes/bytearray __getitem__ with Python indexing semantics (negative
// indices count from the end). Returns the byte as a tagged short int,
// or CPY_INT_TAG with an exception set on error.
CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index) {
    if (CPyTagged_CheckShort(index)) {
        Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
        Py_ssize_t size = ((PyVarObject *)o)->ob_size;
        if (n < 0)
            n += size;
        if (n < 0 || n >= size) {
            PyErr_SetString(PyExc_IndexError, "index out of range");
            return CPY_INT_TAG;
        }
        // bytes and bytearray store their payload in different fields.
        unsigned char num = PyBytes_Check(o) ? ((PyBytesObject *)o)->ob_sval[n]
                                             : ((PyByteArrayObject *)o)->ob_bytes[n];
        return num << 1;
    } else {
        // Index too large to be a tagged short int.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return CPY_INT_TAG;
    }
}
|
||||
|
||||
// Concatenate two bytes objects into a new bytes object.
// Returns a new reference, or NULL with an exception set on failure.
PyObject *CPyBytes_Concat(PyObject *a, PyObject *b) {
    Py_ssize_t a_len = ((PyVarObject *)a)->ob_size;
    Py_ssize_t b_len = ((PyVarObject *)b)->ob_size;
    PyBytesObject *ret = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, a_len + b_len);
    if (ret != NULL) {
        memcpy(ret->ob_sval, ((PyBytesObject *)a)->ob_sval, a_len);
        memcpy(ret->ob_sval + a_len, ((PyBytesObject *)b)->ob_sval, b_len);
    }
    return (PyObject *)ret;
}
|
||||
|
||||
// Clamp 'a' into the range [b, c]: values below b become b, values at or
// above c become c, anything in between passes through unchanged.
static inline Py_ssize_t Clamp(Py_ssize_t a, Py_ssize_t b, Py_ssize_t c) {
    if (a < b) {
        return b;
    }
    if (a >= c) {
        return c;
    }
    return a;
}
|
||||
|
||||
// bytes/bytearray slicing with Python semantics (negative indices count
// from the end; out-of-range bounds are clamped). Falls back to the
// generic slice path when either bound is not a tagged short int.
PyObject *CPyBytes_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
    if (CPyTagged_CheckShort(start) && CPyTagged_CheckShort(end)) {
        Py_ssize_t startn = CPyTagged_ShortAsSsize_t(start);
        Py_ssize_t endn = CPyTagged_ShortAsSsize_t(end);
        Py_ssize_t len = ((PyVarObject *)obj)->ob_size;
        if (startn < 0) {
            startn += len;
        }
        if (endn < 0) {
            endn += len;
        }
        startn = Clamp(startn, 0, len);
        endn = Clamp(endn, 0, len);
        Py_ssize_t slice_len = endn - startn;
        // Preserve the input type: bytes stays bytes, bytearray stays bytearray.
        if (PyBytes_Check(obj)) {
            return PyBytes_FromStringAndSize(PyBytes_AS_STRING(obj) + startn, slice_len);
        } else {
            return PyByteArray_FromStringAndSize(PyByteArray_AS_STRING(obj) + startn, slice_len);
        }
    }
    return CPyObject_GetSlice(obj, start, end);
}
|
||||
|
||||
// Like _PyBytes_Join but fallback to dynamic call if 'sep' is not bytes
// (mostly commonly, for bytearrays).
// Returns a new reference, or NULL with an exception set on error.
PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter) {
    if (PyBytes_CheckExact(sep)) {
        return PyBytes_Join(sep, iter);
    } else {
        return PyObject_CallMethodOneArg(sep, mypyc_interned_str.join, iter);
    }
}
|
||||
|
||||
// Concatenate a fixed number of bytes objects into a new bytes object.
//
// 'len' is the count of PyObject * (exact bytes) varargs that follow.
// Returns a new reference, or NULL with an exception set on error.
PyObject *CPyBytes_Build(Py_ssize_t len, ...) {
    Py_ssize_t i;
    Py_ssize_t sz = 0;

    va_list args;

    // First pass: compute the total output size, guarding against overflow.
    va_start(args, len);
    for (i = 0; i < len; i++) {
        PyObject *item = va_arg(args, PyObject *);
        size_t add_sz = ((PyVarObject *)item)->ob_size;
        // Using size_t to avoid overflow during arithmetic calculation
        if (add_sz > (size_t)(PY_SSIZE_T_MAX - sz)) {
            PyErr_SetString(PyExc_OverflowError,
                            "join() result is too long for a Python bytes");
            // Fix: every va_start must be matched by a va_end before the
            // function returns (C11 7.16.1.3); the original error path
            // returned without it, which is undefined behavior.
            va_end(args);
            return NULL;
        }
        sz += add_sz;
    }
    va_end(args);

    // Second pass: copy each item's payload into the result buffer.
    PyBytesObject *ret = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, sz);
    if (ret != NULL) {
        char *res_data = ret->ob_sval;
        va_start(args, len);
        for (i = 0; i < len; i++) {
            PyObject *item = va_arg(args, PyObject *);
            Py_ssize_t item_sz = ((PyVarObject *)item)->ob_size;
            memcpy(res_data, ((PyBytesObject *)item)->ob_sval, item_sz);
            res_data += item_sz;
        }
        va_end(args);
        assert(res_data == ret->ob_sval + ((PyVarObject *)ret)->ob_size);
    }

    return (PyObject *)ret;
}
|
||||
|
||||
// ord() for a bytes object: returns the single byte's value as a tagged
// short int, or CPY_INT_TAG with TypeError set if the length is not 1.
CPyTagged CPyBytes_Ord(PyObject *obj) {
    Py_ssize_t s = PyBytes_GET_SIZE(obj);
    if (s == 1) {
        return (unsigned char)(PyBytes_AS_STRING(obj)[0]) << 1;
    }
    PyErr_SetString(PyExc_TypeError, "ord() expects a character");
    return CPY_INT_TAG;
}
|
||||
|
||||
// bytes * count. Converts the tagged count to Py_ssize_t (rejecting
// values too large to fit with OverflowError) and delegates to
// PySequence_Repeat. Returns a new reference, or NULL on error.
PyObject *CPyBytes_Multiply(PyObject *bytes, CPyTagged count) {
    Py_ssize_t temp_count = CPyTagged_AsSsize_t(count);
    if (temp_count == -1 && PyErr_Occurred()) {
        // Replace the conversion error with a clearer overflow message.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    return PySequence_Repeat(bytes, temp_count);
}
|
||||
|
||||
// bytes.startswith(prefix) with a memcmp fast path for exact bytes.
// Returns 1 (true), 0 (false), or 2 to signal an error (exception set).
int CPyBytes_Startswith(PyObject *self, PyObject *subobj) {
    if (PyBytes_CheckExact(self) && PyBytes_CheckExact(subobj)) {
        if (self == subobj) {
            return 1;
        }

        // The empty prefix matches everything.
        Py_ssize_t subobj_len = PyBytes_GET_SIZE(subobj);
        if (subobj_len == 0) {
            return 1;
        }

        Py_ssize_t self_len = PyBytes_GET_SIZE(self);
        if (subobj_len > self_len) {
            return 0;
        }

        const char *self_buf = PyBytes_AS_STRING(self);
        const char *subobj_buf = PyBytes_AS_STRING(subobj);

        return memcmp(self_buf, subobj_buf, (size_t)subobj_len) == 0 ? 1 : 0;
    }
    // Slow path: dispatch through the Python-level method (handles
    // bytearray/subclass arguments).
    PyObject *result = PyObject_CallMethodOneArg(self, mypyc_interned_str.startswith, subobj);
    if (result == NULL) {
        return 2;
    }
    int ret = PyObject_IsTrue(result);
    Py_DECREF(result);
    if (ret < 0) {
        return 2;
    }
    return ret;
}
|
||||
|
||||
// bytes.endswith(suffix) with a memcmp fast path for exact bytes.
// Returns 1 (true), 0 (false), or 2 to signal an error (exception set).
int CPyBytes_Endswith(PyObject *self, PyObject *subobj) {
    if (PyBytes_CheckExact(self) && PyBytes_CheckExact(subobj)) {
        if (self == subobj) {
            return 1;
        }

        // The empty suffix matches everything.
        Py_ssize_t subobj_len = PyBytes_GET_SIZE(subobj);
        if (subobj_len == 0) {
            return 1;
        }

        Py_ssize_t self_len = PyBytes_GET_SIZE(self);
        if (subobj_len > self_len) {
            return 0;
        }

        const char *self_buf = PyBytes_AS_STRING(self);
        const char *subobj_buf = PyBytes_AS_STRING(subobj);

        // Compare against the tail of 'self'.
        return memcmp(self_buf + (self_len - subobj_len), subobj_buf, (size_t)subobj_len) == 0 ? 1 : 0;
    }
    // Slow path: dispatch through the Python-level method (handles
    // bytearray/subclass arguments).
    PyObject *result = PyObject_CallMethodOneArg(self, mypyc_interned_str.endswith, subobj);
    if (result == NULL) {
        return 2;
    }
    int ret = PyObject_IsTrue(result);
    Py_DECREF(result);
    if (ret < 0) {
        return 2;
    }
    return ret;
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
// Primitives related to librt.strings.BytesWriter that get linked statically
// with compiled modules, instead of being called via a capsule.

#include "byteswriter_extra_ops.h"

#ifdef MYPYC_EXPERIMENTAL

// Append the contents of a bytes or bytearray 'value' to the writer.
// Returns CPY_NONE on success, CPY_NONE_ERROR if the buffer could not be
// grown. 'value' is assumed to be bytes or bytearray (enforced by mypyc).
char CPyBytesWriter_Write(PyObject *obj, PyObject *value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    const char *data;
    Py_ssize_t size;
    if (likely(PyBytes_Check(value))) {
        data = PyBytes_AS_STRING(value);
        size = PyBytes_GET_SIZE(value);
    } else {
        data = PyByteArray_AS_STRING(value);
        size = PyByteArray_GET_SIZE(value);
    }
    // Write bytes content.
    if (!CPyBytesWriter_EnsureSize(self, size))
        return CPY_NONE_ERROR;
    if (size < 8) {
        // Loop tends to be faster for small sizes
        char *p = self->buf + self->len;
        for (Py_ssize_t i = 0; i < size; i++) {
            p[i] = data[i];
        }
    } else {
        memcpy(self->buf + self->len, data, size);
    }
    self->len += size;
    return CPY_NONE;
}
|
||||
|
||||
// Set the appropriate exception for a failed fixed-width bytes read:
// ValueError for a negative index, IndexError otherwise.
void CPyBytes_ReadError(int64_t index, Py_ssize_t size) {
    if (index < 0) {
        PyErr_SetString(PyExc_ValueError, "index must be non-negative");
    } else {
        PyErr_Format(PyExc_IndexError,
                     "index %lld out of range for bytes of length %zd",
                     (long long)index, size);
    }
}
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
|
@ -0,0 +1,291 @@
|
|||
// Inline fast-path primitives for librt.strings.BytesWriter and for
// fixed-width reads from bytes objects. All Write* helpers return
// CPY_NONE on success and CPY_NONE_ERROR if growing the buffer failed;
// all Read* helpers return CPY_LL_INT_ERROR / CPY_FLOAT_ERROR with an
// exception set on a bad index.
#ifndef BYTESWRITER_EXTRA_OPS_H
#define BYTESWRITER_EXTRA_OPS_H

#ifdef MYPYC_EXPERIMENTAL

#include <stdbool.h>
#include <stdint.h>
#include <Python.h>

#include "mypyc_util.h"
#include "strings/librt_strings.h"
#include "strings/librt_strings_common.h"

// BytesWriter: Length and capacity

// Current length as a tagged short int (<< 1 applies the tag).
static inline CPyTagged
CPyBytesWriter_Len(PyObject *obj) {
    return (CPyTagged)((BytesWriterObject *)obj)->len << 1;
}

// Ensure at least n more bytes of capacity; grows the buffer if needed.
static inline bool
CPyBytesWriter_EnsureSize(BytesWriterObject *data, Py_ssize_t n) {
    if (likely(data->capacity - data->len >= n)) {
        return true;
    } else {
        return LibRTStrings_ByteWriter_grow_buffer_internal(data, n);
    }
}

// BytesWriter: Basic write operations

// Append a single byte.
static inline char
CPyBytesWriter_Append(PyObject *obj, uint8_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    // Store length in a local variable to enable additional optimizations
    Py_ssize_t len = self->len;
    if (!CPyBytesWriter_EnsureSize(self, 1))
        return CPY_NONE_ERROR;
    self->buf[len] = value;
    self->len = len + 1;
    return CPY_NONE;
}

char CPyBytesWriter_Write(PyObject *obj, PyObject *value);

// BytesWriter: Indexing operations

// If index is negative, convert to non-negative index (no range checking)
static inline int64_t CPyBytesWriter_AdjustIndex(PyObject *obj, int64_t index) {
    if (index < 0) {
        return index + ((BytesWriterObject *)obj)->len;
    }
    return index;
}

// Check if index is in valid range [0, len).
static inline bool CPyBytesWriter_RangeCheck(PyObject *obj, int64_t index) {
    return index >= 0 && index < ((BytesWriterObject *)obj)->len;
}

// Get/set a byte at an index (no bounds checking).
static inline uint8_t CPyBytesWriter_GetItem(PyObject *obj, int64_t index) {
    return (((BytesWriterObject *)obj)->buf)[index];
}

static inline void CPyBytesWriter_SetItem(PyObject *obj, int64_t index, uint8_t x) {
    (((BytesWriterObject *)obj)->buf)[index] = x;
}

// BytesWriter: Write integer operations (little-endian)
// Each helper reserves space, then delegates to the *Unsafe writer.

static inline char
CPyBytesWriter_WriteI16LE(PyObject *obj, int16_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 2))
        return CPY_NONE_ERROR;
    BytesWriter_WriteI16LEUnsafe(self, value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteI16BE(PyObject *obj, int16_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 2))
        return CPY_NONE_ERROR;
    BytesWriter_WriteI16BEUnsafe(self, value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteI32LE(PyObject *obj, int32_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 4))
        return CPY_NONE_ERROR;
    BytesWriter_WriteI32LEUnsafe(self, value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteI32BE(PyObject *obj, int32_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 4))
        return CPY_NONE_ERROR;
    BytesWriter_WriteI32BEUnsafe(self, value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteI64LE(PyObject *obj, int64_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 8))
        return CPY_NONE_ERROR;
    BytesWriter_WriteI64LEUnsafe(self, value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteI64BE(PyObject *obj, int64_t value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 8))
        return CPY_NONE_ERROR;
    BytesWriter_WriteI64BEUnsafe(self, value);
    return CPY_NONE;
}

// BytesWriter: Write float operations
// F32 variants take a double and narrow to float before writing.

static inline char
CPyBytesWriter_WriteF32LE(PyObject *obj, double value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 4))
        return CPY_NONE_ERROR;
    BytesWriter_WriteF32LEUnsafe(self, (float)value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteF32BE(PyObject *obj, double value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 4))
        return CPY_NONE_ERROR;
    BytesWriter_WriteF32BEUnsafe(self, (float)value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteF64LE(PyObject *obj, double value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 8))
        return CPY_NONE_ERROR;
    BytesWriter_WriteF64LEUnsafe(self, value);
    return CPY_NONE;
}

static inline char
CPyBytesWriter_WriteF64BE(PyObject *obj, double value) {
    BytesWriterObject *self = (BytesWriterObject *)obj;
    if (!CPyBytesWriter_EnsureSize(self, 8))
        return CPY_NONE_ERROR;
    BytesWriter_WriteF64BEUnsafe(self, value);
    return CPY_NONE;
}

// Bytes: Read integer operations
// Each reader bounds-checks index..index+width-1, then delegates to the
// *Unsafe reader on the raw buffer.

// Helper function for bytes read error handling (negative index or out of range)
void CPyBytes_ReadError(int64_t index, Py_ssize_t size);

static inline int16_t
CPyBytes_ReadI16LE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 2)) {
        CPyBytes_ReadError(index, size);
        return CPY_LL_INT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadI16LEUnsafe(data + index);
}

static inline int16_t
CPyBytes_ReadI16BE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 2)) {
        CPyBytes_ReadError(index, size);
        return CPY_LL_INT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadI16BEUnsafe(data + index);
}

static inline int32_t
CPyBytes_ReadI32BE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 4)) {
        CPyBytes_ReadError(index, size);
        return CPY_LL_INT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadI32BEUnsafe(data + index);
}

static inline int32_t
CPyBytes_ReadI32LE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 4)) {
        CPyBytes_ReadError(index, size);
        return CPY_LL_INT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadI32LEUnsafe(data + index);
}

static inline int64_t
CPyBytes_ReadI64LE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 8)) {
        CPyBytes_ReadError(index, size);
        return CPY_LL_INT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadI64LEUnsafe(data + index);
}

static inline int64_t
CPyBytes_ReadI64BE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 8)) {
        CPyBytes_ReadError(index, size);
        return CPY_LL_INT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadI64BEUnsafe(data + index);
}

// Bytes: Read float operations

static inline double
CPyBytes_ReadF32LE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 4)) {
        CPyBytes_ReadError(index, size);
        return CPY_FLOAT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return (double)CPyBytes_ReadF32LEUnsafe(data + index);
}

static inline double
CPyBytes_ReadF32BE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 4)) {
        CPyBytes_ReadError(index, size);
        return CPY_FLOAT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return (double)CPyBytes_ReadF32BEUnsafe(data + index);
}

static inline double
CPyBytes_ReadF64LE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 8)) {
        CPyBytes_ReadError(index, size);
        return CPY_FLOAT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadF64LEUnsafe(data + index);
}

static inline double
CPyBytes_ReadF64BE(PyObject *bytes_obj, int64_t index) {
    // bytes_obj type is enforced by mypyc
    Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
    if (unlikely(index < 0 || index > size - 8)) {
        CPyBytes_ReadError(index, size);
        return CPY_FLOAT_ERROR;
    }
    const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
    return CPyBytes_ReadF64BEUnsafe(data + index);
}

#endif // MYPYC_EXPERIMENTAL

#endif
|
||||
429
venv/lib/python3.11/site-packages/mypyc/lib-rt/dict_ops.c
Normal file
429
venv/lib/python3.11/site-packages/mypyc/lib-rt/dict_ops.c
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
// Dict primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.dict_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
#ifndef Py_TPFLAGS_MAPPING
|
||||
#define Py_TPFLAGS_MAPPING (1 << 6)
|
||||
#endif
|
||||
|
||||
// Dict subclasses like defaultdict override things in interesting
|
||||
// ways, so we don't want to just directly use the dict methods. Not
|
||||
// sure if it is actually worth doing all this stuff, but it saves
|
||||
// some indirections.
|
||||
// dict[key] with an exact-dict fast path.  Returns a new reference; a
// missing key raises KeyError.  Subclasses (e.g. defaultdict) override
// __getitem__ in interesting ways, so they go through PyObject_GetItem.
PyObject *CPyDict_GetItem(PyObject *dict, PyObject *key) {
    if (!PyDict_CheckExact(dict)) {
        return PyObject_GetItem(dict, key);
    }
    PyObject *res = PyDict_GetItemWithError(dict, key);
    if (res != NULL) {
        Py_INCREF(res);
    } else if (!PyErr_Occurred()) {
        // Absent key with no pending error: raise KeyError ourselves.
        PyErr_SetObject(PyExc_KeyError, key);
    }
    return res;
}
|
||||
|
||||
// Build a dict from `size` interleaved (key, value) varargs pairs.
// Returns a new reference, or NULL with an error set.
//
// Fix: the original returned from inside the loop without calling
// va_end(), which is undefined behavior per the C standard — every
// va_start must be matched by va_end before the function returns.
PyObject *CPyDict_Build(Py_ssize_t size, ...) {
    PyObject *res = _PyDict_NewPresized(size);
    if (res == NULL) {
        return NULL;
    }

    va_list args;
    va_start(args, size);

    for (Py_ssize_t i = 0; i < size; i++) {
        PyObject *key = va_arg(args, PyObject *);
        PyObject *value = va_arg(args, PyObject *);
        if (PyDict_SetItem(res, key, value)) {
            va_end(args);
            Py_DECREF(res);
            return NULL;
        }
    }

    va_end(args);
    return res;
}
|
||||
|
||||
// dict.get(key, fallback).  We are dodgily assuming that get on a
// subclass doesn't have different behavior.  Returns a new reference.
PyObject *CPyDict_Get(PyObject *dict, PyObject *key, PyObject *fallback) {
    PyObject *res = PyDict_GetItemWithError(dict, key);
    if (res == NULL) {
        if (PyErr_Occurred()) {
            return NULL;
        }
        res = fallback;
    }
    Py_INCREF(res);
    return res;
}

// dict.get(key) — fallback defaults to None.
PyObject *CPyDict_GetWithNone(PyObject *dict, PyObject *key) {
    return CPyDict_Get(dict, key, Py_None);
}
|
||||
|
||||
// dict.setdefault(key, value) with an exact-dict fast path; dict
// subclasses go through their (possibly overridden) setdefault method.
// Returns a new reference, or NULL with an error set.
PyObject *CPyDict_SetDefault(PyObject *dict, PyObject *key, PyObject *value) {
    if (!PyDict_CheckExact(dict)) {
        return PyObject_CallMethodObjArgs(dict, mypyc_interned_str.setdefault,
                                          key, value, NULL);
    }
    PyObject *res = PyDict_SetDefault(dict, key, value);
    Py_XINCREF(res);
    return res;
}

// dict.setdefault(key) — default value is None.
PyObject *CPyDict_SetDefaultWithNone(PyObject *dict, PyObject *key) {
    return CPyDict_SetDefault(dict, key, Py_None);
}
|
||||
|
||||
// setdefault with a freshly allocated empty container as the default.
// data_type selects the container: 1 = list, 2 = dict, 3 = set.
// Returns a new reference, or NULL on error (note: an unknown data_type
// returns NULL without setting an exception — preserved quirk).
//
// Fixes: the original did not check the container allocation for NULL
// (passing NULL into CPyDict_SetItem), and leaked the new container when
// CPyDict_SetItem failed.
PyObject *CPyDict_SetDefaultWithEmptyDatatype(PyObject *dict, PyObject *key,
                                              int data_type) {
    PyObject *res = CPyDict_GetItem(dict, key);
    if (res != NULL) {
        return res;
    }
    // CPyDict_GetItem() raises KeyError when the key is not found; that
    // is the expected case here, so clear it and insert a default.
    PyErr_Clear();

    PyObject *new_obj;
    if (data_type == 1) {
        new_obj = PyList_New(0);
    } else if (data_type == 2) {
        new_obj = PyDict_New();
    } else if (data_type == 3) {
        new_obj = PySet_New(NULL);
    } else {
        return NULL;
    }
    if (new_obj == NULL) {
        // Allocation failed; propagate the MemoryError.
        return NULL;
    }

    if (CPyDict_SetItem(dict, key, new_obj) == -1) {
        // Don't leak the fresh container if insertion fails.
        Py_DECREF(new_obj);
        return NULL;
    }
    return new_obj;
}
|
||||
|
||||
// dict[key] = value with an exact-dict fast path.
int CPyDict_SetItem(PyObject *dict, PyObject *key, PyObject *value) {
    return PyDict_CheckExact(dict)
        ? PyDict_SetItem(dict, key, value)
        : PyObject_SetItem(dict, key, value);
}

// Collapse a new-reference-or-NULL call result into a 0/-1 status code,
// releasing the reference when present.
static inline int CPy_ObjectToStatus(PyObject *obj) {
    if (obj == NULL) {
        return -1;
    }
    Py_DECREF(obj);
    return 0;
}

// Generic fallback: defer to the (possibly overridden) .update() method.
static int CPyDict_UpdateGeneral(PyObject *dict, PyObject *stuff) {
    return CPy_ObjectToStatus(
        PyObject_CallMethodOneArg(dict, mypyc_interned_str.update, stuff));
}
|
||||
|
||||
// Dict-display update with CPython's error mapping: an AttributeError
// (missing .keys) becomes "'T' object is not a mapping".
// from https://github.com/python/cpython/blob/55d035113dfb1bd90495c8571758f504ae8d4802/Python/ceval.c#L2710
int CPyDict_UpdateInDisplay(PyObject *dict, PyObject *stuff) {
    int status = PyDict_Update(dict, stuff);
    if (status < 0 && PyErr_ExceptionMatches(PyExc_AttributeError)) {
        PyErr_Format(PyExc_TypeError,
                     "'%.200s' object is not a mapping",
                     Py_TYPE(stuff)->tp_name);
    }
    return status;
}

// dict.update(stuff) with an exact-dict fast path.
int CPyDict_Update(PyObject *dict, PyObject *stuff) {
    return PyDict_CheckExact(dict)
        ? PyDict_Update(dict, stuff)
        : CPyDict_UpdateGeneral(dict, stuff);
}

// Update from an arbitrary mapping or iterable of pairs.
int CPyDict_UpdateFromAny(PyObject *dict, PyObject *stuff) {
    if (!PyDict_CheckExact(dict)) {
        return CPyDict_UpdateGeneral(dict, stuff);
    }
    // Argh this sucks
    if (PyDict_Check(stuff)
            || PyObject_HasAttrWithError(stuff, mypyc_interned_str.keys) > 0) {
        return PyDict_Update(dict, stuff);
    }
    return PyDict_MergeFromSeq2(dict, stuff, 1);
}
|
||||
|
||||
// Build a fresh dict from an arbitrary mapping (anything with .keys) or
// an iterable of (key, value) pairs.  Returns a new reference.
PyObject *CPyDict_FromAny(PyObject *obj) {
    if (PyDict_Check(obj)) {
        return PyDict_Copy(obj);
    }

    PyObject *dict = PyDict_New();
    if (dict == NULL) {
        return NULL;
    }

    int res;
    if (PyObject_HasAttrWithError(obj, mypyc_interned_str.keys) > 0) {
        res = PyDict_Update(dict, obj);
    } else {
        res = PyDict_MergeFromSeq2(dict, obj, 1);
    }
    if (res < 0) {
        Py_DECREF(dict);
        return NULL;
    }
    return dict;
}
|
||||
|
||||
// keys()/values()/items() views: exact dicts use the real dict-view
// types directly; subclasses defer to their own methods.

PyObject *CPyDict_KeysView(PyObject *dict) {
    if (!PyDict_CheckExact(dict)) {
        return PyObject_CallMethodNoArgs(dict, mypyc_interned_str.keys);
    }
    return _CPyDictView_New(dict, &PyDictKeys_Type);
}

PyObject *CPyDict_ValuesView(PyObject *dict) {
    if (!PyDict_CheckExact(dict)) {
        return PyObject_CallMethodNoArgs(dict, mypyc_interned_str.values);
    }
    return _CPyDictView_New(dict, &PyDictValues_Type);
}

PyObject *CPyDict_ItemsView(PyObject *dict) {
    if (!PyDict_CheckExact(dict)) {
        return PyObject_CallMethodNoArgs(dict, mypyc_interned_str.items);
    }
    return _CPyDictView_New(dict, &PyDictItems_Type);
}
|
||||
|
||||
// Call the named no-argument method (keys/values/items) on a dict
// subclass and copy the resulting view into a fresh list.  Returns a
// new reference, or NULL with an error set.
//
// Fixes over the original inline copies: the PyList_New(0) result is
// checked for NULL, and the list is no longer leaked when the method
// call or PyList_Extend fails.
static PyObject *CPyDict_MethodToList(PyObject *dict, PyObject *method_name) {
    PyObject *view = PyObject_CallMethodNoArgs(dict, method_name);
    if (view == NULL) {
        return NULL;
    }
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        Py_DECREF(view);
        return NULL;
    }
    int res = PyList_Extend(list, view);
    Py_DECREF(view);
    if (res < 0) {
        Py_DECREF(list);
        return NULL;
    }
    return list;
}

// list(dict.keys()) with an exact-dict fast path.
PyObject *CPyDict_Keys(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        return PyDict_Keys(dict);
    }
    // Generic fallback that still returns a list.
    return CPyDict_MethodToList(dict, mypyc_interned_str.keys);
}

// list(dict.values()) with an exact-dict fast path.
PyObject *CPyDict_Values(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        return PyDict_Values(dict);
    }
    return CPyDict_MethodToList(dict, mypyc_interned_str.values);
}

// list(dict.items()) with an exact-dict fast path.
PyObject *CPyDict_Items(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        return PyDict_Items(dict);
    }
    return CPyDict_MethodToList(dict, mypyc_interned_str.items);
}
|
||||
|
||||
// dict.clear(); returns 1 on success, 0 on error (the error path only
// exists for subclasses, whose clear() method can fail).
//
// Fix: the original leaked the reference returned by the subclass's
// .clear() call (the None result was never released).
char CPyDict_Clear(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        PyDict_Clear(dict);
    } else {
        PyObject *res = PyObject_CallMethodNoArgs(dict, mypyc_interned_str.clear);
        if (res == NULL) {
            return 0;
        }
        // Discard the (None) result instead of leaking it.
        Py_DECREF(res);
    }
    return 1;
}

// dict.copy() with an exact-dict fast path.  Returns a new reference.
PyObject *CPyDict_Copy(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        return PyDict_Copy(dict);
    }
    return PyObject_CallMethodNoArgs(dict, mypyc_interned_str.copy);
}
|
||||
|
||||
// Iteration setup helpers: for an exact dict, return the dict itself as
// a signal that the PyDict_Next fast path can be used; for subclasses,
// build a real iterator over the corresponding view.

PyObject *CPyDict_GetKeysIter(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        Py_INCREF(dict);
        return dict;
    }
    return PyObject_GetIter(dict);
}

PyObject *CPyDict_GetItemsIter(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        Py_INCREF(dict);
        return dict;
    }
    PyObject *view = PyObject_CallMethodNoArgs(dict, mypyc_interned_str.items);
    if (view == NULL) {
        return NULL;
    }
    PyObject *it = PyObject_GetIter(view);
    Py_DECREF(view);
    return it;
}

PyObject *CPyDict_GetValuesIter(PyObject *dict) {
    if (PyDict_CheckExact(dict)) {
        Py_INCREF(dict);
        return dict;
    }
    PyObject *view = PyObject_CallMethodNoArgs(dict, mypyc_interned_str.values);
    if (view == NULL) {
        return NULL;
    }
    PyObject *it = PyObject_GetIter(view);
    Py_DECREF(view);
    return it;
}
|
||||
|
||||
static void _CPyDict_FromNext(tuple_T3CIO *ret, PyObject *dict_iter) {
|
||||
// Get next item from iterator and set "should continue" flag.
|
||||
ret->f2 = PyIter_Next(dict_iter);
|
||||
if (ret->f2 == NULL) {
|
||||
ret->f0 = 0;
|
||||
Py_INCREF(Py_None);
|
||||
ret->f2 = Py_None;
|
||||
} else {
|
||||
ret->f0 = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Helpers for fast dictionary iteration, return a single tuple
|
||||
// instead of writing to multiple registers, for exact dicts use
|
||||
// the fast path, and fall back to generic iterator logic for subclasses.
|
||||
tuple_T3CIO CPyDict_NextKey(PyObject *dict_or_iter, CPyTagged offset) {
|
||||
tuple_T3CIO ret;
|
||||
Py_ssize_t py_offset = CPyTagged_AsSsize_t(offset);
|
||||
PyObject *dummy;
|
||||
|
||||
if (PyDict_CheckExact(dict_or_iter)) {
|
||||
ret.f0 = PyDict_Next(dict_or_iter, &py_offset, &ret.f2, &dummy);
|
||||
if (ret.f0) {
|
||||
ret.f1 = CPyTagged_FromSsize_t(py_offset);
|
||||
} else {
|
||||
// Set key to None, so mypyc can manage refcounts.
|
||||
ret.f1 = 0;
|
||||
ret.f2 = Py_None;
|
||||
}
|
||||
// PyDict_Next() returns borrowed references.
|
||||
Py_INCREF(ret.f2);
|
||||
} else {
|
||||
// offset is dummy in this case, just use the old value.
|
||||
ret.f1 = offset;
|
||||
_CPyDict_FromNext(&ret, dict_or_iter);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
tuple_T3CIO CPyDict_NextValue(PyObject *dict_or_iter, CPyTagged offset) {
|
||||
tuple_T3CIO ret;
|
||||
Py_ssize_t py_offset = CPyTagged_AsSsize_t(offset);
|
||||
PyObject *dummy;
|
||||
|
||||
if (PyDict_CheckExact(dict_or_iter)) {
|
||||
ret.f0 = PyDict_Next(dict_or_iter, &py_offset, &dummy, &ret.f2);
|
||||
if (ret.f0) {
|
||||
ret.f1 = CPyTagged_FromSsize_t(py_offset);
|
||||
} else {
|
||||
// Set value to None, so mypyc can manage refcounts.
|
||||
ret.f1 = 0;
|
||||
ret.f2 = Py_None;
|
||||
}
|
||||
// PyDict_Next() returns borrowed references.
|
||||
Py_INCREF(ret.f2);
|
||||
} else {
|
||||
// offset is dummy in this case, just use the old value.
|
||||
ret.f1 = offset;
|
||||
_CPyDict_FromNext(&ret, dict_or_iter);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Item-iteration helper, analogous to CPyDict_NextKey/NextValue but
// producing both key and value.
//
// Fixes over the original generic-iterator path: (1) a non-tuple or
// wrong-length item was leaked (never DECREF'd); (2) the key/value were
// read out of the tuple as borrowed references and the tuple was
// DECREF'd *before* they were INCREF'd at the end of the function — if
// the tuple held the only references, that is a use-after-free.  We now
// take our own references before dropping the tuple.
tuple_T4CIOO CPyDict_NextItem(PyObject *dict_or_iter, CPyTagged offset) {
    tuple_T4CIOO ret;
    Py_ssize_t py_offset = CPyTagged_AsSsize_t(offset);

    if (PyDict_CheckExact(dict_or_iter)) {
        ret.f0 = PyDict_Next(dict_or_iter, &py_offset, &ret.f2, &ret.f3);
        if (ret.f0) {
            ret.f1 = CPyTagged_FromSsize_t(py_offset);
        } else {
            // Set key and value to None, so mypyc can manage refcounts.
            ret.f1 = 0;
            ret.f2 = Py_None;
            ret.f3 = Py_None;
        }
        // PyDict_Next() returns borrowed references.
        Py_INCREF(ret.f2);
        Py_INCREF(ret.f3);
    } else {
        // offset is dummy in this case, just use the old value.
        ret.f1 = offset;
        PyObject *item = PyIter_Next(dict_or_iter);
        if (item == NULL || !PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
            if (item != NULL) {
                Py_DECREF(item);
                PyErr_SetString(PyExc_TypeError, "a tuple of length 2 expected");
            }
            ret.f0 = 0;
            ret.f2 = Py_None;
            ret.f3 = Py_None;
            Py_INCREF(ret.f2);
            Py_INCREF(ret.f3);
        } else {
            ret.f0 = 1;
            ret.f2 = PyTuple_GET_ITEM(item, 0);
            ret.f3 = PyTuple_GET_ITEM(item, 1);
            // Own the key/value before releasing the tuple that holds them.
            Py_INCREF(ret.f2);
            Py_INCREF(ret.f3);
            Py_DECREF(item);
        }
    }
    return ret;
}
|
||||
|
||||
// Nonzero iff obj's type advertises the mapping protocol flag.
// (Returns the raw masked flag bit, not a normalized 0/1.)
int CPyMapping_Check(PyObject *obj) {
    return Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MAPPING;
}
|
||||
261
venv/lib/python3.11/site-packages/mypyc/lib-rt/exc_ops.c
Normal file
261
venv/lib/python3.11/site-packages/mypyc/lib-rt/exc_ops.c
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
#include "pythoncapi_compat.h"
|
||||
|
||||
// Exception related primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.exc_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// Raise an exception.  A class is instantiated with no arguments first;
// an instance is raised under its own type.
void CPy_Raise(PyObject *exc) {
    if (PyObject_IsInstance(exc, (PyObject *)&PyType_Type)) {
        PyObject *inst = PyObject_CallNoArgs(exc);
        if (inst == NULL) {
            return;
        }
        PyErr_SetObject(exc, inst);
        Py_DECREF(inst);
    } else {
        PyErr_SetObject((PyObject *)Py_TYPE(exc), exc);
    }
}

// Re-raise the exception currently stashed in sys.exc_info().
// PyErr_Restore steals the references handed out by PyErr_GetExcInfo.
void CPy_Reraise(void) {
    PyObject *exc_type, *exc_value, *exc_tb;
    PyErr_GetExcInfo(&exc_type, &exc_value, &exc_tb);
    PyErr_Restore(exc_type, exc_value, exc_tb);
}
|
||||
|
||||
// Set the pending error's type, value, and traceback.
void CPyErr_SetObjectAndTraceback(PyObject *type, PyObject *value, PyObject *traceback) {
    if (!PyType_Check(type) && Py_IsNone(value)) {
        // The first argument must be an exception instance
        value = type;
        type = (PyObject *)Py_TYPE(value);
    }

    // PyErr_Restore steals one reference per argument, so take our own
    // reference to each object before calling it.
    Py_INCREF(type);
    Py_INCREF(value);
    Py_INCREF(traceback);
    PyErr_Restore(type, value, traceback);
}
|
||||
|
||||
tuple_T3OOO CPy_CatchError(void) {
|
||||
// We need to return the existing sys.exc_info() information, so
|
||||
// that it can be restored when we finish handling the error we
|
||||
// are catching now. Grab that triple and convert NULL values to
|
||||
// the ExcDummy object in order to simplify refcount handling in
|
||||
// generated code.
|
||||
tuple_T3OOO ret;
|
||||
PyErr_GetExcInfo(&ret.f0, &ret.f1, &ret.f2);
|
||||
_CPy_ToDummy(&ret.f0);
|
||||
_CPy_ToDummy(&ret.f1);
|
||||
_CPy_ToDummy(&ret.f2);
|
||||
|
||||
if (!PyErr_Occurred()) {
|
||||
PyErr_SetString(PyExc_RuntimeError, "CPy_CatchError called with no error!");
|
||||
}
|
||||
|
||||
// Retrieve the error info and normalize it so that it looks like
|
||||
// what python code needs it to be.
|
||||
PyObject *type, *value, *traceback;
|
||||
PyErr_Fetch(&type, &value, &traceback);
|
||||
// Could we avoid always normalizing?
|
||||
PyErr_NormalizeException(&type, &value, &traceback);
|
||||
if (traceback != NULL) {
|
||||
PyException_SetTraceback(value, traceback);
|
||||
}
|
||||
// Indicate that we are now handling this exception by stashing it
|
||||
// in sys.exc_info(). mypyc routines that need access to the
|
||||
// exception will read it out of there.
|
||||
PyErr_SetExcInfo(type, value, traceback);
|
||||
// Clear the error indicator, since the exception isn't
|
||||
// propagating anymore.
|
||||
PyErr_Clear();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void CPy_RestoreExcInfo(tuple_T3OOO info) {
|
||||
PyErr_SetExcInfo(_CPy_FromDummy(info.f0), _CPy_FromDummy(info.f1), _CPy_FromDummy(info.f2));
|
||||
}
|
||||
|
||||
bool CPy_ExceptionMatches(PyObject *type) {
|
||||
return PyErr_GivenExceptionMatches((PyObject *)Py_TYPE(CPy_ExcState()->exc_value), type);
|
||||
}
|
||||
|
||||
PyObject *CPy_GetExcValue(void) {
|
||||
PyObject *exc = CPy_ExcState()->exc_value;
|
||||
Py_INCREF(exc);
|
||||
return exc;
|
||||
}
|
||||
|
||||
static inline void _CPy_ToNone(PyObject **p) {
|
||||
if (*p == NULL) {
|
||||
Py_INCREF(Py_None);
|
||||
*p = Py_None;
|
||||
}
|
||||
}
|
||||
|
||||
void _CPy_GetExcInfo(PyObject **p_type, PyObject **p_value, PyObject **p_traceback) {
|
||||
PyErr_GetExcInfo(p_type, p_value, p_traceback);
|
||||
_CPy_ToNone(p_type);
|
||||
_CPy_ToNone(p_value);
|
||||
_CPy_ToNone(p_traceback);
|
||||
}
|
||||
|
||||
tuple_T3OOO CPy_GetExcInfo(void) {
|
||||
tuple_T3OOO ret;
|
||||
_CPy_GetExcInfo(&ret.f0, &ret.f1, &ret.f2);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Last-resort OOM handler: generated code cannot recover from a failed
// runtime-internal allocation, so report and abort.
void CPyError_OutOfMemory(void) {
    fprintf(stderr, "fatal: out of memory\n");
    fflush(stderr);
    abort();
}
|
||||
|
||||
// Construct a nicely formatted type name based on __module__ and __name__.
|
||||
// Construct a nicely formatted type name based on __module__ and
// __qualname__.  Returns NULL (with or without an error set) when the
// attributes are missing or not strings.
static PyObject *CPy_GetTypeName(PyObject *type) {
    PyObject *full = NULL;
    PyObject *name = NULL;

    PyObject *module = PyObject_GetAttr(type, mypyc_interned_str.__module__);
    if (!module || !PyUnicode_Check(module)) {
        goto out;
    }
    name = PyObject_GetAttr(type, mypyc_interned_str.__qualname__);
    if (!name || !PyUnicode_Check(name)) {
        goto out;
    }

    if (PyUnicode_CompareWithASCIIString(module, "builtins") == 0) {
        // Builtins are shown bare ("int", not "builtins.int").
        full = name;
        Py_INCREF(full);
    } else {
        full = PyUnicode_FromFormat("%U.%U", module, name);
    }

out:
    Py_XDECREF(module);
    Py_XDECREF(name);
    return full;
}
|
||||
|
||||
// Get the type of a value as a string, expanding tuples to include
|
||||
// all the element types.
|
||||
// Get the type of a value as a string, expanding tuples to include
// all the element types.
//
// Fix: PyTuple_GET_SIZE returns Py_ssize_t, but the original passed it
// to PyUnicode_FromFormat with %d (expects int) — a format/argument
// mismatch on LP64 platforms.  Use %zd and a Py_ssize_t loop index.
static PyObject *CPy_FormatTypeName(PyObject *value) {
    if (Py_IsNone(value)) {
        return PyUnicode_FromString("None");
    }

    if (!PyTuple_CheckExact(value)) {
        return CPy_GetTypeName((PyObject *)Py_TYPE(value));
    }

    Py_ssize_t size = PyTuple_GET_SIZE(value);
    if (size > 10) {
        // %zd matches Py_ssize_t.
        return PyUnicode_FromFormat("tuple[<%zd items>]", size);
    }

    // Most of the logic is all for tuples, which is the only interesting case
    PyObject *output = PyUnicode_FromString("tuple[");
    if (!output) {
        return NULL;
    }
    /* This is quadratic but if that ever matters something is really weird. */
    for (Py_ssize_t i = 0; i < size; i++) {
        PyObject *s = CPy_FormatTypeName(PyTuple_GET_ITEM(value, i));
        if (!s) {
            Py_DECREF(output);
            return NULL;
        }
        PyObject *next = PyUnicode_FromFormat("%U%U%s", output, s,
                                              i + 1 == size ? "]" : ", ");
        Py_DECREF(output);
        Py_DECREF(s);
        if (!next) {
            return NULL;
        }
        output = next;
    }
    return output;
}
|
||||
|
||||
CPy_NOINLINE
|
||||
void CPy_TypeError(const char *expected, PyObject *value) {
|
||||
PyObject *out = CPy_FormatTypeName(value);
|
||||
if (out) {
|
||||
PyErr_Format(PyExc_TypeError, "%s object expected; got %U", expected, out);
|
||||
Py_DECREF(out);
|
||||
} else {
|
||||
PyErr_Format(PyExc_TypeError, "%s object expected; and errored formatting real type!",
|
||||
expected);
|
||||
}
|
||||
}
|
||||
|
||||
// The PyFrameObject type definition (struct _frame) has been moved
|
||||
// to the internal C API: to the pycore_frame.h header file.
|
||||
// https://github.com/python/cpython/pull/31530
|
||||
#if PY_VERSION_HEX >= 0x030b00a6
|
||||
#include "internal/pycore_frame.h"
|
||||
#endif
|
||||
|
||||
// This function is basically exactly the same with _PyTraceback_Add
|
||||
// which is available in all the versions we support.
|
||||
// We're continuing to use this because we'll probably optimize this later.
|
||||
// Append a synthetic frame (filename/funcname/line) to the pending
// exception's traceback.  Essentially _PyTraceback_Add, kept local so it
// can be optimized later.
void CPy_AddTraceback(const char *filename, const char *funcname, int line, PyObject *globals) {
    PyObject *exc_type, *exc_val, *exc_tb;
    PyThreadState *tstate = PyThreadState_GET();
    PyFrameObject *frame;

    // Save the exception state first: in 3.8, PyFrame_New fails if an
    // error is set and it fails to look up builtins in the globals.
    // (_PyTraceback_Add documents doing this because it decodes the
    // filename with the FS encoding, which could run a Python decoder;
    // we don't decode, so that part doesn't apply to us.)
    PyErr_Fetch(&exc_type, &exc_val, &exc_tb);
    PyCodeObject *code = PyCode_NewEmpty(filename, funcname, line);
    if (code == NULL) {
        goto error;
    }

    frame = PyFrame_New(tstate, code, globals, 0);
    if (frame == NULL) {
        Py_DECREF(code);
        goto error;
    }
    frame->f_lineno = line;
    PyErr_Restore(exc_type, exc_val, exc_tb);
    PyTraceBack_Here(frame);
    Py_DECREF(code);
    Py_DECREF(frame);

    return;

error:
#if CPY_3_12_FEATURES
    _PyErr_ChainExceptions1(exc_type);
#else
    _PyErr_ChainExceptions(exc_type, exc_val, exc_tb);
#endif
}
|
||||
|
||||
CPy_NOINLINE
|
||||
void CPy_TypeErrorTraceback(const char *filename, const char *funcname, int line,
|
||||
PyObject *globals, const char *expected, PyObject *value) {
|
||||
CPy_TypeError(expected, value);
|
||||
CPy_AddTraceback(filename, funcname, line, globals);
|
||||
}
|
||||
|
||||
void CPy_AttributeError(const char *filename, const char *funcname, const char *classname,
|
||||
const char *attrname, int line, PyObject *globals) {
|
||||
char buf[500];
|
||||
snprintf(buf, sizeof(buf), "attribute '%.200s' of '%.200s' undefined", attrname, classname);
|
||||
PyErr_SetString(PyExc_AttributeError, buf);
|
||||
CPy_AddTraceback(filename, funcname, line, globals);
|
||||
}
|
||||
239
venv/lib/python3.11/site-packages/mypyc/lib-rt/float_ops.c
Normal file
239
venv/lib/python3.11/site-packages/mypyc/lib-rt/float_ops.c
Normal file
|
|
@ -0,0 +1,239 @@
|
|||
// Float primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.float_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
|
||||
static double CPy_DomainError(void) {
|
||||
PyErr_SetString(PyExc_ValueError, "math domain error");
|
||||
return CPY_FLOAT_ERROR;
|
||||
}
|
||||
|
||||
static double CPy_MathRangeError(void) {
|
||||
PyErr_SetString(PyExc_OverflowError, "math range error");
|
||||
return CPY_FLOAT_ERROR;
|
||||
}
|
||||
|
||||
static double CPy_MathExpectedNonNegativeInputError(double x) {
|
||||
char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
|
||||
if (buf) {
|
||||
PyErr_Format(PyExc_ValueError, "expected a nonnegative input, got %s", buf);
|
||||
PyMem_Free(buf);
|
||||
}
|
||||
return CPY_FLOAT_ERROR;
|
||||
}
|
||||
|
||||
static double CPy_MathExpectedPositiveInputError(double x) {
|
||||
char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
|
||||
if (buf) {
|
||||
PyErr_Format(PyExc_ValueError, "expected a positive input, got %s", buf);
|
||||
PyMem_Free(buf);
|
||||
}
|
||||
return CPY_FLOAT_ERROR;
|
||||
}
|
||||
|
||||
static double CPy_MathExpectedFiniteInput(double x) {
|
||||
char *buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
|
||||
if (buf) {
|
||||
PyErr_Format(PyExc_ValueError, "expected a finite input, got %s", buf);
|
||||
PyMem_Free(buf);
|
||||
}
|
||||
return CPY_FLOAT_ERROR;
|
||||
}
|
||||
|
||||
// Convert a tagged integer to a double.  On conversion failure an error
// is already set by PyFloat_AsDouble and CPY_FLOAT_ERROR is returned.
double CPyFloat_FromTagged(CPyTagged x) {
    if (CPyTagged_CheckShort(x)) {
        return CPyTagged_ShortAsSsize_t(x);
    }
    double value = PyFloat_AsDouble(CPyTagged_LongAsObject(x));
    if (unlikely(value == -1.0) && PyErr_Occurred()) {
        return CPY_FLOAT_ERROR;
    }
    return value;
}
|
||||
|
||||
// Trig/exp/log wrappers with Python "math" module error semantics:
// domain problems raise ValueError, overflow raises OverflowError.

double CPyFloat_Sin(double x) {
    double res = sin(x);
    // A NaN result from a non-NaN input means x was out of domain
    // (e.g. infinite).
    if (unlikely(isnan(res)) && !isnan(x)) {
#if CPY_3_14_FEATURES
        return CPy_MathExpectedFiniteInput(x);
#else
        return CPy_DomainError();
#endif
    }
    return res;
}

double CPyFloat_Cos(double x) {
    double res = cos(x);
    if (unlikely(isnan(res)) && !isnan(x)) {
#if CPY_3_14_FEATURES
        return CPy_MathExpectedFiniteInput(x);
#else
        return CPy_DomainError();
#endif
    }
    return res;
}

double CPyFloat_Tan(double x) {
    // tan() is only undefined for infinite inputs.
    if (unlikely(isinf(x))) {
#if CPY_3_14_FEATURES
        return CPy_MathExpectedFiniteInput(x);
#else
        return CPy_DomainError();
#endif
    }
    return tan(x);
}

double CPyFloat_Sqrt(double x) {
    if (x < 0.0) {
#if CPY_3_14_FEATURES
        return CPy_MathExpectedNonNegativeInputError(x);
#else
        return CPy_DomainError();
#endif
    }
    return sqrt(x);
}

double CPyFloat_Exp(double x) {
    double res = exp(x);
    // Overflow to +inf from a finite input is a range error.
    if (unlikely(res == INFINITY) && x != INFINITY) {
        return CPy_MathRangeError();
    }
    return res;
}

double CPyFloat_Log(double x) {
    if (x <= 0.0) {
#if CPY_3_14_FEATURES
        return CPy_MathExpectedPositiveInputError(x);
#else
        return CPy_DomainError();
#endif
    }
    return log(x);
}
|
||||
|
||||
// floor/ceil producing tagged ints, plus float classification helpers.

CPyTagged CPyFloat_Floor(double x) {
    return CPyTagged_FromFloat(floor(x));
}

CPyTagged CPyFloat_Ceil(double x) {
    return CPyTagged_FromFloat(ceil(x));
}

bool CPyFloat_IsInf(double x) {
    return isinf(x) != 0;
}

bool CPyFloat_IsNaN(double x) {
    return isnan(x) != 0;
}
|
||||
|
||||
// From CPython 3.10.0, Objects/floatobject.c
|
||||
static void
|
||||
_float_div_mod(double vx, double wx, double *floordiv, double *mod)
|
||||
{
|
||||
double div;
|
||||
*mod = fmod(vx, wx);
|
||||
/* fmod is typically exact, so vx-mod is *mathematically* an
|
||||
exact multiple of wx. But this is fp arithmetic, and fp
|
||||
vx - mod is an approximation; the result is that div may
|
||||
not be an exact integral value after the division, although
|
||||
it will always be very close to one.
|
||||
*/
|
||||
div = (vx - *mod) / wx;
|
||||
if (*mod) {
|
||||
/* ensure the remainder has the same sign as the denominator */
|
||||
if ((wx < 0) != (*mod < 0)) {
|
||||
*mod += wx;
|
||||
div -= 1.0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* the remainder is zero, and in the presence of signed zeroes
|
||||
fmod returns different results across platforms; ensure
|
||||
it has the same sign as the denominator. */
|
||||
*mod = copysign(0.0, wx);
|
||||
}
|
||||
/* snap quotient to nearest integral value */
|
||||
if (div) {
|
||||
*floordiv = floor(div);
|
||||
if (div - *floordiv > 0.5) {
|
||||
*floordiv += 1.0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* div is zero - get the same sign as the true quotient */
|
||||
*floordiv = copysign(0.0, vx / wx); /* zero w/ sign of vx/wx */
|
||||
}
|
||||
}
|
||||
|
||||
double CPyFloat_FloorDivide(double x, double y) {
|
||||
double mod, floordiv;
|
||||
if (y == 0) {
|
||||
PyErr_SetString(PyExc_ZeroDivisionError, "float floor division by zero");
|
||||
return CPY_FLOAT_ERROR;
|
||||
}
|
||||
_float_div_mod(x, y, &floordiv, &mod);
|
||||
return floordiv;
|
||||
}
|
||||
|
||||
// Adapted from CPython 3.10.7
//
// Python-style float power: the non-finite special cases are handled
// explicitly per IEEE/Python rules before falling through to pow().
double CPyFloat_Pow(double x, double y) {
    if (!isfinite(x) || !isfinite(y)) {
        if (isnan(x)) {
            return y == 0.0 ? 1.0 : x; /* NaN**0 = 1 */
        } else if (isnan(y)) {
            return x == 1.0 ? 1.0 : y; /* 1**NaN = 1 */
        } else if (isinf(x)) {
            int y_is_odd = isfinite(y) && fmod(fabs(y), 2.0) == 1.0;
            if (y > 0.0) {
                return y_is_odd ? x : fabs(x);
            } else if (y == 0.0) {
                return 1.0;
            } else { /* y < 0. */
                return y_is_odd ? copysign(0.0, x) : 0.0;
            }
        } else if (isinf(y)) {
            if (fabs(x) == 1.0) {
                return 1.0;
            } else if (y > 0.0 && fabs(x) > 1.0) {
                return y;
            } else if (y < 0.0 && fabs(x) < 1.0) {
#if PY_VERSION_HEX < 0x030B0000
                if (x == 0.0) { /* 0**-inf: divide-by-zero */
                    return CPy_DomainError();
                }
#endif
                return -y; /* result is +inf */
            } else {
                return 0.0;
            }
        }
    }
    double res = pow(x, y);
    if (!isfinite(res)) {
        if (isnan(res)) {
            return CPy_DomainError();
        }
        /* An infinite result here arises either from:
           (A) (+/-0.)**negative (-> divide-by-zero)
           (B) overflow of x**y with x and y finite */
        if (isinf(res)) {
            return x == 0.0 ? CPy_DomainError() : CPy_MathRangeError();
        }
    }
    return res;
}
|
||||
|
|
@ -0,0 +1,271 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <stdint.h>
|
||||
#include "CPy.h"
|
||||
|
||||
#define CPyFunction_weakreflist(f) (((PyCFunctionObject *)f)->m_weakreflist)
|
||||
#define CPyFunction_class(f) ((PyObject*) ((PyCMethodObject *) (f))->mm_class)
|
||||
#define CPyFunction_func_vectorcall(f) (((PyCFunctionObject *)f)->vectorcall)
|
||||
|
||||
static int
|
||||
CPyFunction_clear(CPyFunction *m) {
|
||||
Py_CLEAR(((PyCFunctionObject*)m)->m_module);
|
||||
PyObject_ClearManagedDict((PyObject*)m);
|
||||
Py_CLEAR(m->func_name);
|
||||
Py_CLEAR(m->func_code);
|
||||
PyObject *cls = CPyFunction_class(m);
|
||||
((PyCMethodObject *)m)->mm_class = NULL;
|
||||
Py_XDECREF(cls);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void CPyFunction_dealloc(CPyFunction *m) {
|
||||
PyObject_GC_UnTrack(m);
|
||||
if (CPyFunction_weakreflist(m) != NULL)
|
||||
PyObject_ClearWeakRefs((PyObject *) m);
|
||||
CPyFunction_clear(m);
|
||||
PyMem_Free(m->func.func.m_ml);
|
||||
PyObject_GC_Del(m);
|
||||
}
|
||||
|
||||
// tp_repr: "<function NAME at 0x...>".
static PyObject* CPyFunction_repr(CPyFunction *op) {
    return PyUnicode_FromFormat("<function %U at %p>", op->func_name, (void *)op);
}

// tp_call: route classic (args, kwargs) calls through vectorcall.
static PyObject* CPyFunction_call(PyObject *func, PyObject *args, PyObject *kw) {
    // Every CPyFunction is created with a vectorcall pointer installed.
    assert(CPyFunction_func_vectorcall((CPyFunction *)func));
    return PyVectorcall_Call(func, args, kw);
}
|
||||
|
||||
static int CPyFunction_traverse(CPyFunction *m, visitproc visit, void *arg) {
|
||||
Py_VISIT(((PyCFunctionObject *)m)->m_module);
|
||||
int e = PyObject_VisitManagedDict((PyObject*)m, visit, arg);
|
||||
if (e != 0) return e;
|
||||
Py_VISIT(m->func_name);
|
||||
Py_VISIT(m->func_code);
|
||||
Py_VISIT(CPyFunction_class(m));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Attribute table. The two pseudo-members __vectorcalloffset__ and
// __weaklistoffset__ tell the heap-type machinery where the vectorcall
// pointer and weakref list live inside instances of this type.
static PyMemberDef CPyFunction_members[] = {
    {"__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0},
    {"__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0},
    {"__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0},
    {0, 0, 0, 0, 0}
};
|
||||
|
||||
// __name__ getter: lazily intern the name from the PyMethodDef the first
// time it is requested and cache it in func_name. Returns a new reference,
// or NULL with an exception set on interning failure.
PyObject* CPyFunction_get_name(PyObject *op, void *context) {
    (void)context;
    CPyFunction *func = (CPyFunction *)op;
    if (unlikely(func->func_name == NULL)) {
        func->func_name = PyUnicode_InternFromString(((PyCFunctionObject *)func)->m_ml->ml_name);
        if (unlikely(func->func_name == NULL))
            return NULL;
    }
    Py_INCREF(func->func_name);
    return func->func_name;
}

// __name__ setter: only str values are accepted, matching the behavior of
// ordinary Python functions. Returns 0 on success, -1 with TypeError set.
int CPyFunction_set_name(PyObject *op, PyObject *value, void *context) {
    (void)context;
    CPyFunction *func = (CPyFunction *)op;
    if (unlikely(!value || !PyUnicode_Check(value))) {
        PyErr_SetString(PyExc_TypeError, "__name__ must be set to a string object");
        return -1;
    }

    // Incref the new value before releasing the old one, in case they alias.
    Py_INCREF(value);
    Py_XDECREF(func->func_name);
    func->func_name = value;
    return 0;
}
|
||||
|
||||
// __code__ getter: return the cached code object, or None when absent.
// Always returns a new reference.
PyObject* CPyFunction_get_code(PyObject *op, void *context) {
    (void)context;
    CPyFunction *func = (CPyFunction *)op;
    PyObject* result = (func->func_code) ? func->func_code : Py_None;
    Py_INCREF(result);
    return result;
}

// Shared getter that always returns None. Compiled functions do not carry
// defaults/kwdefaults/annotations, but the attributes must still exist so
// introspection code does not break.
static PyObject* CPyFunction_get_none(PyObject *op, void *context) {
    (void)op;
    (void)context;
    PyObject* result = Py_None;
    Py_INCREF(result);
    return result;
}

// Shared setter that silently accepts and discards any assigned value.
int CPyFunction_set_none(PyObject *op, PyObject *value, void *context) {
    (void)op;
    (void)value;
    (void)context;
    return 0;
}

// __defaults__ is always None for compiled functions.
PyObject* CPyFunction_get_defaults(PyObject *op, void *context) {
    return CPyFunction_get_none(op, context);
}

// __kwdefaults__ is always None for compiled functions.
PyObject* CPyFunction_get_kwdefaults(PyObject *op, void *context) {
    return CPyFunction_get_none(op, context);
}

// __annotations__ is always None for compiled functions.
PyObject* CPyFunction_get_annotations(PyObject *op, void *context) {
    return CPyFunction_get_none(op, context);
}

// Assignments to __annotations__ are accepted but ignored.
int CPyFunction_set_annotations(PyObject *op, PyObject *value, void *context) {
    return CPyFunction_set_none(op, value, context);
}
|
||||
|
||||
// Computed attributes. __dict__ uses the managed-dict generic accessors;
// __code__/__defaults__/__kwdefaults__ are read-only (no setter).
static PyGetSetDef CPyFunction_getsets[] = {
    {"__dict__", (getter)PyObject_GenericGetDict, (setter)PyObject_GenericSetDict, 0, 0},
    {"__name__", (getter)CPyFunction_get_name, (setter)CPyFunction_set_name, 0, 0},
    {"__code__", (getter)CPyFunction_get_code, 0, 0, 0},
    {"__defaults__", (getter)CPyFunction_get_defaults, 0, 0, 0},
    {"__kwdefaults__", (getter)CPyFunction_get_kwdefaults, 0, 0, 0},
    {"__annotations__", (getter)CPyFunction_get_annotations, CPyFunction_set_annotations, 0, 0},
    {0, 0, 0, 0, 0}
};
|
||||
|
||||
// tp_descr_get slot: bind the function to an instance the same way plain
// Python functions do. When accessed on the class itself (self == NULL)
// the function is returned unbound.
static PyObject* CPy_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) {
    (void)typ;
    if (!self) {
        Py_INCREF(func);
        return func;
    }
    return PyMethod_New(func, self);
}
|
||||
|
||||
// Slot table wiring the functions above into the heap type.
static PyType_Slot CPyFunction_slots[] = {
    {Py_tp_dealloc, (void *)CPyFunction_dealloc},
    {Py_tp_repr, (void *)CPyFunction_repr},
    {Py_tp_call, (void *)CPyFunction_call},
    {Py_tp_traverse, (void *)CPyFunction_traverse},
    {Py_tp_clear, (void *)CPyFunction_clear},
    {Py_tp_members, (void *)CPyFunction_members},
    {Py_tp_getset, (void *)CPyFunction_getsets},
    // Descriptor protocol: makes attribute access on instances produce
    // bound methods, like ordinary functions.
    {Py_tp_descr_get, (void *)CPy_PyMethod_New},
    {0, 0},
};

// Heap-type spec for the compiled-function type. MANAGED_DICT is only
// available from CPython 3.12 (0x030C0000), hence the version gate.
static PyType_Spec CPyFunction_spec = {
    .name = "Function compiled with mypyc",
    .basicsize = sizeof(CPyFunction),
    .itemsize = 0,
    .flags = Py_TPFLAGS_IMMUTABLETYPE |
#if PY_VERSION_HEX >= 0x030C0000
        Py_TPFLAGS_MANAGED_DICT |
#endif
        Py_TPFLAGS_HAVE_VECTORCALL | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
    .slots = CPyFunction_slots,
};

// The type object, created lazily on first use in CPyFunction_New.
static PyTypeObject *CPyFunctionType = NULL;
|
||||
|
||||
// Vectorcall entry point for compiled functions. When the wrapper has no
// bound m_self (see CPyFunction_New / CPyFunction_Init), the real self is
// the first positional argument, so the argument window shifts by one.
// NOTE(review): assumes nargs >= 1 whenever m_self is NULL -- confirm that
// callers always pass self positionally in that configuration, otherwise
// args[0] reads out of bounds.
static PyObject* CPyFunction_Vectorcall(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) {
    CPyFunction *f = (CPyFunction *)func;
    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
    PyObject *self;
    PyCFunction meth = ((PyCFunctionObject *)f)->m_ml->ml_meth;

    self = ((PyCFunctionObject *)f)->m_self;
    if (!self) {
        self = args[0];
        args += 1;
        nargs -= 1;
    }
    // The cast presumes the wrapped function has the fastcall-with-keywords
    // signature (METH_FASTCALL | METH_KEYWORDS) -- verify at the
    // registration sites that create the PyMethodDef.
    return ((_PyCFunctionFastWithKeywords)(void(*)(void))meth)(self, args, nargs, kwnames);
}
|
||||
|
||||
|
||||
// Initialize a freshly allocated CPyFunction. Takes its own references to
// name, module and code (callers keep theirs); `ml` is stored without
// copying and ownership transfers to the object (freed in dealloc).
// When set_self is true, the object itself is installed as m_self so the
// vectorcall handler passes it as the callee's self argument.
static CPyFunction* CPyFunction_Init(CPyFunction *op, PyMethodDef *ml, PyObject* name,
                                     PyObject *module, PyObject* code, bool set_self) {
    PyCFunctionObject *cf = (PyCFunctionObject *)op;
    CPyFunction_weakreflist(op) = NULL;
    cf->m_ml = ml;
    cf->m_self = set_self ? (PyObject *) op : NULL;

    Py_XINCREF(module);
    cf->m_module = module;

    Py_INCREF(name);
    op->func_name = name;

    ((PyCMethodObject *)op)->mm_class = NULL;

    Py_XINCREF(code);
    op->func_code = code;

    CPyFunction_func_vectorcall(op) = CPyFunction_Vectorcall;
    return op;
}
|
||||
|
||||
// Create a mostly-empty code object carrying only filename, function name,
// first line and co_flags, so tracebacks and introspection report sensible
// metadata for compiled functions. Returns a new reference or NULL.
static PyObject* CPyCode_New(const char *filename, const char *funcname, int first_line, int flags) {
    PyCodeObject *code_obj = PyCode_NewEmpty(filename, funcname, first_line);
    if (unlikely(!code_obj)) {
        return NULL;
    }
    code_obj->co_flags = flags;
    return (PyObject *)code_obj;
}
|
||||
|
||||
// Heap-allocate a PyMethodDef. The caller owns the result and must release
// it with PyMem_Free (CPyFunction_dealloc does this). The name and doc
// strings are stored by pointer, not copied, so they must outlive the def.
// Returns NULL on allocation failure (no exception is set here).
static PyMethodDef* CPyMethodDef_New(const char *name, PyCFunction func, int flags, const char *doc) {
    PyMethodDef *method = (PyMethodDef *)PyMem_Malloc(sizeof(PyMethodDef));
    if (unlikely(!method)) {
        return NULL;
    }
    method->ml_name = name;
    method->ml_meth = func;
    method->ml_flags = flags;
    method->ml_doc = doc;
    return method;
}
|
||||
|
||||
PyObject* CPyFunction_New(PyObject *module, const char *filename, const char *funcname,
|
||||
PyCFunction func, int func_flags, const char *func_doc,
|
||||
int first_line, int code_flags, bool has_self_arg) {
|
||||
PyMethodDef *method = NULL;
|
||||
PyObject *code = NULL, *op = NULL;
|
||||
bool set_self = false;
|
||||
|
||||
if (!CPyFunctionType) {
|
||||
CPyFunctionType = (PyTypeObject *)PyType_FromSpec(&CPyFunction_spec);
|
||||
if (unlikely(!CPyFunctionType)) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
method = CPyMethodDef_New(funcname, func, func_flags, func_doc);
|
||||
if (unlikely(!method)) {
|
||||
goto err;
|
||||
}
|
||||
code = CPyCode_New(filename, funcname, first_line, code_flags);
|
||||
if (unlikely(!code)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
// Set m_self inside the function wrapper only if the wrapped function has no self arg
|
||||
// to pass m_self as the self arg when the function is called.
|
||||
// When the function has a self arg, it will come in the args vector passed to the
|
||||
// vectorcall handler.
|
||||
set_self = !has_self_arg;
|
||||
op = (PyObject *)CPyFunction_Init(PyObject_GC_New(CPyFunction, CPyFunctionType),
|
||||
method, PyUnicode_FromString(funcname), module,
|
||||
code, set_self);
|
||||
if (unlikely(!op)) {
|
||||
goto err;
|
||||
}
|
||||
PyObject_GC_Track(op);
|
||||
return op;
|
||||
|
||||
err:
|
||||
CPyError_OutOfMemory();
|
||||
if (method) {
|
||||
PyMem_Free(method);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
84
venv/lib/python3.11/site-packages/mypyc/lib-rt/generic_ops.c
Normal file
84
venv/lib/python3.11/site-packages/mypyc/lib-rt/generic_ops.c
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
// Generic primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.generic_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// Hash an arbitrary object and return the result as a tagged integer.
// PyObject_Hash signals failure with -1 (a real hash is never -1); that is
// translated into the CPY_INT_TAG error sentinel.
CPyTagged CPyObject_Hash(PyObject *o) {
    Py_hash_t h = PyObject_Hash(o);
    if (h == -1) {
        return CPY_INT_TAG;
    } else {
        // This is tragically annoying. The range of hash values in
        // 64-bit python covers 64-bits, and our short integers only
        // cover 63. This means that half the time we are boxing the
        // result for basically no good reason. To add insult to
        // injury it is probably about to be immediately unboxed by a
        // tp_hash wrapper.
        return CPyTagged_FromSsize_t(h);
    }
}
|
||||
|
||||
// Three-argument getattr(): fetch `name` from `v`, falling back to `defl`
// when the attribute is missing. AttributeError is swallowed; any other
// exception propagates. Returns a new reference, or NULL on error.
PyObject *CPyObject_GetAttr3(PyObject *v, PyObject *name, PyObject *defl)
{
    PyObject *attr = PyObject_GetAttr(v, name);
    if (attr != NULL) {
        return attr;
    }
    if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
        // Not a missing attribute: propagate the pending exception.
        return NULL;
    }
    PyErr_Clear();
    Py_INCREF(defl);
    return defl;
}
|
||||
|
||||
// Advance an iterator via its tp_iternext slot directly (skips the
// PyIter_Next wrapper). On normal exhaustion this returns NULL without an
// exception set, matching the tp_iternext contract.
PyObject *CPyIter_Next(PyObject *iter)
{
    return (*Py_TYPE(iter)->tp_iternext)(iter);
}

// base ** index; the Py_None third argument means "no modulus".
PyObject *CPyNumber_Power(PyObject *base, PyObject *index)
{
    return PyNumber_Power(base, index, Py_None);
}

// base **= index, in-place variant of the above.
PyObject *CPyNumber_InPlacePower(PyObject *base, PyObject *index)
{
    return PyNumber_InPlacePower(base, index, Py_None);
}
|
||||
|
||||
// obj[start:end] with tagged-integer bounds: box both bounds, build a
// slice object (step omitted), and index obj with it.
// Returns a new reference, or NULL with an exception set.
PyObject *CPyObject_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
    PyObject *start_obj = CPyTagged_AsObject(start);
    PyObject *end_obj = CPyTagged_AsObject(end);
    if (unlikely(start_obj == NULL || end_obj == NULL)) {
        // Fix: the original returned here without releasing whichever bound
        // had been boxed successfully, leaking a reference.
        Py_XDECREF(start_obj);
        Py_XDECREF(end_obj);
        return NULL;
    }
    PyObject *slice = PySlice_New(start_obj, end_obj, NULL);
    Py_DECREF(start_obj);
    Py_DECREF(end_obj);
    if (unlikely(slice == NULL)) {
        return NULL;
    }
    PyObject *result = PyObject_GetItem(obj, slice);
    Py_DECREF(slice);
    return result;
}
|
||||
|
||||
typedef PyObject *(*SetupFunction)(PyObject *);

// Walk the tp_base chain looking for the internal "__internal_mypyc_setup"
// method and invoke it with the type to construct an instance.
// NOTE(review): only the FIRST entry of each type's tp_methods table is
// inspected -- presumably the code generator always registers the setup
// function first; confirm against the generated tables.
// Returns a new reference, or NULL with RuntimeError set if not found.
PyObject *CPy_SetupObject(PyObject *type) {
    PyTypeObject *tp = (PyTypeObject *)type;
    PyMethodDef *def = NULL;
    for(; tp; tp = tp->tp_base) {
        def = tp->tp_methods;
        if (!def || !def->ml_name) {
            continue;
        }

        if (!strcmp(def->ml_name, "__internal_mypyc_setup")) {
            // Double cast via void(*)(void) avoids a function-pointer
            // type-mismatch warning.
            return ((SetupFunction)(void(*)(void))def->ml_meth)(type);
        }
    }

    PyErr_SetString(PyExc_RuntimeError, "Internal mypyc error: Unable to find object setup function");
    return NULL;
}
|
||||
451
venv/lib/python3.11/site-packages/mypyc/lib-rt/getargs.c
Normal file
451
venv/lib/python3.11/site-packages/mypyc/lib-rt/getargs.c
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
/* getargs implementation copied from Python 3.8 and stripped down to only include
|
||||
* the functions we need.
|
||||
* We also add support for required kwonly args and accepting *args / **kwargs.
|
||||
* A good idea would be to also vendor in the Fast versions and get our stuff
|
||||
* working with *that*.
|
||||
* Another probably good idea is to strip out all the formatting stuff we don't need
|
||||
* and then add in custom stuff that we do need.
|
||||
*
|
||||
* DOCUMENTATION OF THE EXTENSIONS:
|
||||
* - Arguments given after a @ format specify are required keyword-only arguments.
|
||||
* The | and $ specifiers must both appear before @.
|
||||
* - If the first character of a format string is %, then the function can support
|
||||
* *args and **kwargs. On seeing a %, the parser will consume two arguments,
|
||||
* which should be pointers to variables to store the *args and **kwargs, respectively.
|
||||
* Either pointer can be NULL, in which case the function doesn't take that
|
||||
* variety of vararg.
|
||||
* Unlike most format specifiers, the caller takes ownership of these objects
|
||||
* and is responsible for decrefing them.
|
||||
* - All arguments must use the 'O' format.
|
||||
* - There's minimal error checking of format strings. They are generated
|
||||
* programmatically and can be assumed valid.
|
||||
*/
|
||||
|
||||
// These macro definitions are copied from pyport.h in Python 3.9 and later
|
||||
// https://bugs.python.org/issue19569
|
||||
#if defined(__clang__)
|
||||
#define _Py_COMP_DIAG_PUSH _Pragma("clang diagnostic push")
|
||||
#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS \
|
||||
_Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"")
|
||||
#define _Py_COMP_DIAG_POP _Pragma("clang diagnostic pop")
|
||||
#elif defined(__GNUC__) \
|
||||
&& ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 6))
|
||||
#define _Py_COMP_DIAG_PUSH _Pragma("GCC diagnostic push")
|
||||
#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS \
|
||||
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
|
||||
#define _Py_COMP_DIAG_POP _Pragma("GCC diagnostic pop")
|
||||
#elif defined(_MSC_VER)
|
||||
#define _Py_COMP_DIAG_PUSH __pragma(warning(push))
|
||||
#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS __pragma(warning(disable: 4996))
|
||||
#define _Py_COMP_DIAG_POP __pragma(warning(pop))
|
||||
#else
|
||||
#define _Py_COMP_DIAG_PUSH
|
||||
#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS
|
||||
#define _Py_COMP_DIAG_POP
|
||||
#endif
|
||||
|
||||
#include "Python.h"
|
||||
#include "pythonsupport.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifndef PyDict_GET_SIZE
|
||||
#define PyDict_GET_SIZE(d) PyDict_Size(d)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int CPyArg_ParseTupleAndKeywords(PyObject *, PyObject *,
|
||||
const char *, const char *, const char * const *, ...);
|
||||
|
||||
/* Forward */
|
||||
static int vgetargskeywords(PyObject *, PyObject *,
|
||||
const char *, const char *, const char * const *, va_list *);
|
||||
static void skipitem(const char **, va_list *);
|
||||
|
||||
/* Support for keyword arguments donated by
|
||||
Geoff Philbrick <philbric@delphi.hks.com> */
|
||||
|
||||
/* Return false (0) for error, else true. */
|
||||
/* Return false (0) for error, else true. */
// Public entry point of this vendored getargs implementation: package the
// variadic output pointers into a va_list and delegate to vgetargskeywords.
int
CPyArg_ParseTupleAndKeywords(PyObject *args,
                             PyObject *keywords,
                             const char *format,
                             const char *fname,
                             const char * const *kwlist, ...)
{
    int retval;
    va_list va;

    va_start(va, kwlist);
    retval = vgetargskeywords(args, keywords, format, fname, kwlist, &va);
    va_end(va);
    return retval;
}
|
||||
|
||||
#define IS_END_OF_FORMAT(c) (c == '\0' || c == ';' || c == ':')
|
||||
|
||||
// Core keyword-argument parser (vendored from CPython 3.8, extended with
// the '@' required-keyword-only marker and the leading '%' *args/**kwargs
// marker documented in the file header). All value specifiers are 'O', so
// each consumed format unit pops one PyObject** from the va_list.
// Returns 1 on success, 0 with an exception set on failure.
static int
vgetargskeywords(PyObject *args, PyObject *kwargs, const char *format,
                 const char *fname, const char * const *kwlist, va_list *p_va)
{
    // min/max: index of the first optional ('|') and first keyword-only
    // ('$') parameter; INT_MAX means "marker not seen yet".
    int min = INT_MAX;
    int max = INT_MAX;
    // Index of the first *required* keyword-only parameter ('@' marker).
    int required_kwonly_start = INT_MAX;
    int has_required_kws = 0;
    int i, pos, len;
    // skip=1 defers the "too many positional args" error until min/max are
    // known, so the message can be precise.
    int skip = 0;
    Py_ssize_t nargs, nkwargs;
    PyObject *current_arg;
    int bound_pos_args;

    // Output slots for the '%' extension (*args tuple / **kwargs dict).
    PyObject **p_args = NULL, **p_kwargs = NULL;

    assert(args != NULL && PyTuple_Check(args));
    assert(kwargs == NULL || PyDict_Check(kwargs));
    assert(format != NULL);
    assert(kwlist != NULL);
    assert(p_va != NULL);

    /* scan kwlist and count the number of positional-only parameters */
    for (pos = 0; kwlist[pos] && !*kwlist[pos]; pos++) {
    }
    /* scan kwlist and get greatest possible nbr of args */
    for (len = pos; kwlist[len]; len++) {
#ifdef DEBUG
        if (!*kwlist[len]) {
            PyErr_SetString(PyExc_SystemError,
                            "Empty keyword parameter name");
            return 0;
        }
#endif
    }

    // Leading '%': the caller supplies two extra output pointers for
    // *args and **kwargs (either may be NULL).
    if (*format == '%') {
        p_args = va_arg(*p_va, PyObject **);
        p_kwargs = va_arg(*p_va, PyObject **);
        format++;
    }

    nargs = PyTuple_GET_SIZE(args);
    nkwargs = (kwargs == NULL) ? 0 : PyDict_GET_SIZE(kwargs);
    if (unlikely(nargs + nkwargs > len && !p_args && !p_kwargs)) {
        /* Adding "keyword" (when nargs == 0) prevents producing wrong error
           messages in some special cases (see bpo-31229). */
        PyErr_Format(PyExc_TypeError,
                     "%.200s%s takes at most %d %sargument%s (%zd given)",
                     (fname == NULL) ? "function" : fname,
                     (fname == NULL) ? "" : "()",
                     len,
                     (nargs == 0) ? "keyword " : "",
                     (len == 1) ? "" : "s",
                     nargs + nkwargs);
        return 0;
    }

    /* convert tuple args and keyword args in same loop, using kwlist to drive process */
    for (i = 0; i < len; i++) {
        if (*format == '|') {
#ifdef DEBUG
            if (min != INT_MAX) {
                PyErr_SetString(PyExc_SystemError,
                                "Invalid format string (| specified twice)");
                return 0;
            }
#endif

            min = i;
            format++;

#ifdef DEBUG
            if (max != INT_MAX) {
                PyErr_SetString(PyExc_SystemError,
                                "Invalid format string ($ before |)");
                return 0;
            }
#endif

            /* If there are optional args, figure out whether we have
             * required keyword arguments so that we don't bail without
             * enforcing them. */
            has_required_kws = strchr(format, '@') != NULL;
        }
        if (*format == '$') {
#ifdef DEBUG
            if (max != INT_MAX) {
                PyErr_SetString(PyExc_SystemError,
                                "Invalid format string ($ specified twice)");
                return 0;
            }
#endif

            max = i;
            format++;

#ifdef DEBUG
            if (max < pos) {
                PyErr_SetString(PyExc_SystemError,
                                "Empty parameter name after $");
                return 0;
            }
#endif
            if (skip) {
                /* Now we know the minimal and the maximal numbers of
                 * positional arguments and can raise an exception with
                 * informative message (see below). */
                break;
            }
            if (unlikely(max < nargs && !p_args)) {
                if (max == 0) {
                    PyErr_Format(PyExc_TypeError,
                                 "%.200s%s takes no positional arguments",
                                 (fname == NULL) ? "function" : fname,
                                 (fname == NULL) ? "" : "()");
                }
                else {
                    PyErr_Format(PyExc_TypeError,
                                 "%.200s%s takes %s %d positional argument%s"
                                 " (%zd given)",
                                 (fname == NULL) ? "function" : fname,
                                 (fname == NULL) ? "" : "()",
                                 (min < max) ? "at most" : "exactly",
                                 max,
                                 max == 1 ? "" : "s",
                                 nargs);
                }
                return 0;
            }
        }
        if (*format == '@') {
#ifdef DEBUG
            if (min == INT_MAX && max == INT_MAX) {
                PyErr_SetString(PyExc_SystemError,
                                "Invalid format string "
                                "(@ without preceding | and $)");
                return 0;
            }
            if (required_kwonly_start != INT_MAX) {
                PyErr_SetString(PyExc_SystemError,
                                "Invalid format string (@ specified twice)");
                return 0;
            }
#endif

            required_kwonly_start = i;
            format++;
        }
#ifdef DEBUG
        if (IS_END_OF_FORMAT(*format)) {
            PyErr_Format(PyExc_SystemError,
                         "More keyword list entries (%d) than "
                         "format specifiers (%d)", len, i);
            return 0;
        }
#endif
        if (!skip) {
            // Look for the value of parameter i: first positionally, then
            // among the keyword arguments (positional-only params i < pos
            // are never matched by name).
            if (i < nargs && i < max) {
                current_arg = Py_NewRef(PyTuple_GET_ITEM(args, i));
            }
            else if (nkwargs && i >= pos) {
                if (unlikely(PyDict_GetItemStringRef(kwargs, kwlist[i], &current_arg) < 0)) {
                    return 0;
                }
                if (current_arg) {
                    --nkwargs;
                }
            }
            else {
                current_arg = NULL;
            }

            if (current_arg) {
                // Store a borrowed reference into the caller's slot (the
                // args tuple / kwargs dict keep the value alive).
                PyObject **p = va_arg(*p_va, PyObject **);
                *p = current_arg;
                Py_DECREF(current_arg);
                format++;
                continue;
            }

            if (i < min || i >= required_kwonly_start) {
                if (likely(i < pos)) {
                    assert (min == INT_MAX);
                    assert (max == INT_MAX);
                    skip = 1;
                    /* At that moment we still don't know the minimal and
                     * the maximal numbers of positional arguments. Raising
                     * an exception is deferred until we encounter | and $
                     * or the end of the format. */
                }
                else {
                    if (i >= max) {
                        PyErr_Format(PyExc_TypeError,
                                     "%.200s%s missing required "
                                     "keyword-only argument '%s'",
                                     (fname == NULL) ? "function" : fname,
                                     (fname == NULL) ? "" : "()",
                                     kwlist[i]);
                    }
                    else {
                        PyErr_Format(PyExc_TypeError,
                                     "%.200s%s missing required "
                                     "argument '%s' (pos %d)",
                                     (fname == NULL) ? "function" : fname,
                                     (fname == NULL) ? "" : "()",
                                     kwlist[i], i+1);
                    }
                    return 0;
                }
            }
            /* current code reports success when all required args
             * fulfilled and no keyword args left, with no further
             * validation. XXX Maybe skip this in debug build ?
             */
            if (!nkwargs && !skip && !has_required_kws &&
                !p_args && !p_kwargs)
            {
                return 1;
            }
        }

        /* We are into optional args, skip through to any remaining
         * keyword args */
        skipitem(&format, p_va);
    }

    // Deferred "too many positional arguments for positional-only params"
    // error, now that min/max are known.
    if (unlikely(skip)) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s%s takes %s %d positional argument%s"
                     " (%zd given)",
                     (fname == NULL) ? "function" : fname,
                     (fname == NULL) ? "" : "()",
                     (Py_MIN(pos, min) < i) ? "at least" : "exactly",
                     Py_MIN(pos, min),
                     Py_MIN(pos, min) == 1 ? "" : "s",
                     nargs);
        return 0;
    }

#ifdef DEBUG
    if (!IS_END_OF_FORMAT(*format) &&
        (*format != '|') && (*format != '$') && (*format != '@'))
    {
        PyErr_Format(PyExc_SystemError,
                     "more argument specifiers than keyword list entries "
                     "(remaining format:'%s')", format);
        return 0;
    }
#endif

    // Collect any surplus positional arguments into *p_args. Unlike the
    // per-parameter slots above, these outputs are NEW references owned by
    // the caller (see file header).
    bound_pos_args = Py_MIN(nargs, Py_MIN(max, len));
    if (p_args) {
        *p_args = PyTuple_GetSlice(args, bound_pos_args, nargs);
        if (!*p_args) {
            return 0;
        }
    }

    if (p_kwargs) {
        /* This unfortunately needs to be special cased because if len is 0 then we
         * never go through the main loop. */
        if (unlikely(nargs > 0 && len == 0 && !p_args)) {
            PyErr_Format(PyExc_TypeError,
                         "%.200s%s takes no positional arguments",
                         (fname == NULL) ? "function" : fname,
                         (fname == NULL) ? "" : "()");

            return 0;
        }

        *p_kwargs = PyDict_New();
        if (!*p_kwargs) {
            goto latefail;
        }
    }

    if (nkwargs > 0) {
        PyObject *key, *value;
        Py_ssize_t j;
        /* make sure there are no arguments given by name and position */
        for (i = pos; i < bound_pos_args && i < len; i++) {
            PyObject *current_arg;
            if (unlikely(PyDict_GetItemStringRef(kwargs, kwlist[i], &current_arg) < 0)) {
                goto latefail;
            }
            if (unlikely(current_arg != NULL)) {
                Py_DECREF(current_arg);
                /* arg present in tuple and in dict */
                PyErr_Format(PyExc_TypeError,
                             "argument for %.200s%s given by name ('%s') "
                             "and position (%d)",
                             (fname == NULL) ? "function" : fname,
                             (fname == NULL) ? "" : "()",
                             kwlist[i], i+1);
                goto latefail;
            }
        }
        /* make sure there are no extraneous keyword arguments */
        j = 0;
        while (PyDict_Next(kwargs, &j, &key, &value)) {
            int match = 0;
            if (unlikely(!PyUnicode_Check(key))) {
                PyErr_SetString(PyExc_TypeError,
                                "keywords must be strings");
                goto latefail;
            }
            for (i = pos; i < len; i++) {
                if (PyUnicode_EqualToUTF8(key, kwlist[i])) {
                    match = 1;
                    break;
                }
            }
            if (!match) {
                // Unknown keyword: an error without **kwargs support,
                // otherwise routed into the caller's kwargs dict.
                if (unlikely(!p_kwargs)) {
                    PyErr_Format(PyExc_TypeError,
                                 "'%U' is an invalid keyword "
                                 "argument for %.200s%s",
                                 key,
                                 (fname == NULL) ? "this function" : fname,
                                 (fname == NULL) ? "" : "()");
                    goto latefail;
                } else {
                    if (PyDict_SetItem(*p_kwargs, key, value) < 0) {
                        goto latefail;
                    }
                }
            }
        }
    }

    return 1;
    /* Handle failures that have happened after we have tried to
     * create *args and **kwargs, if they exist. */
latefail:
    if (p_args) {
        Py_XDECREF(*p_args);
    }
    if (p_kwargs) {
        Py_XDECREF(*p_kwargs);
    }
    return 0;
}
|
||||
|
||||
|
||||
// Consume one format unit and its matching output slot from the va_list.
// Only the 'O' format is supported in this vendored parser (see the file
// header), so exactly one character and one PyObject** are skipped.
// p_va may be NULL when the caller is only scanning the format string.
static void
skipitem(const char **p_format, va_list *p_va)
{
    const char *format = *p_format;
    format++;

    if (p_va != NULL) {
        (void) va_arg(*p_va, PyObject **);
    }

    *p_format = format;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
569
venv/lib/python3.11/site-packages/mypyc/lib-rt/getargsfast.c
Normal file
569
venv/lib/python3.11/site-packages/mypyc/lib-rt/getargsfast.c
Normal file
|
|
@ -0,0 +1,569 @@
|
|||
/* getargskeywordsfast implementation copied from Python 3.9 and stripped down to
|
||||
* only include the functionality we need.
|
||||
*
|
||||
* We also add support for required kwonly args and accepting *args / **kwargs.
|
||||
*
|
||||
* DOCUMENTATION OF THE EXTENSIONS:
|
||||
* - Arguments given after a @ format specify required keyword-only arguments.
|
||||
* The | and $ specifiers must both appear before @.
|
||||
* - If the first character of a format string is %, then the function can support
|
||||
* *args and/or **kwargs. In this case the parser will consume two arguments,
|
||||
* which should be pointers to variables to store the *args and **kwargs, respectively.
|
||||
* Either pointer can be NULL, in which case the function doesn't take that
|
||||
* variety of vararg.
|
||||
* Unlike most format specifiers, the caller takes ownership of these objects
|
||||
* and is responsible for decrefing them.
|
||||
*/
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
#define PARSER_INITED(parser) ((parser)->kwtuple != NULL)
|
||||
|
||||
/* Forward */
|
||||
static int
|
||||
vgetargskeywordsfast_impl(PyObject *const *args, Py_ssize_t nargs,
|
||||
PyObject *kwargs, PyObject *kwnames,
|
||||
CPyArg_Parser *parser,
|
||||
va_list *p_va);
|
||||
static void skipitem_fast(const char **, va_list *);
|
||||
|
||||
/* Parse args for an arbitrary signature */
|
||||
/* Parse args for an arbitrary signature */
// General-purpose fastcall entry point: always runs the full parser.
int
CPyArg_ParseStackAndKeywords(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames,
                             CPyArg_Parser *parser, ...)
{
    int retval;
    va_list va;

    va_start(va, parser);
    retval = vgetargskeywordsfast_impl(args, nargs, NULL, kwnames, parser, &va);
    va_end(va);
    return retval;
}
|
||||
|
||||
/* Parse args for a function that takes no args */
|
||||
// Specialized entry for functions that accept no arguments: succeed
// immediately when nothing was passed; otherwise fall through to the full
// parser so it produces the proper error (or handles a stale call).
int
CPyArg_ParseStackAndKeywordsNoArgs(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames,
                                   CPyArg_Parser *parser, ...)
{
    int retval;
    va_list va;

    va_start(va, parser);
    if (nargs == 0 && kwnames == NULL) {
        // Fast path: no arguments
        retval = 1;
    } else {
        retval = vgetargskeywordsfast_impl(args, nargs, NULL, kwnames, parser, &va);
    }
    va_end(va);
    return retval;
}
|
||||
|
||||
/* Parse args for a function that takes one arg */
|
||||
// Specialized entry for functions taking exactly one argument: when it was
// supplied positionally, store it directly (as a borrowed reference from
// the fastcall stack) and skip the parser entirely.
int
CPyArg_ParseStackAndKeywordsOneArg(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames,
                                   CPyArg_Parser *parser, ...)
{
    int retval;
    va_list va;

    va_start(va, parser);
    if (kwnames == NULL && nargs == 1) {
        // Fast path: one positional argument
        PyObject **p;
        p = va_arg(va, PyObject **);
        *p = args[0];
        retval = 1;
    } else {
        retval = vgetargskeywordsfast_impl(args, nargs, NULL, kwnames, parser, &va);
    }
    va_end(va);
    return retval;
}
|
||||
|
||||
/* Parse args for a function that takes no keyword-only args, *args or **kwargs */
|
||||
// Specialized entry for simple signatures (no keyword-only args, no *args
// or **kwargs): when only positional arguments within the allowed count
// were given, copy them straight into the output slots. Requires the
// parser to have been initialized already (min/max populated).
int
CPyArg_ParseStackAndKeywordsSimple(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames,
                                   CPyArg_Parser *parser, ...)
{
    int retval;
    va_list va;

    va_start(va, parser);
    if (kwnames == NULL && PARSER_INITED(parser) &&
            nargs >= parser->min && nargs <= parser->max) {
        // Fast path: correct number of positional arguments only
        PyObject **p;
        Py_ssize_t i;
        for (i = 0; i < nargs; i++) {
            p = va_arg(va, PyObject **);
            *p = args[i];
        }
        retval = 1;
    } else {
        retval = vgetargskeywordsfast_impl(args, nargs, NULL, kwnames, parser, &va);
    }
    va_end(va);
    return retval;
}
|
||||
|
||||
#define IS_END_OF_FORMAT(c) (c == '\0' || c == ';' || c == ':')
|
||||
|
||||
|
||||
/* List of static parsers. */
|
||||
/* List of static parsers. */
// Singly-linked list of all initialized CPyArg_Parser structs; parsers are
// static in the generated code and stay registered for interpreter
// lifetime.
static struct CPyArg_Parser *static_arg_parsers = NULL;

// One-time initialization of a CPyArg_Parser: scans the keyword list,
// parses the format-string markers (% | $ @), records min/max arity, and
// builds an interned tuple of keyword names used for fast lookups.
// Idempotent: returns 1 immediately if already initialized.
// Returns 1 on success, 0 with SystemError (or another exception) set.
static int
parser_init(CPyArg_Parser *parser)
{
    const char * const *keywords;
    const char *format;
    int i, len, min, max, nkw;
    PyObject *kwtuple;

    assert(parser->keywords != NULL);
    if (PARSER_INITED(parser)) {
        return 1;
    }

    keywords = parser->keywords;
    /* scan keywords and count the number of positional-only parameters */
    for (i = 0; keywords[i] && !*keywords[i]; i++) {
    }
    parser->pos = i;
    /* scan keywords and get greatest possible nbr of args */
    for (; keywords[i]; i++) {
        if (!*keywords[i]) {
            PyErr_SetString(PyExc_SystemError,
                            "Empty keyword parameter name");
            return 0;
        }
    }
    len = i;

    parser->required_kwonly_start = INT_MAX;
    // Leading '%' flags *args/**kwargs support (two extra output slots are
    // consumed by the impl at parse time).
    if (*parser->format == '%') {
        parser->format++;
        parser->varargs = 1;
    }

    format = parser->format;
    if (format) {
        /* grab the function name or custom error msg first (mutually exclusive) */
        parser->fname = strchr(parser->format, ':');
        if (parser->fname) {
            parser->fname++;
            parser->custom_msg = NULL;
        }
        else {
            parser->custom_msg = strchr(parser->format,';');
            if (parser->custom_msg)
                parser->custom_msg++;
        }

        // Walk the format string once to locate the | $ @ markers and
        // validate their ordering.
        min = max = INT_MAX;
        for (i = 0; i < len; i++) {
            if (*format == '|') {
                if (min != INT_MAX) {
                    PyErr_SetString(PyExc_SystemError,
                                    "Invalid format string (| specified twice)");
                    return 0;
                }
                if (max != INT_MAX) {
                    PyErr_SetString(PyExc_SystemError,
                                    "Invalid format string ($ before |)");
                    return 0;
                }
                min = i;
                format++;
            }
            if (*format == '$') {
                if (max != INT_MAX) {
                    PyErr_SetString(PyExc_SystemError,
                                    "Invalid format string ($ specified twice)");
                    return 0;
                }
                if (i < parser->pos) {
                    PyErr_SetString(PyExc_SystemError,
                                    "Empty parameter name after $");
                    return 0;
                }
                max = i;
                format++;
            }
            if (*format == '@') {
                if (parser->required_kwonly_start != INT_MAX) {
                    PyErr_SetString(PyExc_SystemError,
                                    "Invalid format string (@ specified twice)");
                    return 0;
                }
                if (min == INT_MAX && max == INT_MAX) {
                    PyErr_SetString(PyExc_SystemError,
                                    "Invalid format string "
                                    "(@ without preceding | and $)");
                    return 0;
                }
                format++;
                parser->has_required_kws = 1;
                parser->required_kwonly_start = i;
            }
            if (IS_END_OF_FORMAT(*format)) {
                PyErr_Format(PyExc_SystemError,
                             "More keyword list entries (%d) than "
                             "format specifiers (%d)", len, i);
                return 0;
            }

            skipitem_fast(&format, NULL);
        }
        parser->min = Py_MIN(min, len);
        parser->max = Py_MIN(max, len);

        if (!IS_END_OF_FORMAT(*format) && (*format != '|') && (*format != '$')) {
            PyErr_Format(PyExc_SystemError,
                         "more argument specifiers than keyword list entries "
                         "(remaining format:'%s')", format);
            return 0;
        }
    }

    // Build the tuple of keyword names (positional-only names excluded),
    // interned so keyword lookup can usually succeed by pointer identity.
    nkw = len - parser->pos;
    kwtuple = PyTuple_New(nkw);
    if (kwtuple == NULL) {
        return 0;
    }
    keywords = parser->keywords + parser->pos;
    for (i = 0; i < nkw; i++) {
        PyObject *str = PyUnicode_FromString(keywords[i]);
        if (str == NULL) {
            Py_DECREF(kwtuple);
            return 0;
        }
        PyUnicode_InternInPlace(&str);
        PyTuple_SET_ITEM(kwtuple, i, str);
    }
    parser->kwtuple = kwtuple;

    // Register on the global parser list (marks the parser as initialized
    // via its non-NULL kwtuple; see PARSER_INITED).
    assert(parser->next == NULL);
    parser->next = static_arg_parsers;
    static_arg_parsers = parser;
    return 1;
}
|
||||
|
||||
/* Look up `key` among keyword names passed in the vectorcall convention.
 * kwnames holds the names; kwstack holds the matching values in order.
 * Returns a borrowed reference to the value, or NULL when absent. */
static PyObject*
find_keyword(PyObject *kwnames, PyObject *const *kwstack, PyObject *key)
{
    Py_ssize_t count = PyTuple_GET_SIZE(kwnames);

    /* Fast pass: keyword keys should be interned strings, so pointer
     * identity normally finds the match. */
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        if (PyTuple_GET_ITEM(kwnames, idx) == key) {
            return kwstack[idx];
        }
    }

    /* Slow pass: fall back to comparing string contents. */
    for (Py_ssize_t idx = 0; idx < count; idx++) {
        PyObject *name = PyTuple_GET_ITEM(kwnames, idx);
        assert(PyUnicode_Check(name));
        if (PyUnicode_Equal(name, key)) {
            return kwstack[idx];
        }
    }
    return NULL;
}
|
||||
|
||||
/* Core of the fast CPyArg_Parse* keyword-argument parser.
 *
 * args/nargs:    positional arguments (vectorcall style).
 * kwargs|kwnames: keyword arguments either as a dict, or as a tuple of
 *                names with values stacked after the positionals; at most
 *                one of the two is non-NULL (asserted below).
 * parser:        cached format/keyword metadata, lazily initialized here.
 * p_va:          varargs with one PyObject** output slot per format unit;
 *                when parser->varargs is set, the *args/**kwargs out-slots
 *                come first.
 *
 * Returns 1 on success; 0 with a Python exception set on failure.
 * Output slots receive borrowed references (except *p_args/*p_kwargs,
 * which are new objects owned by the caller).
 */
static int
vgetargskeywordsfast_impl(PyObject *const *args, Py_ssize_t nargs,
                          PyObject *kwargs, PyObject *kwnames,
                          CPyArg_Parser *parser,
                          va_list *p_va)
{
    PyObject *kwtuple;
    const char *format;
    PyObject *keyword;
    int i, pos, len;
    Py_ssize_t nkwargs;
    PyObject *current_arg;
    PyObject *const *kwstack = NULL;
    int bound_pos_args;
    PyObject **p_args = NULL, **p_kwargs = NULL;

    assert(kwargs == NULL || PyDict_Check(kwargs));
    assert(kwargs == NULL || kwnames == NULL);
    assert(p_va != NULL);

    if (!parser_init(parser)) {
        return 0;
    }

    kwtuple = parser->kwtuple;
    pos = parser->pos;           /* number of positional-only parameters */
    len = pos + (int)PyTuple_GET_SIZE(kwtuple);  /* total parameter count */

    /* The *args/**kwargs out-slots precede the per-parameter slots. */
    if (parser->varargs) {
        p_args = va_arg(*p_va, PyObject **);
        p_kwargs = va_arg(*p_va, PyObject **);
    }

    if (kwargs != NULL) {
        nkwargs = PyDict_GET_SIZE(kwargs);
    }
    else if (kwnames != NULL) {
        nkwargs = PyTuple_GET_SIZE(kwnames);
        kwstack = args + nargs;  /* keyword values follow positionals */
    }
    else {
        nkwargs = 0;
    }
    if (nargs + nkwargs > len && !p_args && !p_kwargs) {
        /* Adding "keyword" (when nargs == 0) prevents producing wrong error
           messages in some special cases (see bpo-31229). */
        PyErr_Format(PyExc_TypeError,
                     "%.200s%s takes at most %d %sargument%s (%zd given)",
                     (parser->fname == NULL) ? "function" : parser->fname,
                     (parser->fname == NULL) ? "" : "()",
                     len,
                     (nargs == 0) ? "keyword " : "",
                     (len == 1) ? "" : "s",
                     nargs + nkwargs);
        return 0;
    }
    if (parser->max < nargs && !p_args) {
        if (parser->max == 0) {
            PyErr_Format(PyExc_TypeError,
                         "%.200s%s takes no positional arguments",
                         (parser->fname == NULL) ? "function" : parser->fname,
                         (parser->fname == NULL) ? "" : "()");
        }
        else {
            PyErr_Format(PyExc_TypeError,
                         "%.200s%s takes %s %d positional argument%s (%zd given)",
                         (parser->fname == NULL) ? "function" : parser->fname,
                         (parser->fname == NULL) ? "" : "()",
                         (parser->min < parser->max) ? "at most" : "exactly",
                         parser->max,
                         parser->max == 1 ? "" : "s",
                         nargs);
        }
        return 0;
    }

    format = parser->format;

    /* convert tuple args and keyword args in same loop, using kwtuple to drive process */
    for (i = 0; i < len; i++) {
        /* Skip section markers: '|' optional, '$' keyword-only,
         * '@' required keyword-only. */
        if (*format == '|') {
            format++;
        }
        if (*format == '$') {
            format++;
        }
        if (*format == '@') {
            format++;
        }
        assert(!IS_END_OF_FORMAT(*format));

        if (i < nargs && i < parser->max) {
            current_arg = args[i];
        }
        else if (nkwargs && i >= pos) {
            keyword = PyTuple_GET_ITEM(kwtuple, i - pos);
            if (kwargs != NULL) {
                current_arg = PyDict_GetItemWithError(kwargs, keyword);
                if (!current_arg && PyErr_Occurred()) {
                    return 0;
                }
            }
            else {
                current_arg = find_keyword(kwnames, kwstack, keyword);
            }
            if (current_arg) {
                --nkwargs;  /* one fewer keyword left to account for */
            }
        }
        else {
            current_arg = NULL;
        }

        if (current_arg) {
            /* Store a borrowed reference into the caller's out-slot. */
            PyObject **p = va_arg(*p_va, PyObject **);
            *p = current_arg;
            format++;
            continue;
        }

        if (i < parser->min || i >= parser->required_kwonly_start) {
            /* Less arguments than required */
            if (i < pos) {
                Py_ssize_t min = Py_MIN(pos, parser->min);
                PyErr_Format(PyExc_TypeError,
                             "%.200s%s takes %s %d positional argument%s"
                             " (%zd given)",
                             (parser->fname == NULL) ? "function" : parser->fname,
                             (parser->fname == NULL) ? "" : "()",
                             min < parser->max ? "at least" : "exactly",
                             min,
                             min == 1 ? "" : "s",
                             nargs);
            }
            else {
                keyword = PyTuple_GET_ITEM(kwtuple, i - pos);
                if (i >= parser->max) {
                    PyErr_Format(PyExc_TypeError, "%.200s%s missing required "
                                 "keyword-only argument '%U'",
                                 (parser->fname == NULL) ? "function" : parser->fname,
                                 (parser->fname == NULL) ? "" : "()",
                                 keyword);
                }
                else {
                    PyErr_Format(PyExc_TypeError, "%.200s%s missing required "
                                 "argument '%U' (pos %d)",
                                 (parser->fname == NULL) ? "function" : parser->fname,
                                 (parser->fname == NULL) ? "" : "()",
                                 keyword, i+1);
                }
            }
            return 0;
        }
        /* current code reports success when all required args
         * fulfilled and no keyword args left, with no further
         * validation. XXX Maybe skip this in debug build ?
         */
        if (!nkwargs && !parser->has_required_kws && !p_args && !p_kwargs) {
            return 1;
        }

        /* We are into optional args, skip through to any remaining
         * keyword args */
        skipitem_fast(&format, p_va);
    }

    assert(IS_END_OF_FORMAT(*format) || (*format == '|') || (*format == '$'));

    /* Number of positionals actually consumed by named parameters. */
    bound_pos_args = Py_MIN(nargs, Py_MIN(parser->max, len));
    if (p_args) {
        /* Leftover positionals become the caller's *args tuple. */
        *p_args = PyTuple_New(nargs - bound_pos_args);
        if (!*p_args) {
            return 0;
        }
        for (i = bound_pos_args; i < nargs; i++) {
            PyObject *arg = args[i];
            Py_INCREF(arg);
            PyTuple_SET_ITEM(*p_args, i - bound_pos_args, arg);
        }
    }

    if (p_kwargs) {
        /* This unfortunately needs to be special cased because if len is 0 then we
         * never go through the main loop. */
        if (nargs > 0 && len == 0 && !p_args) {
            PyErr_Format(PyExc_TypeError,
                         "%.200s%s takes no positional arguments",
                         (parser->fname == NULL) ? "function" : parser->fname,
                         (parser->fname == NULL) ? "" : "()");

            return 0;
        }

        *p_kwargs = PyDict_New();
        if (!*p_kwargs) {
            goto latefail;
        }
    }

    if (nkwargs > 0) {
        Py_ssize_t j;
        PyObject *value;
        /* make sure there are no arguments given by name and position */
        for (i = pos; i < bound_pos_args; i++) {
            keyword = PyTuple_GET_ITEM(kwtuple, i - pos);
            if (kwargs != NULL) {
                current_arg = PyDict_GetItemWithError(kwargs, keyword);
                if (!current_arg && PyErr_Occurred()) {
                    goto latefail;
                }
            }
            else {
                current_arg = find_keyword(kwnames, kwstack, keyword);
            }
            if (current_arg) {
                /* arg present in tuple and in dict */
                PyErr_Format(PyExc_TypeError,
                             "argument for %.200s%s given by name ('%U') "
                             "and position (%d)",
                             (parser->fname == NULL) ? "function" : parser->fname,
                             (parser->fname == NULL) ? "" : "()",
                             keyword, i+1);
                goto latefail;
            }
        }
        /* make sure there are no extraneous keyword arguments */
        j = 0;
        while (1) {
            int match;
            if (kwargs != NULL) {
                if (!PyDict_Next(kwargs, &j, &keyword, &value))
                    break;
            }
            else {
                if (j >= PyTuple_GET_SIZE(kwnames))
                    break;
                keyword = PyTuple_GET_ITEM(kwnames, j);
                value = kwstack[j];
                j++;
            }

            match = PySequence_Contains(kwtuple, keyword);
            if (match <= 0) {
                if (!match) {
                    /* Unknown keyword: error, or stash into **kwargs. */
                    if (!p_kwargs) {
                        PyErr_Format(PyExc_TypeError,
                                     "'%S' is an invalid keyword "
                                     "argument for %.200s%s",
                                     keyword,
                                     (parser->fname == NULL) ? "this function" : parser->fname,
                                     (parser->fname == NULL) ? "" : "()");
                        goto latefail;
                    } else {
                        if (PyDict_SetItem(*p_kwargs, keyword, value) < 0) {
                            goto latefail;
                        }
                    }
                } else {
                    /* match < 0: comparison raised; propagate. */
                    goto latefail;
                }
            }
        }
    }

    return 1;
    /* Handle failures that have happened after we have tried to
     * create *args and **kwargs, if they exist. */
latefail:
    if (p_args) {
        Py_XDECREF(*p_args);
    }
    if (p_kwargs) {
        Py_XDECREF(*p_kwargs);
    }
    return 0;
}
|
||||
|
||||
static void
|
||||
skipitem_fast(const char **p_format, va_list *p_va)
|
||||
{
|
||||
const char *format = *p_format;
|
||||
char c = *format++;
|
||||
|
||||
if (p_va != NULL) {
|
||||
(void) va_arg(*p_va, PyObject **);
|
||||
}
|
||||
|
||||
*p_format = format;
|
||||
}
|
||||
25
venv/lib/python3.11/site-packages/mypyc/lib-rt/init.c
Normal file
25
venv/lib/python3.11/site-packages/mypyc/lib-rt/init.c
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
#include "static_data.c"
|
||||
|
||||
struct ExcDummyStruct _CPy_ExcDummyStruct = { PyObject_HEAD_INIT(NULL) };
|
||||
PyObject *_CPy_ExcDummy = (PyObject *)&_CPy_ExcDummyStruct;
|
||||
|
||||
// System-wide empty tuple constant
|
||||
PyObject * __mypyc_empty_tuple__ = NULL;
|
||||
|
||||
// Because its dynamic linker is more restricted than linux/OS X,
|
||||
// Windows doesn't allow initializing globals with values from
|
||||
// other dynamic libraries. This means we need to initialize
|
||||
// things at load time.
|
||||
void CPy_Init(void) {
|
||||
_CPy_ExcDummyStruct.ob_base.ob_type = &PyBaseObject_Type;
|
||||
|
||||
// Initialize system-wide empty tuple constant
|
||||
if (__mypyc_empty_tuple__ == NULL) {
|
||||
__mypyc_empty_tuple__ = PyTuple_New(0);
|
||||
if (!__mypyc_empty_tuple__) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
}
|
||||
}
|
||||
709
venv/lib/python3.11/site-packages/mypyc/lib-rt/int_ops.c
Normal file
709
venv/lib/python3.11/site-packages/mypyc/lib-rt/int_ops.c
Normal file
|
|
@ -0,0 +1,709 @@
|
|||
// Int primitive operations (tagged arbitrary-precision integers)
|
||||
//
|
||||
// These are registered in mypyc.primitives.int_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifndef _WIN32
|
||||
// On 64-bit Linux and macOS, ssize_t and long are both 64 bits, and
|
||||
// PyLong_FromLong is faster than PyLong_FromSsize_t, so use the faster one
|
||||
#define CPyLong_FromSsize_t PyLong_FromLong
|
||||
#else
|
||||
// On 64-bit Windows, ssize_t is 64 bits but long is 32 bits, so we
|
||||
// can't use the above trick
|
||||
#define CPyLong_FromSsize_t PyLong_FromSsize_t
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
# if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 8)
|
||||
# define CPY_CLZ(x) __builtin_clzll((unsigned long long)(x))
|
||||
# define CPY_BITS 64
|
||||
# else
|
||||
# define CPY_CLZ(x) __builtin_clz((unsigned int)(x))
|
||||
# define CPY_BITS 32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// Box a Py_ssize_t into a tagged int.  Values whose left-shift by one
// would not fit in Py_ssize_t are stored as a tagged PyLong instead.
CPyTagged CPyTagged_FromSsize_t(Py_ssize_t value) {
    if (likely(!CPyTagged_TooBig(value))) {
        return value << 1;  // short int: tag bit is 0
    }
    PyObject *boxed = PyLong_FromSsize_t(value);
    return ((CPyTagged)boxed) | CPY_INT_TAG;
}
|
||||
|
||||
CPyTagged CPyTagged_FromVoidPtr(void *ptr) {
|
||||
if ((uintptr_t)ptr > PY_SSIZE_T_MAX) {
|
||||
PyObject *object = PyLong_FromVoidPtr(ptr);
|
||||
return ((CPyTagged)object) | CPY_INT_TAG;
|
||||
} else {
|
||||
return CPyTagged_FromSsize_t((Py_ssize_t)ptr);
|
||||
}
|
||||
}
|
||||
|
||||
// Box an int64_t into a tagged int; values too big for the shifted
// representation become a tagged PyLong.
CPyTagged CPyTagged_FromInt64(int64_t value) {
    if (likely(!CPyTagged_TooBigInt64(value))) {
        return value << 1;  // short int: tag bit is 0
    }
    PyObject *boxed = PyLong_FromLongLong(value);
    return ((CPyTagged)boxed) | CPY_INT_TAG;
}
|
||||
|
||||
// Convert a tagged int to a PyObject, returning a NEW reference.
PyObject *CPyTagged_AsObject(CPyTagged x) {
    if (unlikely(CPyTagged_CheckLong(x))) {
        // Already boxed: just hand out another reference.
        PyObject *obj = CPyTagged_LongAsObject(x);
        Py_INCREF(obj);
        return obj;
    }
    // Short int: materialize a PyLong on demand.
    PyObject *obj = CPyLong_FromSsize_t(CPyTagged_ShortAsSsize_t(x));
    if (obj == NULL) {
        CPyError_OutOfMemory();
    }
    return obj;
}
|
||||
|
||||
// Convert a tagged int to a PyObject, STEALING the caller's reference
// for the boxed case (no INCREF, unlike CPyTagged_AsObject).
PyObject *CPyTagged_StealAsObject(CPyTagged x) {
    if (unlikely(CPyTagged_CheckLong(x))) {
        return CPyTagged_LongAsObject(x);
    }
    PyObject *obj = CPyLong_FromSsize_t(CPyTagged_ShortAsSsize_t(x));
    if (obj == NULL) {
        CPyError_OutOfMemory();
    }
    return obj;
}
|
||||
|
||||
// Unbox a tagged int to Py_ssize_t.  For boxed values this defers to
// PyLong_AsSsize_t, which can set an overflow error for the caller.
Py_ssize_t CPyTagged_AsSsize_t(CPyTagged x) {
    if (unlikely(CPyTagged_CheckLong(x))) {
        return PyLong_AsSsize_t(CPyTagged_LongAsObject(x));
    }
    return CPyTagged_ShortAsSsize_t(x);
}
|
||||
|
||||
CPy_NOINLINE
// Increment the refcount of a tagged int's boxed object; short
// (unboxed) tagged ints carry no heap object, so they are a no-op.
void CPyTagged_IncRef(CPyTagged x) {
    if (unlikely(CPyTagged_CheckLong(x))) {
        Py_INCREF(CPyTagged_LongAsObject(x));
    }
}
|
||||
|
||||
CPy_NOINLINE
// Decrement the refcount of a tagged int's boxed object; no-op for
// short tagged ints.
void CPyTagged_DecRef(CPyTagged x) {
    if (unlikely(CPyTagged_CheckLong(x))) {
        Py_DECREF(CPyTagged_LongAsObject(x));
    }
}
|
||||
|
||||
CPy_NOINLINE
// Like CPyTagged_DecRef, but tolerates a NULL boxed pointer
// (Py_XDECREF semantics).
void CPyTagged_XDecRef(CPyTagged x) {
    if (unlikely(CPyTagged_CheckLong(x))) {
        Py_XDECREF(CPyTagged_LongAsObject(x));
    }
}
|
||||
|
||||
// Tagged int negation slow path, where the result may be a long integer
|
||||
CPyTagged CPyTagged_Negate_(CPyTagged num) {
|
||||
PyObject *num_obj = CPyTagged_AsObject(num);
|
||||
PyObject *result = PyNumber_Negative(num_obj);
|
||||
if (result == NULL) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
Py_DECREF(num_obj);
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// Tagged int addition slow path, where the result may be a long integer
|
||||
CPyTagged CPyTagged_Add_(CPyTagged left, CPyTagged right) {
|
||||
PyObject *left_obj = CPyTagged_AsObject(left);
|
||||
PyObject *right_obj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_Add(left_obj, right_obj);
|
||||
if (result == NULL) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
Py_DECREF(left_obj);
|
||||
Py_DECREF(right_obj);
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// Tagged int subtraction slow path, where the result may be a long integer
|
||||
CPyTagged CPyTagged_Subtract_(CPyTagged left, CPyTagged right) {
|
||||
PyObject *left_obj = CPyTagged_AsObject(left);
|
||||
PyObject *right_obj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_Subtract(left_obj, right_obj);
|
||||
if (result == NULL) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
Py_DECREF(left_obj);
|
||||
Py_DECREF(right_obj);
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// Tagged int multiplication slow path, where the result may be a long integer
|
||||
CPyTagged CPyTagged_Multiply_(CPyTagged left, CPyTagged right) {
|
||||
PyObject *left_obj = CPyTagged_AsObject(left);
|
||||
PyObject *right_obj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_Multiply(left_obj, right_obj);
|
||||
if (result == NULL) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
Py_DECREF(left_obj);
|
||||
Py_DECREF(right_obj);
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// Tagged int // slow path, where the result may be a long integer (or raise)
|
||||
CPyTagged CPyTagged_FloorDivide_(CPyTagged left, CPyTagged right) {
|
||||
PyObject *left_obj = CPyTagged_AsObject(left);
|
||||
PyObject *right_obj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_FloorDivide(left_obj, right_obj);
|
||||
Py_DECREF(left_obj);
|
||||
Py_DECREF(right_obj);
|
||||
// Handle exceptions honestly because it could be ZeroDivisionError
|
||||
if (result == NULL) {
|
||||
return CPY_INT_TAG;
|
||||
} else {
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Tagged int % slow path, where the result may be a long integer (or raise)
|
||||
CPyTagged CPyTagged_Remainder_(CPyTagged left, CPyTagged right) {
|
||||
PyObject *left_obj = CPyTagged_AsObject(left);
|
||||
PyObject *right_obj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_Remainder(left_obj, right_obj);
|
||||
Py_DECREF(left_obj);
|
||||
Py_DECREF(right_obj);
|
||||
// Handle exceptions honestly because it could be ZeroDivisionError
|
||||
if (result == NULL) {
|
||||
return CPY_INT_TAG;
|
||||
} else {
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Equality slow path.  NOTE(review): the short-`right` early-out returns
// false unconditionally, which is only correct if the caller guarantees
// `left` is boxed here (short==short is presumably handled inline by the
// fast path) -- confirm against the call sites/CPy.h.
bool CPyTagged_IsEq_(CPyTagged left, CPyTagged right) {
    if (CPyTagged_CheckShort(right)) {
        return false;
    } else {
        PyObject *left_obj = CPyTagged_AsObject(left);
        PyObject *right_obj = CPyTagged_AsObject(right);
        int result = PyObject_RichCompareBool(left_obj, right_obj, Py_EQ);
        Py_DECREF(left_obj);
        Py_DECREF(right_obj);
        if (result == -1) {
            // NOTE(review): -1 means the comparison raised; mapping that to
            // OOM mirrors the other slow paths but may mislabel the error.
            CPyError_OutOfMemory();
        }
        return result;
    }
}
|
||||
|
||||
// Less-than slow path: box both operands and use rich comparison.
bool CPyTagged_IsLt_(CPyTagged left, CPyTagged right) {
    PyObject *left_obj = CPyTagged_AsObject(left);
    PyObject *right_obj = CPyTagged_AsObject(right);
    int result = PyObject_RichCompareBool(left_obj, right_obj, Py_LT);
    Py_DECREF(left_obj);
    Py_DECREF(right_obj);
    if (result == -1) {
        // NOTE(review): -1 means the comparison raised; treated as OOM to
        // match the sibling slow paths.
        CPyError_OutOfMemory();
    }
    return result;
}
|
||||
|
||||
// Parse a str object into a Python int using the given (tagged) base.
// Returns NULL with an exception set on invalid input.
PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base) {
    Py_ssize_t base_size_t = CPyTagged_AsSsize_t(base);
    return PyLong_FromUnicodeObject(o, base_size_t);
}
|
||||
|
||||
// Parse a str object into a Python int in base 10.
PyObject *CPyLong_FromStr(PyObject *o) {
    return CPyLong_FromStrWithBase(o, CPyTagged_FromSsize_t(10));
}
|
||||
|
||||
// Convert a double to a tagged int (truncating toward zero via the
// C cast on the fast path).  NaN and out-of-range values fall through to
// PyLong_FromDouble, which raises; we then return CPY_INT_TAG as the
// error sentinel.
CPyTagged CPyTagged_FromFloat(double f) {
    // The open bounds exclude values whose truncation could overflow the
    // shifted (short) representation.
    if (f < ((double)CPY_TAGGED_MAX + 1.0) && f > (CPY_TAGGED_MIN - 1.0)) {
        return (Py_ssize_t)f << 1;
    }
    PyObject *o = PyLong_FromDouble(f);
    if (o == NULL)
        return CPY_INT_TAG;
    return CPyTagged_StealFromObject(o);
}
|
||||
|
||||
// str() of a C bool: stringify the matching Python bool singleton.
PyObject *CPyBool_Str(bool b) {
    PyObject *singleton = b ? Py_True : Py_False;
    return PyObject_Str(singleton);
}
|
||||
|
||||
// Bitwise op '&', '|' or '^' using the generic (slow) API
|
||||
static CPyTagged GenericBitwiseOp(CPyTagged a, CPyTagged b, char op) {
|
||||
PyObject *aobj = CPyTagged_AsObject(a);
|
||||
PyObject *bobj = CPyTagged_AsObject(b);
|
||||
PyObject *r;
|
||||
if (op == '&') {
|
||||
r = PyNumber_And(aobj, bobj);
|
||||
} else if (op == '|') {
|
||||
r = PyNumber_Or(aobj, bobj);
|
||||
} else {
|
||||
r = PyNumber_Xor(aobj, bobj);
|
||||
}
|
||||
if (unlikely(r == NULL)) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
Py_DECREF(aobj);
|
||||
Py_DECREF(bobj);
|
||||
return CPyTagged_StealFromObject(r);
|
||||
}
|
||||
|
||||
// Return pointer to digits of a PyLong object. If it's a short
// integer, place digits in the buffer buf instead to avoid memory
// allocation (it's assumed to be big enough). Return the number of
// digits in *size. *size is negative if the integer is negative.
static digit *GetIntDigits(CPyTagged n, Py_ssize_t *size, digit *buf) {
    if (CPyTagged_CheckShort(n)) {
        Py_ssize_t val = CPyTagged_ShortAsSsize_t(n);
        bool neg = val < 0;
        int len = 1;
        if (neg) {
            // NOTE(review): assumes short tagged ints span at most half the
            // Py_ssize_t range, so -val cannot overflow -- confirm vs CPy.h.
            val = -val;
        }
        // Split |val| into up to three PyLong_SHIFT-bit digits,
        // least-significant first.
        buf[0] = val & PyLong_MASK;
        if (val > (Py_ssize_t)PyLong_MASK) {
            val >>= PyLong_SHIFT;
            buf[1] = val & PyLong_MASK;
            if (val > (Py_ssize_t)PyLong_MASK) {
                buf[2] = val >> PyLong_SHIFT;
                len = 3;
            } else {
                len = 2;
            }
        }
        *size = neg ? -len : len;
        return buf;
    } else {
        // Boxed int: expose the PyLong's internal digit array directly.
        PyLongObject *obj = (PyLongObject *)CPyTagged_LongAsObject(n);
        *size = CPY_LONG_SIZE_SIGNED(obj);
        return &CPY_LONG_DIGIT(obj, 0);
    }
}
|
||||
|
||||
// Shared implementation of bitwise '&', '|' and '^' (specified by op) for at least
// one long operand. This is somewhat optimized for performance.
//
// Strategy: read both operands' digit arrays (GetIntDigits), bail to the
// generic object protocol for negative values, otherwise combine digits
// directly into a PyLongWriter buffer.
// NOTE(review): PyLongWriter_Create/Finish is a recent CPython API --
// confirm the minimum supported Python version provides it.
CPyTagged CPyTagged_BitwiseLongOp_(CPyTagged a, CPyTagged b, char op) {
    // Directly access the digits, as there is no fast C API function for this.
    digit abuf[3];
    digit bbuf[3];
    Py_ssize_t asize;
    Py_ssize_t bsize;
    digit *adigits = GetIntDigits(a, &asize, abuf);
    digit *bdigits = GetIntDigits(b, &bsize, bbuf);

    if (unlikely(asize < 0 || bsize < 0)) {
        // Negative operand. This is slower, but bitwise ops on them are pretty rare.
        return GenericBitwiseOp(a, b, op);
    }
    // Optimized implementation for two non-negative integers.
    // Swap a and b as needed to ensure a is no longer than b.
    if (asize > bsize) {
        digit *tmp = adigits;
        adigits = bdigits;
        bdigits = tmp;
        Py_ssize_t tmp_size = asize;
        asize = bsize;
        bsize = tmp_size;
    }
    // '&' of non-negatives can't need more digits than the shorter operand;
    // '|' and '^' can need as many as the longer one.
    void *digits = NULL;
    PyLongWriter *writer = PyLongWriter_Create(0, op == '&' ? asize : bsize, &digits);
    if (unlikely(writer == NULL)) {
        CPyError_OutOfMemory();
    }
    Py_ssize_t i;
    if (op == '&') {
        for (i = 0; i < asize; i++) {
            ((digit *)digits)[i] = adigits[i] & bdigits[i];
        }
    } else {
        if (op == '|') {
            for (i = 0; i < asize; i++) {
                ((digit *)digits)[i] = adigits[i] | bdigits[i];
            }
        } else {
            for (i = 0; i < asize; i++) {
                ((digit *)digits)[i] = adigits[i] ^ bdigits[i];
            }
        }
        // Copy the longer operand's remaining digits unchanged
        // (OR/XOR with implicit zeros).
        for (; i < bsize; i++) {
            ((digit *)digits)[i] = bdigits[i];
        }
    }
    return CPyTagged_StealFromObject(PyLongWriter_Finish(writer));
}
|
||||
|
||||
// Bitwise '~' slow path
|
||||
CPyTagged CPyTagged_Invert_(CPyTagged num) {
|
||||
PyObject *obj = CPyTagged_AsObject(num);
|
||||
PyObject *result = PyNumber_Invert(obj);
|
||||
if (unlikely(result == NULL)) {
|
||||
CPyError_OutOfMemory();
|
||||
}
|
||||
Py_DECREF(obj);
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// Bitwise '>>' slow path
|
||||
CPyTagged CPyTagged_Rshift_(CPyTagged left, CPyTagged right) {
|
||||
// Long integer or negative shift -- use generic op
|
||||
PyObject *lobj = CPyTagged_AsObject(left);
|
||||
PyObject *robj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_Rshift(lobj, robj);
|
||||
Py_DECREF(lobj);
|
||||
Py_DECREF(robj);
|
||||
if (result == NULL) {
|
||||
// Propagate error (could be negative shift count)
|
||||
return CPY_INT_TAG;
|
||||
}
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// Bitwise '<<' slow path
|
||||
CPyTagged CPyTagged_Lshift_(CPyTagged left, CPyTagged right) {
|
||||
// Long integer or out of range shift -- use generic op
|
||||
PyObject *lobj = CPyTagged_AsObject(left);
|
||||
PyObject *robj = CPyTagged_AsObject(right);
|
||||
PyObject *result = PyNumber_Lshift(lobj, robj);
|
||||
Py_DECREF(lobj);
|
||||
Py_DECREF(robj);
|
||||
if (result == NULL) {
|
||||
// Propagate error (could be negative shift count)
|
||||
return CPY_INT_TAG;
|
||||
}
|
||||
return CPyTagged_StealFromObject(result);
|
||||
}
|
||||
|
||||
// i64 unboxing slow path
// Returns CPY_LL_INT_ERROR with an exception set on failure; note that a
// genuine value of -1 also takes the PyErr_Occurred() check, which is
// how the -1-as-sentinel ambiguity is resolved.
int64_t CPyLong_AsInt64_(PyObject *o) {
    int overflow;
    int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow);
    if (result == -1) {
        if (PyErr_Occurred()) {
            return CPY_LL_INT_ERROR;
        } else if (overflow) {
            PyErr_SetString(PyExc_ValueError, "int too large to convert to i64");
            return CPY_LL_INT_ERROR;
        }
    }
    return result;
}
|
||||
|
||||
// Floor division for i64 with Python semantics (result rounds toward
// negative infinity).  Sets ZeroDivisionError / OverflowError and
// returns CPY_LL_INT_ERROR on error.
int64_t CPyInt64_Divide(int64_t x, int64_t y) {
    if (y == 0) {
        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
        return CPY_LL_INT_ERROR;
    }
    if (x == INT64_MIN && y == -1) {
        // INT64_MIN / -1 would overflow (hardware trap on some CPUs).
        PyErr_SetString(PyExc_OverflowError, "integer division overflow");
        return CPY_LL_INT_ERROR;
    }
    int64_t quotient = x / y;  // C truncates toward zero...
    // ...so step down once when the signs differ and division was inexact.
    if ((x < 0) != (y < 0) && quotient * y != x) {
        quotient--;
    }
    return quotient;
}
|
||||
|
||||
// Modulo for i64 with Python semantics (result takes the divisor's sign).
// Sets ZeroDivisionError and returns CPY_LL_INT_ERROR on error.
int64_t CPyInt64_Remainder(int64_t x, int64_t y) {
    if (y == 0) {
        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
        return CPY_LL_INT_ERROR;
    }
    // Edge case: INT64_MIN % -1 would trap in hardware; Python's answer is 0.
    if (x == INT64_MIN && y == -1) {
        return 0;
    }
    int64_t rem = x % y;  // C remainder takes the dividend's sign...
    // ...so shift into the divisor's sign range when they disagree.
    if ((x < 0) != (y < 0) && rem != 0) {
        rem += y;
    }
    return rem;
}
|
||||
|
||||
// i32 unboxing slow path
// Values outside the i32 range are folded into the same overflow/-1
// sentinel path that PyLong_AsLongAndOverflow uses, then diagnosed via
// PyErr_Occurred()/overflow below.  Returns CPY_LL_INT_ERROR on failure.
int32_t CPyLong_AsInt32_(PyObject *o) {
    int overflow;
    long result = PyLong_AsLongAndOverflow(o, &overflow);
    if (result > 0x7fffffffLL || result < -0x80000000LL) {
        // Fits in long but not in i32: report as overflow.
        overflow = 1;
        result = -1;
    }
    if (result == -1) {
        if (PyErr_Occurred()) {
            return CPY_LL_INT_ERROR;
        } else if (overflow) {
            PyErr_SetString(PyExc_ValueError, "int too large to convert to i32");
            return CPY_LL_INT_ERROR;
        }
    }
    return result;
}
|
||||
|
||||
// Floor division for i32 with Python semantics.  Sets
// ZeroDivisionError / OverflowError and returns CPY_LL_INT_ERROR on error.
int32_t CPyInt32_Divide(int32_t x, int32_t y) {
    if (y == 0) {
        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
        return CPY_LL_INT_ERROR;
    }
    if (x == INT32_MIN && y == -1) {
        // Would overflow the i32 range.
        PyErr_SetString(PyExc_OverflowError, "integer division overflow");
        return CPY_LL_INT_ERROR;
    }
    int32_t quotient = x / y;  // truncates toward zero...
    // ...adjust downward when signs differ and the division was inexact.
    if ((x < 0) != (y < 0) && quotient * y != x) {
        quotient--;
    }
    return quotient;
}
|
||||
|
||||
// Modulo for i32 with Python semantics (result takes the divisor's sign).
int32_t CPyInt32_Remainder(int32_t x, int32_t y) {
    if (y == 0) {
        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
        return CPY_LL_INT_ERROR;
    }
    // Edge case: INT32_MIN % -1 would trap in hardware; Python's answer is 0.
    if (x == INT32_MIN && y == -1) {
        return 0;
    }
    int32_t rem = x % y;
    // Align with Python: remainder follows the divisor's sign.
    if ((x < 0) != (y < 0) && rem != 0) {
        rem += y;
    }
    return rem;
}
|
||||
|
||||
void CPyInt32_Overflow() {
|
||||
PyErr_SetString(PyExc_ValueError, "int too large to convert to i32");
|
||||
}
|
||||
|
||||
// i16 unboxing slow path
// Mirrors CPyLong_AsInt32_: range-clamp to the overflow/-1 sentinel,
// then disambiguate via PyErr_Occurred()/overflow.
int16_t CPyLong_AsInt16_(PyObject *o) {
    int overflow;
    long result = PyLong_AsLongAndOverflow(o, &overflow);
    if (result > 0x7fff || result < -0x8000) {
        // Fits in long but not in i16: report as overflow.
        overflow = 1;
        result = -1;
    }
    if (result == -1) {
        if (PyErr_Occurred()) {
            return CPY_LL_INT_ERROR;
        } else if (overflow) {
            PyErr_SetString(PyExc_ValueError, "int too large to convert to i16");
            return CPY_LL_INT_ERROR;
        }
    }
    return result;
}
|
||||
|
||||
// Floor division for i16 with Python semantics.
int16_t CPyInt16_Divide(int16_t x, int16_t y) {
    if (y == 0) {
        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
        return CPY_LL_INT_ERROR;
    }
    if (x == INT16_MIN && y == -1) {
        // Would overflow the i16 range.
        PyErr_SetString(PyExc_OverflowError, "integer division overflow");
        return CPY_LL_INT_ERROR;
    }
    int16_t quotient = x / y;  // truncates toward zero...
    // ...adjust downward when signs differ and the division was inexact.
    if ((x < 0) != (y < 0) && quotient * y != x) {
        quotient--;
    }
    return quotient;
}
|
||||
|
||||
// Modulo for i16 with Python semantics (result takes the divisor's sign).
int16_t CPyInt16_Remainder(int16_t x, int16_t y) {
    if (y == 0) {
        PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero");
        return CPY_LL_INT_ERROR;
    }
    // Edge case: INT16_MIN % -1 would trap in hardware; Python's answer is 0.
    if (x == INT16_MIN && y == -1) {
        return 0;
    }
    int16_t rem = x % y;
    // Align with Python: remainder follows the divisor's sign.
    if ((x < 0) != (y < 0) && rem != 0) {
        rem += y;
    }
    return rem;
}
|
||||
|
||||
void CPyInt16_Overflow() {
|
||||
PyErr_SetString(PyExc_ValueError, "int too large to convert to i16");
|
||||
}
|
||||
|
||||
// u8 unboxing slow path
// Valid range is [0, 255]; anything else is reported as overflow.
uint8_t CPyLong_AsUInt8_(PyObject *o) {
    int overflow;
    long result = PyLong_AsLongAndOverflow(o, &overflow);
    if (result < 0 || result >= 256) {
        // Out of the u8 range (including negatives): report as overflow.
        overflow = 1;
        result = -1;
    }
    if (result == -1) {
        if (PyErr_Occurred()) {
            return CPY_LL_UINT_ERROR;
        } else if (overflow) {
            PyErr_SetString(PyExc_ValueError, "int too large or small to convert to u8");
            return CPY_LL_UINT_ERROR;
        }
    }
    return result;
}
|
||||
|
||||
void CPyUInt8_Overflow() {
|
||||
PyErr_SetString(PyExc_ValueError, "int too large or small to convert to u8");
|
||||
}
|
||||
|
||||
// True division of tagged ints, producing a C double.
// Fast path: both operands are short (shift out the tag bit and divide).
// Slow path: box and use PyNumber_TrueDivide.
// Returns CPY_FLOAT_ERROR with a Python exception set on failure.
//
// Fixes: the previous version leaked xo, yo and result on the slow path,
// and ended with an unreachable `return 1.0;`.
double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y) {
    if (unlikely(y == 0)) {
        PyErr_SetString(PyExc_ZeroDivisionError, "division by zero");
        return CPY_FLOAT_ERROR;
    }
    if (likely(!CPyTagged_CheckLong(x) && !CPyTagged_CheckLong(y))) {
        return (double)((Py_ssize_t)x >> 1) / (double)((Py_ssize_t)y >> 1);
    }
    PyObject *xo = CPyTagged_AsObject(x);
    PyObject *yo = CPyTagged_AsObject(y);
    PyObject *result = PyNumber_TrueDivide(xo, yo);
    Py_DECREF(xo);
    Py_DECREF(yo);
    if (result == NULL) {
        return CPY_FLOAT_ERROR;
    }
    double fresult = PyFloat_AsDouble(result);
    Py_DECREF(result);
    return fresult;
}
|
||||
|
||||
// Serialize PyLong `v` into a new bytes object of exactly `length`
// bytes, in the given endianness/signedness.  Returns NULL with an
// exception set on failure (e.g. OverflowError if it doesn't fit).
// NOTE(review): _PyLong_AsByteArray is a private CPython API whose
// signature grew a `with_exceptions` flag in 3.13 -- hence the gate.
static PyObject *CPyLong_ToBytes(PyObject *v, Py_ssize_t length, int little_endian, int signed_flag) {
    // This is a wrapper for PyLong_AsByteArray and PyBytes_FromStringAndSize
    PyObject *result = PyBytes_FromStringAndSize(NULL, length);
    if (!result) {
        return NULL;
    }
    unsigned char *bytes = (unsigned char *)PyBytes_AS_STRING(result);
#if PY_VERSION_HEX >= 0x030D0000 // 3.13.0
    int res = _PyLong_AsByteArray((PyLongObject *)v, bytes, length, little_endian, signed_flag, 1);
#else
    int res = _PyLong_AsByteArray((PyLongObject *)v, bytes, length, little_endian, signed_flag);
#endif
    if (res < 0) {
        Py_DECREF(result);
        return NULL;
    }
    return result;
}
|
||||
|
||||
// int.to_bytes(length, byteorder, signed=False)
|
||||
PyObject *CPyTagged_ToBytes(CPyTagged self, Py_ssize_t length, PyObject *byteorder, int signed_flag) {
|
||||
PyObject *pyint = CPyTagged_AsObject(self);
|
||||
if (!PyUnicode_Check(byteorder)) {
|
||||
Py_DECREF(pyint);
|
||||
PyErr_SetString(PyExc_TypeError, "byteorder must be str");
|
||||
return NULL;
|
||||
}
|
||||
const char *order = PyUnicode_AsUTF8(byteorder);
|
||||
if (!order) {
|
||||
Py_DECREF(pyint);
|
||||
return NULL;
|
||||
}
|
||||
int little_endian;
|
||||
if (strcmp(order, "big") == 0) {
|
||||
little_endian = 0;
|
||||
} else if (strcmp(order, "little") == 0) {
|
||||
little_endian = 1;
|
||||
} else {
|
||||
PyErr_SetString(PyExc_ValueError, "byteorder must be either 'little' or 'big'");
|
||||
return NULL;
|
||||
}
|
||||
PyObject *result = CPyLong_ToBytes(pyint, length, little_endian, signed_flag);
|
||||
Py_DECREF(pyint);
|
||||
return result;
|
||||
}
|
||||
|
||||
// int.to_bytes(length, byteorder="little", signed=False)
|
||||
PyObject *CPyTagged_ToLittleEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag) {
|
||||
PyObject *pyint = CPyTagged_AsObject(self);
|
||||
PyObject *result = CPyLong_ToBytes(pyint, length, 1, signed_flag);
|
||||
Py_DECREF(pyint);
|
||||
return result;
|
||||
}
|
||||
|
||||
// int.to_bytes(length, "big", signed=False)
|
||||
PyObject *CPyTagged_ToBigEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag) {
|
||||
PyObject *pyint = CPyTagged_AsObject(self);
|
||||
PyObject *result = CPyLong_ToBytes(pyint, length, 0, signed_flag);
|
||||
Py_DECREF(pyint);
|
||||
return result;
|
||||
}
|
||||
|
||||
// int.bit_length()
//
// Number of bits needed to represent the absolute value of self in binary,
// excluding sign and leading zeros (0 -> 0, 1 -> 1, 255 -> 8, -255 -> 8).
// Returns a tagged int, or CPY_INT_TAG on error (with an exception set).
CPyTagged CPyTagged_BitLength(CPyTagged self) {
    // Handle zero
    if (self == 0) {
        return 0;
    }

    // Fast path for small (tagged) ints
    if (CPyTagged_CheckShort(self)) {
        Py_ssize_t val = CPyTagged_ShortAsSsize_t(self);
        // NOTE(review): -val assumes val != PY_SSIZE_T_MIN; short tagged ints
        // give up one bit to the tag, so that extreme cannot occur here.
        Py_ssize_t absval = val < 0 ? -val : val;
        int bits = 0;
        if (absval) {
#if defined(_MSC_VER)
#if defined(_WIN64)
            // MSVC: index of the highest set bit, plus one, is the bit length.
            unsigned long idx;
            if (_BitScanReverse64(&idx, (unsigned __int64)absval)) {
                bits = (int)(idx + 1);
            }
#else
            unsigned long idx;
            if (_BitScanReverse(&idx, (unsigned long)absval)) {
                bits = (int)(idx + 1);
            }
#endif
#elif defined(__GNUC__) || defined(__clang__)
            // GCC/Clang: bit length = word width minus count of leading zeros.
            bits = (int)(CPY_BITS - CPY_CLZ(absval));
#else
            // Fallback to loop if no builtin
            while (absval) {
                absval >>= 1;
                bits++;
            }
#endif
        }
        // Box the result as a short tagged int (shift in the zero tag bit).
        return bits << 1;
    }

    // Slow path for big ints
    PyObject *pyint = CPyTagged_AsObject(self);
    int bits = _PyLong_NumBits(pyint);
    Py_DECREF(pyint);
    if (bits < 0) {
        // _PyLong_NumBits sets an error on failure
        return CPY_INT_TAG;
    }
    return bits << 1;
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,113 @@
|
|||
#ifndef LIBRT_INTERNAL_H
|
||||
#define LIBRT_INTERNAL_H
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
// ABI version -- only an exact match is compatible. This will only be changed in
|
||||
// very exceptional cases (likely never) due to strict backward compatibility
|
||||
// requirements.
|
||||
#define LIBRT_INTERNAL_ABI_VERSION 2
|
||||
|
||||
// API version -- more recent versions must maintain backward compatibility, i.e.
|
||||
// we can add new features but not remove or change existing features (unless
|
||||
// ABI version is changed, but see the comment above).
|
||||
#define LIBRT_INTERNAL_API_VERSION 0
|
||||
|
||||
// Number of functions in the capsule API. If you add a new function, also increase
|
||||
// LIBRT_INTERNAL_API_VERSION.
|
||||
#define LIBRT_INTERNAL_API_LEN 20
|
||||
|
||||
#ifdef LIBRT_INTERNAL_MODULE
|
||||
|
||||
static PyObject *ReadBuffer_internal(PyObject *source);
|
||||
static PyObject *WriteBuffer_internal(void);
|
||||
static PyObject *WriteBuffer_getvalue_internal(PyObject *self);
|
||||
static PyObject *ReadBuffer_internal(PyObject *source);
|
||||
static PyObject *ReadBuffer_internal_empty(void);
|
||||
static char write_bool_internal(PyObject *data, char value);
|
||||
static char read_bool_internal(PyObject *data);
|
||||
static char write_str_internal(PyObject *data, PyObject *value);
|
||||
static PyObject *read_str_internal(PyObject *data);
|
||||
static char write_float_internal(PyObject *data, double value);
|
||||
static double read_float_internal(PyObject *data);
|
||||
static char write_int_internal(PyObject *data, CPyTagged value);
|
||||
static CPyTagged read_int_internal(PyObject *data);
|
||||
static char write_tag_internal(PyObject *data, uint8_t value);
|
||||
static uint8_t read_tag_internal(PyObject *data);
|
||||
static int NativeInternal_ABI_Version(void);
|
||||
static char write_bytes_internal(PyObject *data, PyObject *value);
|
||||
static PyObject *read_bytes_internal(PyObject *data);
|
||||
static uint8_t cache_version_internal(void);
|
||||
static PyTypeObject *ReadBuffer_type_internal(void);
|
||||
static PyTypeObject *WriteBuffer_type_internal(void);
|
||||
static int NativeInternal_API_Version(void);
|
||||
|
||||
#else
|
||||
|
||||
static void *NativeInternal_API[LIBRT_INTERNAL_API_LEN];
|
||||
|
||||
#define ReadBuffer_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[0])
|
||||
#define WriteBuffer_internal (*(PyObject* (*)(void)) NativeInternal_API[1])
|
||||
#define WriteBuffer_getvalue_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[2])
|
||||
#define write_bool_internal (*(char (*)(PyObject *source, char value)) NativeInternal_API[3])
|
||||
#define read_bool_internal (*(char (*)(PyObject *source)) NativeInternal_API[4])
|
||||
#define write_str_internal (*(char (*)(PyObject *source, PyObject *value)) NativeInternal_API[5])
|
||||
#define read_str_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[6])
|
||||
#define write_float_internal (*(char (*)(PyObject *source, double value)) NativeInternal_API[7])
|
||||
#define read_float_internal (*(double (*)(PyObject *source)) NativeInternal_API[8])
|
||||
#define write_int_internal (*(char (*)(PyObject *source, CPyTagged value)) NativeInternal_API[9])
|
||||
#define read_int_internal (*(CPyTagged (*)(PyObject *source)) NativeInternal_API[10])
|
||||
#define write_tag_internal (*(char (*)(PyObject *source, uint8_t value)) NativeInternal_API[11])
|
||||
#define read_tag_internal (*(uint8_t (*)(PyObject *source)) NativeInternal_API[12])
|
||||
#define NativeInternal_ABI_Version (*(int (*)(void)) NativeInternal_API[13])
|
||||
#define write_bytes_internal (*(char (*)(PyObject *source, PyObject *value)) NativeInternal_API[14])
|
||||
#define read_bytes_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[15])
|
||||
#define cache_version_internal (*(uint8_t (*)(void)) NativeInternal_API[16])
|
||||
#define ReadBuffer_type_internal (*(PyTypeObject* (*)(void)) NativeInternal_API[17])
|
||||
#define WriteBuffer_type_internal (*(PyTypeObject* (*)(void)) NativeInternal_API[18])
|
||||
#define NativeInternal_API_Version (*(int (*)(void)) NativeInternal_API[19])
|
||||
|
||||
static int
|
||||
import_librt_internal(void)
|
||||
{
|
||||
PyObject *mod = PyImport_ImportModule("librt.internal");
|
||||
if (mod == NULL)
|
||||
return -1;
|
||||
Py_DECREF(mod); // we import just for the side effect of making the below work.
|
||||
void *capsule = PyCapsule_Import("librt.internal._C_API", 0);
|
||||
if (capsule == NULL)
|
||||
return -1;
|
||||
memcpy(NativeInternal_API, capsule, sizeof(NativeInternal_API));
|
||||
if (NativeInternal_ABI_Version() != LIBRT_INTERNAL_ABI_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err), "ABI version conflict for librt.internal, expected %d, found %d",
|
||||
LIBRT_INTERNAL_ABI_VERSION,
|
||||
NativeInternal_ABI_Version()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
if (NativeInternal_API_Version() < LIBRT_INTERNAL_API_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err),
|
||||
"API version conflict for librt.internal, expected %d or newer, found %d (hint: upgrade librt)",
|
||||
LIBRT_INTERNAL_API_VERSION,
|
||||
NativeInternal_API_Version()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Exact-type check: is obj an instance of the ReadBuffer type?
static inline bool CPyReadBuffer_Check(PyObject *obj) {
    PyTypeObject *tp = ReadBuffer_type_internal();
    return Py_TYPE(obj) == tp;
}
|
||||
|
||||
// Exact-type check: is obj an instance of the WriteBuffer type?
static inline bool CPyWriteBuffer_Check(PyObject *obj) {
    PyTypeObject *tp = WriteBuffer_type_internal();
    return Py_TYPE(obj) == tp;
}
|
||||
|
||||
#endif // LIBRT_INTERNAL_H
|
||||
395
venv/lib/python3.11/site-packages/mypyc/lib-rt/list_ops.c
Normal file
395
venv/lib/python3.11/site-packages/mypyc/lib-rt/list_ops.c
Normal file
|
|
@ -0,0 +1,395 @@
|
|||
// List primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.list_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
#ifndef Py_TPFLAGS_SEQUENCE
|
||||
#define Py_TPFLAGS_SEQUENCE (1 << 5)
|
||||
#endif
|
||||
|
||||
// Build a new list of `len` items supplied as varargs.
// The reference to each PyObject * argument is stolen; on allocation
// failure NULL is returned and the arguments are not consumed.
PyObject *CPyList_Build(Py_ssize_t len, ...) {
    PyObject *result = PyList_New(len);
    if (result == NULL) {
        return NULL;
    }

    va_list args;
    va_start(args, len);
    for (Py_ssize_t pos = 0; pos < len; pos++) {
        // PyList_SET_ITEM steals the reference passed in.
        PyList_SET_ITEM(result, pos, va_arg(args, PyObject *));
    }
    va_end(args);

    return result;
}
|
||||
|
||||
// list.clear()
//
// Fast path for exact lists; subclasses dispatch to their clear() method.
// Returns 1 on success, 0 with an error set on failure.
char CPyList_Clear(PyObject *list) {
    if (PyList_CheckExact(list)) {
        PyList_Clear(list);
    } else {
        PyObject *res = PyObject_CallMethodNoArgs(list, mypyc_interned_str.clear);
        if (res == NULL) {
            return 0;
        }
        // Bug fix: the original dropped `res` (the returned None) on the
        // floor, leaking one reference per successful clear() call.
        Py_DECREF(res);
    }
    return 1;
}
|
||||
|
||||
// list.copy(): shallow copy.
// Exact lists take the fast full-slice path; subclasses go through their
// own copy() method. Returns a new reference, or NULL with an error set.
PyObject *CPyList_Copy(PyObject *list) {
    if (!PyList_CheckExact(list)) {
        return PyObject_CallMethodNoArgs(list, mypyc_interned_str.copy);
    }
    return PyList_GetSlice(list, 0, PyList_GET_SIZE(list));
}
|
||||
|
||||
// list[index] for an index known to be a short tagged int.
// Negative indices count from the end. Returns a new reference, or NULL
// with IndexError set when out of range.
PyObject *CPyList_GetItemShort(PyObject *list, CPyTagged index) {
    Py_ssize_t i = CPyTagged_ShortAsSsize_t(index);
    Py_ssize_t len = PyList_GET_SIZE(list);
    if (i < 0) {
        i += len;
    }
    if (i < 0 || i >= len) {
        PyErr_SetString(PyExc_IndexError, "list index out of range");
        return NULL;
    }
    PyObject *item = PyList_GET_ITEM(list, i);
    Py_INCREF(item);
    return item;
}
|
||||
|
||||
// Borrowed-reference variant of CPyList_GetItemShort: same bounds handling,
// but the caller must not rely on the result surviving list mutation.
PyObject *CPyList_GetItemShortBorrow(PyObject *list, CPyTagged index) {
    Py_ssize_t i = CPyTagged_ShortAsSsize_t(index);
    Py_ssize_t len = PyList_GET_SIZE(list);
    if (i < 0) {
        i += len;
    }
    if (i < 0 || i >= len) {
        PyErr_SetString(PyExc_IndexError, "list index out of range");
        return NULL;
    }
    return PyList_GET_ITEM(list, i);  // borrowed
}
|
||||
|
||||
// list[index] for an arbitrary tagged index.
// Returns a new reference, or NULL with IndexError (out of range) or
// OverflowError (index doesn't fit a tagged short, so can never be valid).
PyObject *CPyList_GetItem(PyObject *list, CPyTagged index) {
    if (unlikely(!CPyTagged_CheckShort(index))) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    Py_ssize_t i = CPyTagged_ShortAsSsize_t(index);
    Py_ssize_t len = PyList_GET_SIZE(list);
    if (i < 0) {
        i += len;
    }
    if (i < 0 || i >= len) {
        PyErr_SetString(PyExc_IndexError, "list index out of range");
        return NULL;
    }
    PyObject *item = PyList_GET_ITEM(list, i);
    Py_INCREF(item);
    return item;
}
|
||||
|
||||
// Borrowed-reference variant of CPyList_GetItem.
// Returns a borrowed reference, or NULL with IndexError/OverflowError set.
PyObject *CPyList_GetItemBorrow(PyObject *list, CPyTagged index) {
    if (unlikely(!CPyTagged_CheckShort(index))) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    Py_ssize_t i = CPyTagged_ShortAsSsize_t(index);
    Py_ssize_t len = PyList_GET_SIZE(list);
    if (i < 0) {
        i += len;
    }
    if (i < 0 || i >= len) {
        PyErr_SetString(PyExc_IndexError, "list index out of range");
        return NULL;
    }
    return PyList_GET_ITEM(list, i);  // borrowed
}
|
||||
|
||||
// list[index] for a native int64 index (negative counts from the end).
// Returns a new reference, or NULL with IndexError set.
PyObject *CPyList_GetItemInt64(PyObject *list, int64_t index) {
    Py_ssize_t len = PyList_GET_SIZE(list);
    int64_t i = index;
    if (i < 0) {
        i += len;
    }
    if (unlikely(i < 0 || i >= len)) {
        PyErr_SetString(PyExc_IndexError, "list index out of range");
        return NULL;
    }
    PyObject *item = PyList_GET_ITEM(list, i);
    Py_INCREF(item);
    return item;
}
|
||||
|
||||
// Borrowed-reference variant of CPyList_GetItemInt64.
PyObject *CPyList_GetItemInt64Borrow(PyObject *list, int64_t index) {
    Py_ssize_t len = PyList_GET_SIZE(list);
    int64_t i = index;
    if (i < 0) {
        i += len;
    }
    if (unlikely(i < 0 || i >= len)) {
        PyErr_SetString(PyExc_IndexError, "list index out of range");
        return NULL;
    }
    return PyList_GET_ITEM(list, i);  // borrowed
}
|
||||
|
||||
// list[index] = value for a tagged index. Steals the reference to `value`
// on success; on failure the reference is NOT consumed.
// Returns true on success, false with IndexError/OverflowError set.
bool CPyList_SetItem(PyObject *list, CPyTagged index, PyObject *value) {
    if (unlikely(!CPyTagged_CheckShort(index))) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return false;
    }
    Py_ssize_t i = CPyTagged_ShortAsSsize_t(index);
    Py_ssize_t len = PyList_GET_SIZE(list);
    if (i < 0) {
        i += len;
    }
    if (i < 0 || i >= len) {
        PyErr_SetString(PyExc_IndexError, "list assignment index out of range");
        return false;
    }
    // PyList_SET_ITEM doesn't release the old element, so drop it ourselves.
    Py_DECREF(PyList_GET_ITEM(list, i));
    PyList_SET_ITEM(list, i, value);  // steals the reference
    return true;
}
|
||||
|
||||
// list[index] = value for a native int64 index. Steals the reference to
// `value` on success. Returns true on success, false with IndexError set.
bool CPyList_SetItemInt64(PyObject *list, int64_t index, PyObject *value) {
    size_t size = PyList_GET_SIZE(list);
    if (unlikely((uint64_t)index >= size)) {
        // Bug fix: this was `index > 0`, which let index == 0 slip through on
        // an empty list (0 += 0 stays 0, 0 < 0 is false) and then performed an
        // out-of-bounds PyList_GET_ITEM/PyList_SET_ITEM below.
        if (index >= 0) {
            PyErr_SetString(PyExc_IndexError, "list assignment index out of range");
            return false;
        }
        index += size;
        if (index < 0) {
            PyErr_SetString(PyExc_IndexError, "list assignment index out of range");
            return false;
        }
    }
    // PyList_SET_ITEM doesn't decref the old element, so we do
    Py_DECREF(PyList_GET_ITEM(list, index));
    // N.B: Steals reference
    PyList_SET_ITEM(list, index, value);
    return true;
}
|
||||
|
||||
// This function should only be used to fill in brand new lists.
// No bounds check, and the slot's previous contents (expected NULL) are not
// released; the reference to `value` is stolen.
void CPyList_SetItemUnsafe(PyObject *list, Py_ssize_t index, PyObject *value) {
    PyList_SET_ITEM(list, index, value);
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// The original optimized list.pop implementation doesn't work on free-threaded
|
||||
// builds, so provide an alternative that is a bit slower but works.
|
||||
//
|
||||
// Note that this implementation isn't intended to be atomic.
|
||||
static inline PyObject *list_pop_index(PyObject *list, Py_ssize_t index) {
|
||||
PyObject *item = PyList_GetItemRef(list, index);
|
||||
if (item == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (PySequence_DelItem(list, index) < 0) {
|
||||
Py_DECREF(item);
|
||||
return NULL;
|
||||
}
|
||||
return item;
|
||||
}
|
||||
#endif
|
||||
|
||||
// list.pop() with no argument: remove and return the last element.
// Returns a new reference, or NULL with IndexError set on an empty list.
PyObject *CPyList_PopLast(PyObject *list)
{
#ifdef Py_GIL_DISABLED
    // The other implementation causes segfaults on a free-threaded Python 3.14b4 build.
    Py_ssize_t index = PyList_GET_SIZE(list) - 1;
    return list_pop_index(list, index);
#else
    // I tried a specialized version of pop_impl for just removing the
    // last element and it wasn't any faster in microbenchmarks than
    // the generic one so I ditched it.
    return list_pop_impl((PyListObject *)list, -1);
#endif
}
|
||||
|
||||
// list.pop(index) for a tagged index.
// Returns a new reference to the removed element, or NULL with
// IndexError/OverflowError set.
PyObject *CPyList_Pop(PyObject *obj, CPyTagged index)
{
    if (unlikely(!CPyTagged_CheckShort(index))) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
#ifdef Py_GIL_DISABLED
    // We must use a slower implementation on free-threaded builds;
    // list_pop_index needs a pre-normalized index.
    if (n < 0) {
        n += PyList_GET_SIZE(obj);
    }
    return list_pop_index(obj, n);
#else
    return list_pop_impl((PyListObject *)obj, n);
#endif
}
|
||||
|
||||
// list.count(value): thin wrapper over the adapted CPython list_count
// helper. NOTE(review): presumably returns a tagged count or an error
// sentinel on comparison failure — see the list_count definition.
CPyTagged CPyList_Count(PyObject *obj, PyObject *value)
{
    return list_count((PyListObject *)obj, value);
}
|
||||
|
||||
// list.insert(index, value) for a tagged index.
// Returns 0 on success, -1 with an error set on failure.
int CPyList_Insert(PyObject *list, CPyTagged index, PyObject *value)
{
    if (!CPyTagged_CheckShort(index)) {
        // The max range doesn't exactly coincide with ssize_t, but we still
        // want to keep the error message compatible with CPython.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return -1;
    }
    return PyList_Insert(list, CPyTagged_ShortAsSsize_t(index), value);
}
|
||||
|
||||
// list.extend(iterable) -> None.
// Returns a new reference to None on success, or NULL with an error set.
PyObject *CPyList_Extend(PyObject *o1, PyObject *o2) {
    if (PyList_Extend(o1, o2) >= 0) {
        Py_RETURN_NONE;
    }
    return NULL;
}
|
||||
|
||||
// Return -2 or error, -1 if not found, or index of first match otherwise.
|
||||
static Py_ssize_t _CPyList_Find(PyObject *list, PyObject *obj) {
|
||||
Py_ssize_t i;
|
||||
for (i = 0; i < Py_SIZE(list); i++) {
|
||||
PyObject *item = PyList_GET_ITEM(list, i);
|
||||
Py_INCREF(item);
|
||||
int cmp = PyObject_RichCompareBool(item, obj, Py_EQ);
|
||||
Py_DECREF(item);
|
||||
if (cmp != 0) {
|
||||
if (cmp > 0) {
|
||||
return i;
|
||||
} else {
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// list.remove(x): delete the first element equal to obj.
// Returns 0 on success, -1 with ValueError (not found) or another error set.
int CPyList_Remove(PyObject *list, PyObject *obj) {
    Py_ssize_t idx = _CPyList_Find(list, obj);
    if (idx >= 0) {
        // Delete the slot via an empty slice assignment.
        return PyList_SetSlice(list, idx, idx + 1, NULL);
    }
    if (idx == -1) {
        PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
    }
    // idx == -2 means the comparison already set an error.
    return -1;
}
|
||||
|
||||
// list.index(obj): tagged index of the first match.
// Returns CPY_INT_TAG with ValueError (not found) or a comparison error set.
CPyTagged CPyList_Index(PyObject *list, PyObject *obj) {
    Py_ssize_t idx = _CPyList_Find(list, obj);
    if (idx >= 0) {
        return idx << 1;  // box as short tagged int
    }
    if (idx == -1) {
        PyErr_SetString(PyExc_ValueError, "value is not in list");
    }
    // idx == -2: error already set by the comparison.
    return CPY_INT_TAG;
}
|
||||
|
||||
// sorted(seq): materialize seq into a fresh list and sort it in place.
// Returns the new list, or NULL with an error set.
PyObject *CPySequence_Sort(PyObject *seq) {
    PyObject *copy = PySequence_List(seq);
    if (copy != NULL && PyList_Sort(copy) < 0) {
        Py_CLEAR(copy);  // drop the half-built result; error already set
    }
    return copy;
}
|
||||
|
||||
// seq * n for a tagged repeat count.
// Returns the repeated sequence, or NULL with an error set (including when
// the count does not fit in Py_ssize_t).
PyObject *CPySequence_Multiply(PyObject *seq, CPyTagged t_size) {
    Py_ssize_t count = CPyTagged_AsSsize_t(t_size);
    if (count == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PySequence_Repeat(seq, count);
}
|
||||
|
||||
// Reflected repetition (n * seq): delegates to the forward form, since
// repetition does not depend on operand order.
PyObject *CPySequence_RMultiply(CPyTagged t_size, PyObject *seq) {
    return CPySequence_Multiply(seq, t_size);
}
|
||||
|
||||
// seq *= n for a tagged repeat count (in-place where the type supports it).
// Returns the resulting sequence, or NULL with an error set.
PyObject *CPySequence_InPlaceMultiply(PyObject *seq, CPyTagged t_size) {
    Py_ssize_t count = CPyTagged_AsSsize_t(t_size);
    if (count == -1 && PyErr_Occurred()) {
        return NULL;
    }
    return PySequence_InPlaceRepeat(seq, count);
}
|
||||
|
||||
// obj[start:end] with a fast path for exact lists and short tagged bounds;
// everything else falls back to the generic slice helper.
// Returns a new list (or generic slice result), or NULL on error.
PyObject *CPyList_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
    if (unlikely(!PyList_CheckExact(obj)
                 || !CPyTagged_CheckShort(start) || !CPyTagged_CheckShort(end))) {
        return CPyObject_GetSlice(obj, start, end);
    }
    Py_ssize_t lo = CPyTagged_ShortAsSsize_t(start);
    Py_ssize_t hi = CPyTagged_ShortAsSsize_t(end);
    Py_ssize_t len = PyList_GET_SIZE(obj);
    // Normalize negative bounds; PyList_GetSlice clamps the rest.
    if (lo < 0) {
        lo += len;
    }
    if (hi < 0) {
        hi += len;
    }
    return PyList_GetSlice(obj, lo, hi);
}
|
||||
|
||||
// Is obj's type flagged as a sequence (Py_TPFLAGS_SEQUENCE)?
// Returns a canonical 0/1 instead of the raw flag bit, so callers are not
// relying on an implicit unsigned-long-to-int truncation of tp_flags.
int CPySequence_Check(PyObject *obj) {
    return (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_SEQUENCE) != 0;
}
|
||||
1616
venv/lib/python3.11/site-packages/mypyc/lib-rt/misc_ops.c
Normal file
1616
venv/lib/python3.11/site-packages/mypyc/lib-rt/misc_ops.c
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,21 @@
|
|||
#include <Python.h>

// Shim module for separate-compilation mode: the real module body lives in
// the group shared library named by libname; this stub fetches that
// library's init function out of a capsule attribute and calls it.
// NOTE: this file is a Python str.format template, so doubled brace
// characters below denote literal braces in the emitted C source.
PyMODINIT_FUNC
PyInit_{modname}(void)
{{
    PyObject *tmp;
    if (!(tmp = PyImport_ImportModule("{libname}"))) return NULL;
    PyObject *capsule = PyObject_GetAttrString(tmp, "init_{full_modname}");
    Py_DECREF(tmp);
    if (capsule == NULL) return NULL;
    void *init_func = PyCapsule_GetPointer(capsule, "{libname}.init_{full_modname}");
    Py_DECREF(capsule);
    if (!init_func) {{
        return NULL;
    }}
    return ((PyObject *(*)(void))init_func)();
}}

// distutils sometimes spuriously tells cl to export CPyInit___init__,
// so provide that so it chills out
PyMODINIT_FUNC PyInit___init__(void) {{ return PyInit_{modname}(); }}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
#include <Python.h>

// Multi-phase-init shim module for separate-compilation mode: the module's
// exec function lives in the group shared library and is fetched through a
// capsule attribute when the module executes.
// NOTE: this file is a Python str.format template, so doubled brace
// characters below denote literal braces in the emitted C source.

// Py_mod_exec slot: import the group library, pull out this module's exec
// capsule, and run the wrapped function against the new module object.
static int {modname}_exec(PyObject *module)
{{
    PyObject *tmp;
    if (!(tmp = PyImport_ImportModule("{libname}"))) return -1;
    PyObject *capsule = PyObject_GetAttrString(tmp, "exec_{full_modname}");
    Py_DECREF(tmp);
    if (capsule == NULL) return -1;
    void *exec_func = PyCapsule_GetPointer(capsule, "{libname}.exec_{full_modname}");
    Py_DECREF(capsule);
    if (!exec_func) return -1;
    if (((int (*)(PyObject *))exec_func)(module) != 0) return -1;
    return 0;
}}

// Slots: no multiple-interpreter support; declared safe to run without
// the GIL on free-threaded builds.
static PyModuleDef_Slot {modname}_slots[] = {{
    {{Py_mod_exec, {modname}_exec}},
    {{Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}},
    {{Py_mod_gil, Py_MOD_GIL_NOT_USED}},
    {{0, NULL}},
}};

static struct PyModuleDef {modname}_module = {{
    PyModuleDef_HEAD_INIT,
    .m_name = "{modname}",
    .m_doc = NULL,
    .m_methods = NULL,
    .m_size = 0,
    .m_slots = {modname}_slots,
}};

PyMODINIT_FUNC
PyInit_{modname}(void)
{{
    return PyModuleDef_Init(&{modname}_module);
}}

// distutils sometimes spuriously tells cl to export CPyInit___init__,
// so provide that so it chills out
PyMODINIT_FUNC PyInit___init__(void) {{ return PyInit_{modname}(); }}
|
||||
200
venv/lib/python3.11/site-packages/mypyc/lib-rt/mypyc_util.h
Normal file
200
venv/lib/python3.11/site-packages/mypyc/lib-rt/mypyc_util.h
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
#ifndef MYPYC_UTIL_H
|
||||
#define MYPYC_UTIL_H
|
||||
|
||||
#include <Python.h>
|
||||
#include <frameobject.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define likely(x) __builtin_expect((x),1)
|
||||
#define unlikely(x) __builtin_expect((x),0)
|
||||
#define CPy_Unreachable() __builtin_unreachable()
|
||||
#else
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#define CPy_Unreachable() abort()
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define CPy_NOINLINE __attribute__((noinline))
|
||||
#elif defined(_MSC_VER)
|
||||
#define CPy_NOINLINE __declspec(noinline)
|
||||
#else
|
||||
#define CPy_NOINLINE
|
||||
#endif
|
||||
|
||||
#ifndef Py_GIL_DISABLED
|
||||
|
||||
// Everything is running in the same thread, so no need for thread locals
|
||||
#define CPyThreadLocal
|
||||
|
||||
#else
|
||||
|
||||
// 1. Use C11 standard thread_local storage, if available
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
|
||||
#define CPyThreadLocal _Thread_local
|
||||
|
||||
// 2. Microsoft Visual Studio fallback
|
||||
#elif defined(_MSC_VER)
|
||||
#define CPyThreadLocal __declspec(thread)
|
||||
|
||||
// 3. GNU thread local storage for GCC/Clang targets that still need it
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define CPyThreadLocal __thread
|
||||
|
||||
#else
|
||||
#error "Can't define CPyThreadLocal for this compiler/target (consider using a non-free-threaded Python build)"
|
||||
#endif
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
// Helper macro for stringification in _Pragma
|
||||
#define CPY_STRINGIFY(x) #x
|
||||
|
||||
#if defined(__clang__)
|
||||
#define CPY_UNROLL_LOOP_IMPL(x) _Pragma(CPY_STRINGIFY(x))
|
||||
#define CPY_UNROLL_LOOP(n) CPY_UNROLL_LOOP_IMPL(unroll n)
|
||||
#elif defined(__GNUC__) && __GNUC__ >= 8
|
||||
#define CPY_UNROLL_LOOP_IMPL(x) _Pragma(CPY_STRINGIFY(x))
|
||||
#define CPY_UNROLL_LOOP(n) CPY_UNROLL_LOOP_IMPL(GCC unroll n)
|
||||
#else
|
||||
#define CPY_UNROLL_LOOP(n)
|
||||
#endif
|
||||
|
||||
// INCREF and DECREF that assert the pointer is not NULL.
|
||||
// asserts are disabled in release builds so there shouldn't be a perf hit.
|
||||
// I'm honestly kind of surprised that this isn't done by default.
|
||||
#define CPy_INCREF(p) do { assert(p); Py_INCREF(p); } while (0)
|
||||
#define CPy_DECREF(p) do { assert(p); Py_DECREF(p); } while (0)
|
||||
// Here just for consistency
|
||||
#define CPy_XDECREF(p) Py_XDECREF(p)
|
||||
|
||||
#ifndef Py_GIL_DISABLED

// The *_NO_IMM operations below perform refcount manipulation for
// non-immortal objects (Python 3.12 and later).
//
// Py_INCREF and other CPython operations check for immortality. This
// can be expensive when we know that an object cannot be immortal.
//
// This optimization cannot be performed in free-threaded mode so we
// fall back to just calling the normal incref/decref operations.

// Caller must guarantee `op` is non-NULL and not immortal.
static inline void CPy_INCREF_NO_IMM(PyObject *op)
{
    op->ob_refcnt++;
}

// Caller must guarantee `op` is non-NULL and not immortal.
static inline void CPy_DECREF_NO_IMM(PyObject *op)
{
    if (--op->ob_refcnt == 0) {
        _Py_Dealloc(op);
    }
}

// NULL-tolerant variant of CPy_DECREF_NO_IMM.
static inline void CPy_XDECREF_NO_IMM(PyObject *op)
{
    if (op != NULL && --op->ob_refcnt == 0) {
        _Py_Dealloc(op);
    }
}

// Self-referential macros: call sites may pass any object pointer type;
// each macro casts to PyObject * and invokes the same-named inline function
// (the function name in the expansion is not macro-expanded again).
#define CPy_INCREF_NO_IMM(op) CPy_INCREF_NO_IMM((PyObject *)(op))
#define CPy_DECREF_NO_IMM(op) CPy_DECREF_NO_IMM((PyObject *)(op))
#define CPy_XDECREF_NO_IMM(op) CPy_XDECREF_NO_IMM((PyObject *)(op))

#else

// Free-threaded builds: raw refcount-field manipulation is unsafe there,
// so defer to the standard operations.
#define CPy_INCREF_NO_IMM(op) CPy_INCREF(op)
#define CPy_DECREF_NO_IMM(op) CPy_DECREF(op)
#define CPy_XDECREF_NO_IMM(op) CPy_XDECREF(op)

#endif
|
||||
|
||||
// Tagged integer -- our representation of Python 'int' objects.
|
||||
// Small enough integers are represented as unboxed integers (shifted
|
||||
// left by 1); larger integers (larger than 63 bits on a 64-bit
|
||||
// platform) are stored as a tagged pointer (PyObject *)
|
||||
// representing a Python int object, with the lowest bit set.
|
||||
// Tagged integers are always normalized. A small integer *must not*
|
||||
// have the tag bit set.
|
||||
typedef size_t CPyTagged;
|
||||
|
||||
typedef size_t CPyPtr;
|
||||
|
||||
#define CPY_INT_BITS (CHAR_BIT * sizeof(CPyTagged))
|
||||
|
||||
#define CPY_TAGGED_MAX (((Py_ssize_t)1 << (CPY_INT_BITS - 2)) - 1)
|
||||
#define CPY_TAGGED_MIN (-((Py_ssize_t)1 << (CPY_INT_BITS - 2)))
|
||||
#define CPY_TAGGED_ABS_MIN (0-(size_t)CPY_TAGGED_MIN)
|
||||
|
||||
typedef PyObject CPyModule;
|
||||
|
||||
// Tag bit used for long integers
|
||||
#define CPY_INT_TAG 1
|
||||
|
||||
// Error value for signed fixed-width (low-level) integers
|
||||
#define CPY_LL_INT_ERROR -113
|
||||
|
||||
// Error value for unsigned fixed-width (low-level) integers
|
||||
#define CPY_LL_UINT_ERROR 239
|
||||
|
||||
// Error value for floats
|
||||
#define CPY_FLOAT_ERROR -113.0
|
||||
|
||||
// Value for 'None' primitive type
|
||||
#define CPY_NONE_ERROR 2
|
||||
#define CPY_NONE 1
|
||||
|
||||
typedef void (*CPyVTableItem)(void);
|
||||
|
||||
static inline CPyTagged CPyTagged_ShortFromInt(int x) {
|
||||
return x << 1;
|
||||
}
|
||||
|
||||
// Construct a short tagged int from a Py_ssize_t (caller guarantees it fits).
// UB fix: shift in the unsigned CPyTagged domain; left-shifting a negative
// signed value is undefined behavior in C.
static inline CPyTagged CPyTagged_ShortFromSsize_t(Py_ssize_t x) {
    return (CPyTagged)x << 1;
}
|
||||
|
||||
// Are we targeting Python 3.X or newer?
|
||||
#define CPY_3_11_FEATURES (PY_VERSION_HEX >= 0x030b0000)
|
||||
#define CPY_3_12_FEATURES (PY_VERSION_HEX >= 0x030c0000)
|
||||
#define CPY_3_14_FEATURES (PY_VERSION_HEX >= 0x030e0000)
|
||||
#define CPY_3_15_FEATURES (PY_VERSION_HEX >= 0x030f0000)
|
||||
|
||||
#if CPY_3_12_FEATURES
|
||||
|
||||
// Same as macros in CPython internal/pycore_long.h, but with a CPY_ prefix
|
||||
#define CPY_NON_SIZE_BITS 3
|
||||
#define CPY_SIGN_ZERO 1
|
||||
#define CPY_SIGN_NEGATIVE 2
|
||||
#define CPY_SIGN_MASK 3
|
||||
|
||||
#define CPY_LONG_DIGIT(o, n) ((o)->long_value.ob_digit[n])
|
||||
|
||||
// Only available on Python 3.12 and later
|
||||
#define CPY_LONG_TAG(o) ((o)->long_value.lv_tag)
|
||||
#define CPY_LONG_IS_NEGATIVE(o) (((o)->long_value.lv_tag & CPY_SIGN_MASK) == CPY_SIGN_NEGATIVE)
|
||||
// Only available on Python 3.12 and later
|
||||
#define CPY_LONG_SIZE(o) ((o)->long_value.lv_tag >> CPY_NON_SIZE_BITS)
|
||||
// Number of digits; negative for negative ints
|
||||
#define CPY_LONG_SIZE_SIGNED(o) (CPY_LONG_IS_NEGATIVE(o) ? -CPY_LONG_SIZE(o) : CPY_LONG_SIZE(o))
|
||||
// Number of digits, assuming int is non-negative
|
||||
#define CPY_LONG_SIZE_UNSIGNED(o) CPY_LONG_SIZE(o)
|
||||
|
||||
#else

// Pre-3.12 PyLongObject layout: size and sign both live in ob_size
// (a negative size marks a negative int).
#define CPY_LONG_DIGIT(o, n) ((o)->ob_digit[n])
// Bug fix: the original macro had an unbalanced '(' -- `(((o)->... < 0)` --
// which would be a syntax error at every expansion site on pre-3.12 builds.
#define CPY_LONG_IS_NEGATIVE(o) (((o)->ob_base.ob_size < 0))
#define CPY_LONG_SIZE_SIGNED(o) ((o)->ob_base.ob_size)
#define CPY_LONG_SIZE_UNSIGNED(o) ((o)->ob_base.ob_size)

#endif
|
||||
|
||||
// Are we targeting Python 3.13 or newer?
|
||||
#define CPY_3_13_FEATURES (PY_VERSION_HEX >= 0x030d0000)
|
||||
|
||||
// Are we targeting Python 3.14 or newer?
|
||||
#define CPY_3_14_FEATURES (PY_VERSION_HEX >= 0x030e0000)
|
||||
|
||||
#endif
|
||||
2594
venv/lib/python3.11/site-packages/mypyc/lib-rt/pythoncapi_compat.h
Normal file
2594
venv/lib/python3.11/site-packages/mypyc/lib-rt/pythoncapi_compat.h
Normal file
File diff suppressed because it is too large
Load diff
209
venv/lib/python3.11/site-packages/mypyc/lib-rt/pythonsupport.c
Normal file
209
venv/lib/python3.11/site-packages/mypyc/lib-rt/pythonsupport.c
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
// Collects code that was copied in from cpython, for a couple of different reasons:
|
||||
// * We wanted to modify it to produce a more efficient version for our uses
|
||||
// * We needed to call it and it was static :(
|
||||
// * We wanted to call it and needed to backport it
|
||||
|
||||
#include "pythonsupport.h"
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Adapted from bltinmodule.c in Python 3.7.0
|
||||
// Resolve __mro_entries__ on a class's bases tuple (PEP 560).
//
// For each base that is not a type and defines __mro_entries__, call it with
// the original bases tuple and splice the returned tuple into the result.
// Returns `bases` itself when no substitution happened (note: no new
// reference is taken in that case, matching the original CPython code --
// callers must account for this), a new tuple otherwise, or NULL with an
// exception set on error.
PyObject*
update_bases(PyObject *bases)
{
    Py_ssize_t i, j;
    PyObject *base, *meth, *new_base, *result, *new_bases = NULL;
    // One-element argument stack for the vectorcall of __mro_entries__.
    PyObject *stack[1] = {bases};
    assert(PyTuple_Check(bases));

    Py_ssize_t nargs = PyTuple_GET_SIZE(bases);
    for (i = 0; i < nargs; i++) {
        base = PyTuple_GET_ITEM(bases, i);
        if (PyType_Check(base)) {
            if (new_bases) {
                /* If we already have made a replacement, then we append every normal base,
                   otherwise just skip it. */
                if (PyList_Append(new_bases, base) < 0) {
                    goto error;
                }
            }
            continue;
        }
        // Sets meth to NULL (without error) when the attribute is absent.
        if (PyObject_GetOptionalAttr(base, mypyc_interned_str.__mro_entries__, &meth) < 0) {
            goto error;
        }
        if (!meth) {
            if (new_bases) {
                if (PyList_Append(new_bases, base) < 0) {
                    goto error;
                }
            }
            continue;
        }
        new_base = PyObject_Vectorcall(meth, stack, 1, NULL);
        Py_DECREF(meth);
        if (!new_base) {
            goto error;
        }
        if (!PyTuple_Check(new_base)) {
            PyErr_SetString(PyExc_TypeError,
                            "__mro_entries__ must return a tuple");
            Py_DECREF(new_base);
            goto error;
        }
        if (!new_bases) {
            /* If this is a first successful replacement, create new_bases list and
               copy previously encountered bases. */
            if (!(new_bases = PyList_New(i))) {
                goto error;
            }
            for (j = 0; j < i; j++) {
                base = PyTuple_GET_ITEM(bases, j);
                // PyList_SET_ITEM steals a reference, hence the INCREF below.
                PyList_SET_ITEM(new_bases, j, base);
                Py_INCREF(base);
            }
        }
        // Splice the tuple returned by __mro_entries__ onto the end of the list.
        j = PyList_GET_SIZE(new_bases);
        if (PyList_SetSlice(new_bases, j, j, new_base) < 0) {
            goto error;
        }
        Py_DECREF(new_base);
    }
    if (!new_bases) {
        return bases;
    }
    result = PyList_AsTuple(new_bases);
    Py_DECREF(new_bases);
    return result;

error:
    Py_XDECREF(new_bases);
    return NULL;
}
|
||||
|
||||
// From Python 3.7's typeobject.c
|
||||
int
|
||||
init_subclass(PyTypeObject *type, PyObject *kwds)
|
||||
{
|
||||
PyObject *super, *func, *result;
|
||||
PyObject *args[2] = {(PyObject *)type, (PyObject *)type};
|
||||
|
||||
super = PyObject_Vectorcall((PyObject *)&PySuper_Type, args, 2, NULL);
|
||||
if (super == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
func = PyObject_GetAttr(super, mypyc_interned_str.__init_subclass__);
|
||||
Py_DECREF(super);
|
||||
if (func == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
result = _PyObject_FastCallDict(func, NULL, 0, kwds);
|
||||
Py_DECREF(func);
|
||||
if (result == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_DECREF(result);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if CPY_3_12_FEATURES
|
||||
|
||||
// Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined)
|
||||
// Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined).
//
// Convert a multi-digit Python int to Py_ssize_t, clamping to the CPyTagged
// range. On overflow, sets *overflow to +1/-1 (the sign of the value) and
// returns -1; otherwise *overflow is 0 and the converted value is returned.
Py_ssize_t
CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow)
{
    PyLongObject *v = (PyLongObject *)vv;
    size_t x, prev;
    Py_ssize_t res;
    Py_ssize_t i;
    int sign;

    *overflow = 0;

    res = -1;
    // Python 3.12+ stores sign and digit count packed into lv_tag.
    i = CPY_LONG_TAG(v);

    sign = 1;
    x = 0;
    if (i & CPY_SIGN_NEGATIVE) {
        sign = -1;
    }
    i >>= CPY_NON_SIZE_BITS;
    // Accumulate digits most-significant first; detect overflow by checking
    // that shifting back recovers the previous accumulator value.
    while (--i >= 0) {
        prev = x;
        x = (x << PyLong_SHIFT) + CPY_LONG_DIGIT(v, i);
        if ((x >> PyLong_SHIFT) != prev) {
            *overflow = sign;
            goto exit;
        }
    }
    /* Haven't lost any bits, but casting to long requires extra
     * care.
     */
    if (x <= (size_t)CPY_TAGGED_MAX) {
        res = (Py_ssize_t)x * sign;
    }
    else if (sign < 0 && x == CPY_TAGGED_ABS_MIN) {
        res = CPY_TAGGED_MIN;
    }
    else {
        *overflow = sign;
        /* res is already set to -1 */
    }
  exit:
    return res;
}
|
||||
|
||||
#else
|
||||
|
||||
// Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined, Python 3.11 and earlier)
|
||||
// Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined, Python 3.11 and
// earlier int layout, where Py_SIZE(v) is the signed digit count).
//
// On overflow, sets *overflow to +1/-1 (the sign of the value) and returns
// -1; otherwise *overflow is 0 and the converted value is returned.
Py_ssize_t
CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow)
{
    /* This version by Tim Peters */
    PyLongObject *v = (PyLongObject *)vv;
    size_t x, prev;
    Py_ssize_t res;
    Py_ssize_t i;
    int sign;

    *overflow = 0;

    res = -1;
    i = Py_SIZE(v);

    sign = 1;
    x = 0;
    if (i < 0) {
        sign = -1;
        i = -(i);
    }
    // Accumulate digits most-significant first; detect overflow by checking
    // that shifting back recovers the previous accumulator value.
    while (--i >= 0) {
        prev = x;
        x = (x << PyLong_SHIFT) + CPY_LONG_DIGIT(v, i);
        if ((x >> PyLong_SHIFT) != prev) {
            *overflow = sign;
            goto exit;
        }
    }
    /* Haven't lost any bits, but casting to long requires extra
     * care.
     */
    if (x <= (size_t)CPY_TAGGED_MAX) {
        res = (Py_ssize_t)x * sign;
    }
    else if (sign < 0 && x == CPY_TAGGED_ABS_MIN) {
        res = CPY_TAGGED_MIN;
    }
    else {
        *overflow = sign;
        /* res is already set to -1 */
    }
  exit:
    return res;
}
|
||||
|
||||
|
||||
#endif
|
||||
363
venv/lib/python3.11/site-packages/mypyc/lib-rt/pythonsupport.h
Normal file
363
venv/lib/python3.11/site-packages/mypyc/lib-rt/pythonsupport.h
Normal file
|
|
@ -0,0 +1,363 @@
|
|||
// Collects code that was copied in from cpython, for a couple of different reasons:
|
||||
// * We wanted to modify it to produce a more efficient version for our uses
|
||||
// * We needed to call it and it was static :(
|
||||
// * We wanted to call it and needed to backport it
|
||||
|
||||
#ifndef CPY_PYTHONSUPPORT_H
|
||||
#define CPY_PYTHONSUPPORT_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <Python.h>
|
||||
#include "pythoncapi_compat.h"
|
||||
#include <frameobject.h>
|
||||
#include <assert.h>
|
||||
#include "static_data.h"
|
||||
#include "mypyc_util.h"
|
||||
|
||||
#if CPY_3_13_FEATURES
|
||||
#ifndef Py_BUILD_CORE
|
||||
#define Py_BUILD_CORE
|
||||
#endif
|
||||
#include "internal/pycore_genobject.h" // _PyGen_FetchStopIterationValue
|
||||
#include "internal/pycore_pyerrors.h" // _PyErr_FormatFromCause, _PyErr_SetKeyError
|
||||
#include "internal/pycore_setobject.h" // _PySet_Update
|
||||
#endif
|
||||
|
||||
#if CPY_3_12_FEATURES
|
||||
#include "internal/pycore_frame.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#if 0
|
||||
} // why isn't emacs smart enough to not indent this
|
||||
#endif
|
||||
|
||||
PyObject* update_bases(PyObject *bases);
|
||||
int init_subclass(PyTypeObject *type, PyObject *kwds);
|
||||
|
||||
Py_ssize_t
|
||||
CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow);
|
||||
|
||||
#if CPY_3_12_FEATURES
|
||||
|
||||
// Fast inline conversion of a Python int to Py_ssize_t (Python 3.12+ layout).
// Handles the common single-digit and zero cases inline; everything else is
// delegated to the out-of-line slow path. *overflow is set to the value's
// sign on overflow, 0 otherwise.
static inline Py_ssize_t
CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow)
{
    /* This version by Tim Peters */
    PyLongObject *v = (PyLongObject *)vv;
    Py_ssize_t res;
    Py_ssize_t i;

    *overflow = 0;

    res = -1;
    i = CPY_LONG_TAG(v);

    // TODO: Combine zero and non-zero cases below?
    if (likely(i == (1 << CPY_NON_SIZE_BITS))) {
        // Exactly one digit, positive.
        res = CPY_LONG_DIGIT(v, 0);
    } else if (likely(i == CPY_SIGN_ZERO)) {
        res = 0;
    } else if (i == ((1 << CPY_NON_SIZE_BITS) | CPY_SIGN_NEGATIVE)) {
        // Exactly one digit, negative.
        res = -(sdigit)CPY_LONG_DIGIT(v, 0);
    } else {
        // Slow path is moved to a non-inline helper function to
        // limit size of generated code
        int overflow_local;
        res = CPyLong_AsSsize_tAndOverflow_(vv, &overflow_local);
        *overflow = overflow_local;
    }
    return res;
}
|
||||
|
||||
#else
|
||||
|
||||
// Adapted from longobject.c in Python 3.7.0
|
||||
|
||||
/* This function adapted from PyLong_AsLongLongAndOverflow, but with
|
||||
* some safety checks removed and specialized to only work for objects
|
||||
* that are already longs.
|
||||
* About half of the win this provides, though, just comes from being
|
||||
* able to inline the function, which in addition to saving function call
|
||||
* overhead allows the out-parameter overflow flag to be collapsed into
|
||||
* control flow.
|
||||
* Additionally, we check against the possible range of CPyTagged, not of
|
||||
* Py_ssize_t. */
|
||||
// Fast inline conversion of a Python int to Py_ssize_t (pre-3.12 layout,
// where Py_SIZE is the signed digit count). Single-digit and zero values are
// handled inline; everything else is delegated to the out-of-line slow path.
// *overflow is set to the value's sign on overflow, 0 otherwise.
static inline Py_ssize_t
CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow)
{
    /* This version by Tim Peters */
    PyLongObject *v = (PyLongObject *)vv;
    Py_ssize_t res;
    Py_ssize_t i;

    *overflow = 0;

    res = -1;
    i = Py_SIZE(v);

    if (likely(i == 1)) {
        // Exactly one digit, positive.
        res = CPY_LONG_DIGIT(v, 0);
    } else if (likely(i == 0)) {
        res = 0;
    } else if (i == -1) {
        // Exactly one digit, negative.
        res = -(sdigit)CPY_LONG_DIGIT(v, 0);
    } else {
        // Slow path is moved to a non-inline helper function to
        // limit size of generated code
        int overflow_local;
        res = CPyLong_AsSsize_tAndOverflow_(vv, &overflow_local);
        *overflow = overflow_local;
    }
    return res;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Adapted from listobject.c in Python 3.7.0
|
||||
// Resize `self` to hold exactly `newsize` elements, over-allocating
// proportionally to amortize repeated appends. Returns 0 on success, -1
// (with MemoryError set) on failure. New slots are left uninitialized;
// callers are responsible for filling them.
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    PyObject **items;
    size_t new_allocated, num_allocated_bytes;
    Py_ssize_t allocated = self->allocated;

    /* Bypass realloc() when a previous overallocation is large enough
       to accommodate the newsize. If the newsize falls lower than half
       the allocated size, then proceed with the realloc() to shrink the list.
    */
    if (allocated >= newsize && newsize >= (allocated >> 1)) {
        assert(self->ob_item != NULL || newsize == 0);
        Py_SET_SIZE(self, newsize);
        return 0;
    }

    /* This over-allocates proportional to the list size, making room
     * for additional growth. The over-allocation is mild, but is
     * enough to give linear-time amortized behavior over a long
     * sequence of appends() in the presence of a poorly-performing
     * system realloc().
     * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
     * Note: new_allocated won't overflow because the largest possible value
     * is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
     */
    new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
    if (new_allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        PyErr_NoMemory();
        return -1;
    }

    if (newsize == 0)
        new_allocated = 0;
    num_allocated_bytes = new_allocated * sizeof(PyObject *);
    items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
    if (items == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    // On realloc failure above, self->ob_item is left untouched and valid.
    self->ob_item = items;
    Py_SET_SIZE(self, newsize);
    self->allocated = new_allocated;
    return 0;
}
|
||||
|
||||
// Changed to use PyList_SetSlice instead of the internal list_ass_slice
|
||||
// Remove and return the element at `index` (negative indices count from the
// end). Returns a new reference, or NULL with IndexError (or another
// exception from list resizing) set.
static PyObject *
list_pop_impl(PyListObject *self, Py_ssize_t index)
{
    PyObject *v;
    int status;

    if (Py_SIZE(self) == 0) {
        /* Special-case most common failure cause */
        PyErr_SetString(PyExc_IndexError, "pop from empty list");
        return NULL;
    }
    if (index < 0)
        index += Py_SIZE(self);
    if (index < 0 || index >= Py_SIZE(self)) {
        PyErr_SetString(PyExc_IndexError, "pop index out of range");
        return NULL;
    }
    v = self->ob_item[index];
    if (index == Py_SIZE(self) - 1) {
        // Popping the final element: just shrink the list; the list's
        // reference to v transfers to the caller.
        status = list_resize(self, Py_SIZE(self) - 1);
        if (status >= 0)
            return v; /* and v now owns the reference the list had */
        else
            return NULL;
    }
    // Popping from the middle: take our own reference first, because the
    // slice deletion below drops the list's reference to v.
    Py_INCREF(v);
    status = PyList_SetSlice((PyObject *)self, index, index+1, (PyObject *)NULL);
    if (status < 0) {
        Py_DECREF(v);
        return NULL;
    }
    return v;
}
|
||||
|
||||
// Tweaked to directly use CPyTagged
|
||||
static CPyTagged
|
||||
list_count(PyListObject *self, PyObject *value)
|
||||
{
|
||||
Py_ssize_t count = 0;
|
||||
Py_ssize_t i;
|
||||
|
||||
for (i = 0; i < Py_SIZE(self); i++) {
|
||||
int cmp = PyObject_RichCompareBool(self->ob_item[i], value, Py_EQ);
|
||||
if (cmp > 0)
|
||||
count++;
|
||||
else if (cmp < 0)
|
||||
return CPY_INT_TAG;
|
||||
}
|
||||
return CPyTagged_ShortFromSsize_t(count);
|
||||
}
|
||||
|
||||
// Adapted from genobject.c in Python 3.7.2
|
||||
// Copied because it wasn't in 3.5.2 and it is undocumented anyways.
|
||||
/*
|
||||
* Set StopIteration with specified value. Value can be arbitrary object
|
||||
* or NULL.
|
||||
*
|
||||
* Returns 0 if StopIteration is set and -1 if any other exception is set.
|
||||
*/
|
||||
static int
CPyGen_SetStopIterationValue(PyObject *value)
{
    PyObject *e;

    // Fast path: for NULL, non-tuple, non-exception values PyErr_SetObject
    // can delay instantiating the StopIteration instance.
    if (value == NULL ||
        (!PyTuple_Check(value) && !PyExceptionInstance_Check(value)))
    {
        /* Delay exception instantiation if we can */
        PyErr_SetObject(PyExc_StopIteration, value);
        return 0;
    }
    /* Construct an exception instance manually with
     * PyObject_CallOneArg and pass it to PyErr_SetObject.
     *
     * We do this to handle a situation when "value" is a tuple, in which
     * case PyErr_SetObject would set the value of StopIteration to
     * the first element of the tuple.
     *
     * (See PyErr_SetObject/_PyErr_CreateException code for details.)
     */
    e = PyObject_CallOneArg(PyExc_StopIteration, value);
    if (e == NULL) {
        return -1;
    }
    PyErr_SetObject(PyExc_StopIteration, e);
    Py_DECREF(e);
    return 0;
}
|
||||
|
||||
// Copied from dictobject.c and dictobject.h, these are not Public before
|
||||
// Python 3.8. Also remove some error checks that we do in the callers.
|
||||
// Minimal clone of CPython's dict-view object layout (keys/values/items
// views), used so the runtime can build views without the private API.
typedef struct {
    PyObject_HEAD
    PyDictObject *dv_dict;  // owned reference to the underlying dict
} _CPyDictViewObject;

// Allocate a new dict view of `type` over `dict`. Returns a new reference,
// or NULL on allocation failure. `dict` is assumed to be a dict; the error
// checks from the CPython original are done by our callers.
static PyObject *
_CPyDictView_New(PyObject *dict, PyTypeObject *type)
{
    _CPyDictViewObject *dv = PyObject_GC_New(_CPyDictViewObject, type);
    if (dv == NULL)
        return NULL;
    Py_INCREF(dict);
    dv->dv_dict = (PyDictObject *)dict;
    PyObject_GC_Track(dv);
    return (PyObject *)dv;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CPY_3_12_FEATURES
|
||||
|
||||
// These are copied from genobject.c in Python 3.12
|
||||
|
||||
static int
|
||||
gen_is_coroutine(PyObject *o)
|
||||
{
|
||||
if (PyGen_CheckExact(o)) {
|
||||
PyCodeObject *code = PyGen_GetCode((PyGenObject*)o);
|
||||
if (code->co_flags & CO_ITERABLE_COROUTINE) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Copied from genobject.c in Python 3.10
|
||||
static int
|
||||
gen_is_coroutine(PyObject *o)
|
||||
{
|
||||
if (PyGen_CheckExact(o)) {
|
||||
PyCodeObject *code = (PyCodeObject *)((PyGenObject*)o)->gi_code;
|
||||
if (code->co_flags & CO_ITERABLE_COROUTINE) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This helper function returns an awaitable for `o`:
|
||||
* - `o` if `o` is a coroutine-object;
|
||||
* - `type(o)->tp_as_async->am_await(o)`
|
||||
*
|
||||
* Raises a TypeError if it's not possible to return
|
||||
* an awaitable and returns NULL.
|
||||
*/
|
||||
static PyObject *
CPyCoro_GetAwaitableIter(PyObject *o)
{
    unaryfunc getter = NULL;
    PyTypeObject *ot;

    if (PyCoro_CheckExact(o) || gen_is_coroutine(o)) {
        /* 'o' is a coroutine. */
        Py_INCREF(o);
        return o;
    }

    // Otherwise consult the type's __await__ slot (tp_as_async->am_await).
    ot = Py_TYPE(o);
    if (ot->tp_as_async != NULL) {
        getter = ot->tp_as_async->am_await;
    }
    if (getter != NULL) {
        PyObject *res = (*getter)(o);
        if (res != NULL) {
            if (PyCoro_CheckExact(res) || gen_is_coroutine(res)) {
                /* __await__ must return an *iterator*, not
                   a coroutine or another awaitable (see PEP 492) */
                PyErr_SetString(PyExc_TypeError,
                                "__await__() returned a coroutine");
                Py_CLEAR(res);
            } else if (!PyIter_Check(res)) {
                PyErr_Format(PyExc_TypeError,
                             "__await__() returned non-iterator "
                             "of type '%.100s'",
                             Py_TYPE(res)->tp_name);
                Py_CLEAR(res);
            }
        }
        // res is NULL here if __await__ raised or returned a bad value.
        return res;
    }

    PyErr_Format(PyExc_TypeError,
                 "object %.100s can't be used in 'await' expression",
                 ot->tp_name);
    return NULL;
}
|
||||
|
||||
|
||||
#endif
|
||||
17
venv/lib/python3.11/site-packages/mypyc/lib-rt/set_ops.c
Normal file
17
venv/lib/python3.11/site-packages/mypyc/lib-rt/set_ops.c
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
// Set primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.set_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// set.remove(key) primitive: returns true on success; on failure returns
// false with KeyError (missing key) or the error from PySet_Discard set.
bool CPySet_Remove(PyObject *set, PyObject *key) {
    switch (PySet_Discard(set, key)) {
        case 1:
            // Key was present and has been removed.
            return true;
        case 0:
            // Key was absent: remove() must raise KeyError, unlike discard().
            _PyErr_SetKeyError(key);
            return false;
        default:
            // PySet_Discard returned -1 and set an exception.
            return false;
    }
}
|
||||
75
venv/lib/python3.11/site-packages/mypyc/lib-rt/static_data.c
Normal file
75
venv/lib/python3.11/site-packages/mypyc/lib-rt/static_data.c
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
#ifndef STATIC_DATA
|
||||
#define STATIC_DATA
|
||||
|
||||
#include "static_data.h"
|
||||
|
||||
// Adapted from numpy 2.4.0: numpy/_core/src/multiarray/npy_static_data.c
|
||||
|
||||
mypyc_interned_str_struct mypyc_interned_str;
|
||||
|
||||
#define INTERN_STRING(struct_member, string) \
|
||||
assert(mypyc_interned_str.struct_member == NULL); \
|
||||
mypyc_interned_str.struct_member = PyUnicode_InternFromString(string); \
|
||||
if (mypyc_interned_str.struct_member == NULL) { \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
int
|
||||
intern_strings(void) {
|
||||
if (mypyc_interned_str.values != NULL) {
|
||||
// Already interned.
|
||||
return 0;
|
||||
}
|
||||
INTERN_STRING(__init_subclass__, "__init_subclass__");
|
||||
INTERN_STRING(__module__, "__module__");
|
||||
INTERN_STRING(__mro_entries__, "__mro_entries__");
|
||||
INTERN_STRING(__mypyc_attrs__, "__mypyc_attrs__");
|
||||
INTERN_STRING(__name__, "__name__");
|
||||
INTERN_STRING(__orig_bases__, "__orig_bases__");
|
||||
INTERN_STRING(__qualname__, "__qualname__");
|
||||
INTERN_STRING(__slots__, "__slots__");
|
||||
INTERN_STRING(__radd__, "__radd__");
|
||||
INTERN_STRING(__rsub__, "__rsub__");
|
||||
INTERN_STRING(__rmul__, "__rmul__");
|
||||
INTERN_STRING(__rtruediv__, "__rtruediv__");
|
||||
INTERN_STRING(__rmod__, "__rmod__");
|
||||
INTERN_STRING(__rdivmod__, "__rdivmod__");
|
||||
INTERN_STRING(__rfloordiv__, "__rfloordiv__");
|
||||
INTERN_STRING(__rpow__, "__rpow__");
|
||||
INTERN_STRING(__rmatmul__, "__rmatmul__");
|
||||
INTERN_STRING(__rand__, "__rand__");
|
||||
INTERN_STRING(__ror__, "__ror__");
|
||||
INTERN_STRING(__rxor__, "__rxor__");
|
||||
INTERN_STRING(__rlshift__, "__rlshift__");
|
||||
INTERN_STRING(__rrshift__, "__rrshift__");
|
||||
INTERN_STRING(__eq__, "__eq__");
|
||||
INTERN_STRING(__ne__, "__ne__");
|
||||
INTERN_STRING(__gt__, "__gt__");
|
||||
INTERN_STRING(__le__, "__le__");
|
||||
INTERN_STRING(__lt__, "__lt__");
|
||||
INTERN_STRING(__ge__, "__ge__");
|
||||
INTERN_STRING(clear, "clear");
|
||||
INTERN_STRING(close_, "close");
|
||||
INTERN_STRING(copy, "copy");
|
||||
INTERN_STRING(dispatch_cache, "dispatch_cache");
|
||||
INTERN_STRING(endswith, "endswith");
|
||||
INTERN_STRING(get_type_hints, "get_type_hints");
|
||||
INTERN_STRING(keys, "keys");
|
||||
INTERN_STRING(lower, "lower");
|
||||
INTERN_STRING(items, "items");
|
||||
INTERN_STRING(join, "join");
|
||||
INTERN_STRING(register_, "register");
|
||||
INTERN_STRING(registry, "registry");
|
||||
INTERN_STRING(send, "send");
|
||||
INTERN_STRING(setdefault, "setdefault");
|
||||
INTERN_STRING(startswith, "startswith");
|
||||
INTERN_STRING(super, "super");
|
||||
INTERN_STRING(throw_, "throw");
|
||||
INTERN_STRING(translate, "translate");
|
||||
INTERN_STRING(update, "update");
|
||||
INTERN_STRING(upper, "upper");
|
||||
INTERN_STRING(values, "values");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
72
venv/lib/python3.11/site-packages/mypyc/lib-rt/static_data.h
Normal file
72
venv/lib/python3.11/site-packages/mypyc/lib-rt/static_data.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#ifndef STATIC_DATA_H
|
||||
#define STATIC_DATA_H
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Adapted from numpy 2.4.0: numpy/_core/src/multiarray/npy_static_data.h
|
||||
|
||||
int intern_strings(void);
|
||||
|
||||
// Table of interned PyUnicode constants used by the mypyc runtime.
// Populated once by intern_strings(); every member is NULL before that.
// Members whose Python name is a C keyword or clashes otherwise carry a
// trailing underscore (close_, register_, throw_).
typedef struct mypyc_interned_str_struct {
    // Dunder names used during class creation and attribute protocols.
    PyObject *__init_subclass__;
    PyObject *__module__;
    PyObject *__mro_entries__;
    PyObject *__mypyc_attrs__;
    PyObject *__orig_bases__;
    PyObject *__qualname__;
    PyObject *__slots__;
    PyObject *__name__;
    // Reflected binary-operator dunders.
    PyObject *__radd__;
    PyObject *__rsub__;
    PyObject *__rmul__;
    PyObject *__rtruediv__;
    PyObject *__rmod__;
    PyObject *__rdivmod__;
    PyObject *__rfloordiv__;
    PyObject *__rpow__;
    PyObject *__rmatmul__;
    PyObject *__rand__;
    PyObject *__ror__;
    PyObject *__rxor__;
    PyObject *__rlshift__;
    PyObject *__rrshift__;
    // Rich-comparison dunders.
    PyObject *__eq__;
    PyObject *__ne__;
    PyObject *__gt__;
    PyObject *__le__;
    PyObject *__lt__;
    PyObject *__ge__;
    // Plain method/attribute names looked up at runtime.
    PyObject *clear;
    PyObject *close_;
    PyObject *copy;
    PyObject *dispatch_cache;
    PyObject *endswith;
    PyObject *get_type_hints;
    PyObject *keys;
    PyObject *lower;
    PyObject *items;
    PyObject *join;
    PyObject *register_;
    PyObject *registry;
    PyObject *send;
    PyObject *setdefault;
    PyObject *startswith;
    PyObject *super;
    PyObject *throw_;
    PyObject *translate;
    PyObject *update;
    PyObject *upper;
    PyObject *values;
} mypyc_interned_str_struct;
|
||||
|
||||
extern mypyc_interned_str_struct mypyc_interned_str;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
#include "str_extra_ops.h"
|
||||
|
||||
// All str extra ops are inline functions in str_extra_ops.h
|
||||
// This file exists to satisfy the SourceDep requirements
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#ifndef MYPYC_STR_EXTRA_OPS_H
|
||||
#define MYPYC_STR_EXTRA_OPS_H
|
||||
|
||||
#include <Python.h>
|
||||
#include <stdint.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// Optimized str indexing for ord(s[i])
|
||||
|
||||
// If index is negative, convert to non-negative index (no range checking)
|
||||
// Normalize a possibly-negative string index by adding the string length.
// No range checking is performed.
static inline int64_t CPyStr_AdjustIndex(PyObject *obj, int64_t index) {
    return index >= 0 ? index : index + PyUnicode_GET_LENGTH(obj);
}
|
||||
|
||||
// Check if index is in valid range [0, len)
|
||||
// True iff `index` falls inside [0, len(obj)).
static inline bool CPyStr_RangeCheck(PyObject *obj, int64_t index) {
    if (index < 0) {
        return false;
    }
    return index < PyUnicode_GET_LENGTH(obj);
}
|
||||
|
||||
// Get character at index as int (ord value) - no bounds checking, returns as CPyTagged
|
||||
// ord(s[i]) fast path: read the code point at `index` and return it as a
// tagged short int. No bounds checking is performed.
static inline CPyTagged CPyStr_GetItemUnsafeAsInt(PyObject *obj, int64_t index) {
    const void *data = PyUnicode_DATA(obj);
    Py_UCS4 code_point = PyUnicode_READ(PyUnicode_KIND(obj), data, index);
    // << 1 produces the tagged (short int) representation.
    return code_point << 1;
}
|
||||
|
||||
#endif
|
||||
793
venv/lib/python3.11/site-packages/mypyc/lib-rt/str_ops.c
Normal file
793
venv/lib/python3.11/site-packages/mypyc/lib-rt/str_ops.c
Normal file
|
|
@ -0,0 +1,793 @@
|
|||
#include "pythoncapi_compat.h"
|
||||
|
||||
// String primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.str_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// The _PyUnicode_CheckConsistency definition has been moved to the internal API
|
||||
// https://github.com/python/cpython/pull/106398
|
||||
#if defined(Py_DEBUG) && CPY_3_13_FEATURES
|
||||
#include "internal/pycore_unicodeobject.h"
|
||||
#endif
|
||||
|
||||
// Copied from cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
|
||||
#define BLOOM_MASK unsigned long
|
||||
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
|
||||
#if LONG_BIT >= 128
|
||||
#define BLOOM_WIDTH 128
|
||||
#elif LONG_BIT >= 64
|
||||
#define BLOOM_WIDTH 64
|
||||
#elif LONG_BIT >= 32
|
||||
#define BLOOM_WIDTH 32
|
||||
#else
|
||||
#error "LONG_BIT is smaller than 32"
|
||||
#endif
|
||||
|
||||
// Copied from cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
|
||||
// This is needed for str.strip("...").
|
||||
// Build a bloom-filter bitmask over the code points of a unicode buffer
// (`ptr`/`len` of representation `kind`). A set bit for (ch mod BLOOM_WIDTH)
// lets str.strip()-style loops reject most non-member characters with one
// AND instead of a search.
static inline BLOOM_MASK
make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
{
// Per-representation loop body; `break` exits the enclosing do/while after
// one pass. Expanded once per PyUnicode kind below.
#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN)             \
    do {                                               \
        TYPE *data = (TYPE *)PTR;                      \
        TYPE *end = data + LEN;                        \
        Py_UCS4 ch;                                    \
        for (; data != end; data++) {                  \
            ch = *data;                                \
            MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \
        }                                              \
        break;                                         \
    } while (0)

    /* calculate simple bloom-style bitmask for a given unicode string */

    BLOOM_MASK mask;

    mask = 0;
    switch (kind) {
    case PyUnicode_1BYTE_KIND:
        BLOOM_UPDATE(Py_UCS1, mask, ptr, len);
        break;
    case PyUnicode_2BYTE_KIND:
        BLOOM_UPDATE(Py_UCS2, mask, ptr, len);
        break;
    case PyUnicode_4BYTE_KIND:
        BLOOM_UPDATE(Py_UCS4, mask, ptr, len);
        break;
    default:
        Py_UNREACHABLE();
    }
    return mask;

#undef BLOOM_UPDATE
}
|
||||
|
||||
// Shared tail of CPyStr_Equal / CPyStr_EqualLiteral: raw-data equality for
// two unicode objects already known not to be the same object.
static inline char _CPyStr_Equal_NoIdentCheck(PyObject *str1, PyObject *str2, Py_ssize_t str2_length) {
    // Differing lengths or storage kinds cannot compare equal byte-wise.
    if (PyUnicode_GET_LENGTH(str1) != str2_length) {
        return 0;
    }
    int kind = PyUnicode_KIND(str1);
    if (kind != PyUnicode_KIND(str2)) {
        return 0;
    }
    // Same length and kind: a flat memcmp over the code-unit buffers decides.
    return memcmp(PyUnicode_DATA(str1), PyUnicode_DATA(str2), str2_length * kind) == 0;
}
|
||||
|
||||
// Adapted from CPython 3.13.1 (_PyUnicode_Equal)
|
||||
// str == str primitive: identity short-circuit, then raw-data comparison.
char CPyStr_Equal(PyObject *str1, PyObject *str2) {
    return (str1 == str2)
        ? 1
        : _CPyStr_Equal_NoIdentCheck(str1, str2, PyUnicode_GET_LENGTH(str2));
}
|
||||
|
||||
// Comparison against a literal whose length is known at compile time, so the
// length lookup on the literal side can be skipped.
char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t literal_length) {
    return (str == literal_str)
        ? 1
        : _CPyStr_Equal_NoIdentCheck(str, literal_str, literal_length);
}
|
||||
|
||||
// s[i] primitive for str with a tagged-int index. Returns a new reference
// (1-character string), or NULL with IndexError/OverflowError set.
PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) {
    if (PyUnicode_READY(str) != -1) {
        if (CPyTagged_CheckShort(index)) {
            Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
            Py_ssize_t size = PyUnicode_GET_LENGTH(str);
            // Normalize negative indices, then bounds-check.
            if (n < 0)
                n += size;
            if (n < 0 || n >= size) {
                PyErr_SetString(PyExc_IndexError, "string index out of range");
                return NULL;
            }
            enum PyUnicode_Kind kind = (enum PyUnicode_Kind)PyUnicode_KIND(str);
            void *data = PyUnicode_DATA(str);
            Py_UCS4 ch = PyUnicode_READ(kind, data, n);
            if (ch < 256) {
                // Latin-1 single-char strings are cached by CPython, so
                // PyUnicode_FromOrdinal returns the cached object (with a
                // new reference) instead of allocating a new string each time.
                return PyUnicode_FromOrdinal(ch);
            }
            // ch >= 256: build a fresh 1-character string of the right width.
            PyObject *unicode = PyUnicode_New(1, ch);
            if (unicode == NULL)
                return NULL;
            if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
                PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
            } else {
                assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
                PyUnicode_4BYTE_DATA(unicode)[0] = ch;
            }
            return unicode;
        } else {
            // Boxed (arbitrary-precision) index: cannot be a valid position.
            PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
            return NULL;
        }
    } else {
        // PyUnicode_READY failed; fall back to the generic protocol.
        // NOTE(review): CPyTagged_AsObject presumably returns a new
        // reference; if so, index_obj is never released on this path --
        // verify against the CPyTagged API and add a Py_DECREF if needed.
        PyObject *index_obj = CPyTagged_AsObject(index);
        return PyObject_GetItem(str, index_obj);
    }
}
|
||||
|
||||
// s[i] with a plain Py_ssize_t index, delegating to CPyStr_GetItem.
// Unsafe because tagging the index with << 1 is not checked for overflow.
PyObject *CPyStr_GetItemUnsafe(PyObject *str, Py_ssize_t index) {
    CPyTagged tagged_index = index << 1;
    return CPyStr_GetItem(str, tagged_index);
}
|
||||
|
||||
// A simplification of _PyUnicode_JoinArray() from CPython 3.9.6
|
||||
// Concatenate `len` PyUnicode objects passed as varargs into one new string.
// Returns a new reference, or NULL with TypeError/OverflowError set.
// The vararg list is walked twice: first to size the result and pick the
// copy strategy, then to copy the characters.
PyObject *CPyStr_Build(Py_ssize_t len, ...) {
    Py_ssize_t i;
    va_list args;

    // Calculate the total amount of space and check
    // whether all components have the same kind.
    Py_ssize_t sz = 0;
    Py_UCS4 maxchar = 0;
    int use_memcpy = 1; // Use memcpy by default
    PyObject *last_obj = NULL;

    va_start(args, len);
    for (i = 0; i < len; i++) {
        PyObject *item = va_arg(args, PyObject *);
        if (!PyUnicode_Check(item)) {
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected str instance,"
                         " %.80s found",
                         i, Py_TYPE(item)->tp_name);
            return NULL;
        }
        if (PyUnicode_READY(item) == -1)
            return NULL;

        size_t add_sz = PyUnicode_GET_LENGTH(item);
        Py_UCS4 item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
        maxchar = Py_MAX(maxchar, item_maxchar);

        // Using size_t to avoid overflow during arithmetic calculation
        if (add_sz > (size_t)(PY_SSIZE_T_MAX - sz)) {
            PyErr_SetString(PyExc_OverflowError,
                            "join() result is too long for a Python string");
            return NULL;
        }
        sz += add_sz;

        // If these strings have different kind, we would call
        // _PyUnicode_FastCopyCharacters() in the following part.
        if (use_memcpy && last_obj != NULL) {
            if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
                use_memcpy = 0;
        }
        last_obj = item;
    }
    va_end(args);

    // Construct the string
    PyObject *res = PyUnicode_New(sz, maxchar);
    if (res == NULL)
        return NULL;

    if (use_memcpy) {
        // All inputs share one representation kind: copy raw code units.
        unsigned char *res_data = PyUnicode_1BYTE_DATA(res);
        unsigned int kind = PyUnicode_KIND(res);

        va_start(args, len);
        for (i = 0; i < len; ++i) {
            PyObject *item = va_arg(args, PyObject *);
            Py_ssize_t itemlen = PyUnicode_GET_LENGTH(item);
            if (itemlen != 0) {
                memcpy(res_data, PyUnicode_DATA(item), kind * itemlen);
                res_data += kind * itemlen;
            }
        }
        va_end(args);
        assert(res_data == PyUnicode_1BYTE_DATA(res) + kind * PyUnicode_GET_LENGTH(res));
    } else {
        // Mixed kinds: use the character-copy API, which widens as needed.
        Py_ssize_t res_offset = 0;

        va_start(args, len);
        for (i = 0; i < len; ++i) {
            PyObject *item = va_arg(args, PyObject *);
            Py_ssize_t itemlen = PyUnicode_GET_LENGTH(item);
            if (itemlen != 0) {
#if CPY_3_13_FEATURES
                PyUnicode_CopyCharacters(res, res_offset, item, 0, itemlen);
#else
                _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
#endif
                res_offset += itemlen;
            }
        }
        va_end(args);
        assert(res_offset == PyUnicode_GET_LENGTH(res));
    }

#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(res, 1));
#endif
    return res;
}
|
||||
|
||||
// str.find()/str.rfind() primitive: search over the whole string from 'start'.
// 'start' is a tagged int; the end bound is the full string length encoded in
// tagged short form (value << 1). Delegates to CPyStr_FindWithEnd.
CPyTagged CPyStr_Find(PyObject *str, PyObject *substr, CPyTagged start, int direction) {
    CPyTagged end = PyUnicode_GET_LENGTH(str) << 1;
    return CPyStr_FindWithEnd(str, substr, start, end, direction);
}
|
||||
|
||||
// str.find()/str.rfind() primitive with explicit [start, end) bounds.
// direction > 0 searches forward, < 0 backward (CPython PyUnicode_Find
// convention). Returns the index as a tagged int, or CPY_INT_TAG on error.
CPyTagged CPyStr_FindWithEnd(PyObject *str, PyObject *substr, CPyTagged start, CPyTagged end, int direction) {
    Py_ssize_t temp_start = CPyTagged_AsSsize_t(start);
    if (temp_start == -1 && PyErr_Occurred()) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return CPY_INT_TAG;
    }
    Py_ssize_t temp_end = CPyTagged_AsSsize_t(end);
    if (temp_end == -1 && PyErr_Occurred()) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return CPY_INT_TAG;
    }
    // PyUnicode_Find returns -2 on error, -1 for "not found" (which tags to -1 below).
    Py_ssize_t index = PyUnicode_Find(str, substr, temp_start, temp_end, direction);
    if (unlikely(index == -2)) {
        return CPY_INT_TAG;
    }
    return index << 1;
}
|
||||
|
||||
// str.split() primitive. 'max_split' is a tagged int; -1 means no limit
// (PyUnicode_Split convention). Returns a new list or NULL on error.
PyObject *CPyStr_Split(PyObject *str, PyObject *sep, CPyTagged max_split) {
    Py_ssize_t limit = CPyTagged_AsSsize_t(max_split);
    if (limit == -1 && PyErr_Occurred()) {
        // Tagged value did not fit in a Py_ssize_t.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    return PyUnicode_Split(str, sep, limit);
}
|
||||
|
||||
// str.rsplit() primitive. Mirrors CPyStr_Split but splits from the right.
PyObject *CPyStr_RSplit(PyObject *str, PyObject *sep, CPyTagged max_split) {
    Py_ssize_t limit = CPyTagged_AsSsize_t(max_split);
    if (limit == -1 && PyErr_Occurred()) {
        // Tagged value did not fit in a Py_ssize_t.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    return PyUnicode_RSplit(str, sep, limit);
}
|
||||
|
||||
// This function has been copied from _PyUnicode_XStrip in cpython.git:Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
// Strip characters contained in 'sepobj' from one or both ends of 'self'.
// striptype is LEFTSTRIP, RIGHTSTRIP, or BOTHSTRIP. Returns a new string
// (a substring of self), or NULL on error.
static PyObject *_PyStr_XStrip(PyObject *self, int striptype, PyObject *sepobj) {
    const void *data;
    int kind;
    Py_ssize_t i, j, len;
    BLOOM_MASK sepmask;
    Py_ssize_t seplen;

    // This check is needed from Python 3.9 and earlier.
    if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
        return NULL;

    kind = PyUnicode_KIND(self);
    data = PyUnicode_DATA(self);
    len = PyUnicode_GET_LENGTH(self);
    seplen = PyUnicode_GET_LENGTH(sepobj);
    // Bloom mask gives a cheap "definitely not a separator" fast path;
    // a positive hit still needs the exact PyUnicode_FindChar check below.
    sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
                              PyUnicode_DATA(sepobj),
                              seplen);

    // Advance i past leading separator characters (unless RIGHTSTRIP only).
    i = 0;
    if (striptype != RIGHTSTRIP) {
        while (i < len) {
            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
            if (!BLOOM(sepmask, ch))
                break;
            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
                break;
            i++;
        }
    }

    // Retreat j past trailing separator characters (unless LEFTSTRIP only).
    j = len;
    if (striptype != LEFTSTRIP) {
        j--;
        while (j >= i) {
            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
            if (!BLOOM(sepmask, ch))
                break;
            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
                break;
            j--;
        }

        j++;
    }

    return PyUnicode_Substring(self, i, j);
}
|
||||
|
||||
// Copied from do_strip function in cpython.git/Objects/unicodeobject.c@0ef4ffeefd1737c18dc9326133c7894d58108c2e.
// str.strip()/lstrip()/rstrip() primitive. With sep NULL/None, strips
// whitespace; otherwise strips characters in 'sep' (via _PyStr_XStrip).
// Returns a new string or NULL on error.
PyObject *_CPyStr_Strip(PyObject *self, int strip_type, PyObject *sep) {
    if (sep == NULL || Py_IsNone(sep)) {
        Py_ssize_t len, i, j;

        // This check is needed from Python 3.9 and earlier.
        if (PyUnicode_READY(self) == -1)
            return NULL;

        len = PyUnicode_GET_LENGTH(self);

        if (PyUnicode_IS_ASCII(self)) {
            // ASCII fast path: whitespace test is a table lookup.
            const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);

            i = 0;
            if (strip_type != RIGHTSTRIP) {
                while (i < len) {
                    Py_UCS1 ch = data[i];
                    if (!_Py_ascii_whitespace[ch])
                        break;
                    i++;
                }
            }

            j = len;
            if (strip_type != LEFTSTRIP) {
                j--;
                while (j >= i) {
                    Py_UCS1 ch = data[j];
                    if (!_Py_ascii_whitespace[ch])
                        break;
                    j--;
                }
                j++;
            }
        }
        else {
            // General (non-ASCII) path: read code points via the kind-aware macro.
            int kind = PyUnicode_KIND(self);
            const void *data = PyUnicode_DATA(self);

            i = 0;
            if (strip_type != RIGHTSTRIP) {
                while (i < len) {
                    Py_UCS4 ch = PyUnicode_READ(kind, data, i);
                    if (!Py_UNICODE_ISSPACE(ch))
                        break;
                    i++;
                }
            }

            j = len;
            if (strip_type != LEFTSTRIP) {
                j--;
                while (j >= i) {
                    Py_UCS4 ch = PyUnicode_READ(kind, data, j);
                    if (!Py_UNICODE_ISSPACE(ch))
                        break;
                    j--;
                }
                j++;
            }
        }

        return PyUnicode_Substring(self, i, j);
    }
    return _PyStr_XStrip(self, strip_type, sep);
}
|
||||
|
||||
// str.replace() primitive. 'max_replace' is a tagged int; -1 means replace
// all occurrences (PyUnicode_Replace convention). Returns a new string or
// NULL on error.
PyObject *CPyStr_Replace(PyObject *str, PyObject *old_substr,
                         PyObject *new_substr, CPyTagged max_replace) {
    Py_ssize_t count = CPyTagged_AsSsize_t(max_replace);
    if (count == -1 && PyErr_Occurred()) {
        // Tagged value did not fit in a Py_ssize_t.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    return PyUnicode_Replace(str, old_substr, new_substr, count);
}
|
||||
|
||||
// str.startswith() primitive. 'subobj' may be a str or a tuple of strs.
// Returns 1 on match, 0 on no match, 2 if a tuple element is not a str
// (TypeError is set), and -1 if PyUnicode_Tailmatch fails on the non-tuple
// path (its error result is passed through).
int CPyStr_Startswith(PyObject *self, PyObject *subobj) {
    Py_ssize_t start = 0;
    Py_ssize_t end = PyUnicode_GET_LENGTH(self);
    if (PyTuple_Check(subobj)) {
        Py_ssize_t i;
        for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
            PyObject *substring = PyTuple_GET_ITEM(subobj, i);
            if (!PyUnicode_Check(substring)) {
                PyErr_Format(PyExc_TypeError,
                             "tuple for startswith must only contain str, "
                             "not %.100s",
                             Py_TYPE(substring)->tp_name);
                return 2;
            }
            // direction -1 = match at the start (prefix).
            int result = PyUnicode_Tailmatch(self, substring, start, end, -1);
            if (result) {
                return 1;
            }
        }
        return 0;
    }
    return PyUnicode_Tailmatch(self, subobj, start, end, -1);
}
|
||||
|
||||
// str.endswith() primitive. 'subobj' may be a str or a tuple of strs.
// Return values mirror CPyStr_Startswith: 1 match, 0 no match, 2 tuple
// element of wrong type (TypeError set), -1 passed through from
// PyUnicode_Tailmatch on the non-tuple path.
int CPyStr_Endswith(PyObject *self, PyObject *subobj) {
    Py_ssize_t start = 0;
    Py_ssize_t end = PyUnicode_GET_LENGTH(self);
    if (PyTuple_Check(subobj)) {
        Py_ssize_t i;
        for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
            PyObject *substring = PyTuple_GET_ITEM(subobj, i);
            if (!PyUnicode_Check(substring)) {
                PyErr_Format(PyExc_TypeError,
                             "tuple for endswith must only contain str, "
                             "not %.100s",
                             Py_TYPE(substring)->tp_name);
                return 2;
            }
            // direction 1 = match at the end (suffix).
            int result = PyUnicode_Tailmatch(self, substring, start, end, 1);
            if (result) {
                return 1;
            }
        }
        return 0;
    }
    return PyUnicode_Tailmatch(self, subobj, start, end, 1);
}
|
||||
|
||||
// str.removeprefix() primitive. Returns a new reference: a substring with
// the prefix dropped if it matches, otherwise the string itself.
PyObject *CPyStr_Removeprefix(PyObject *self, PyObject *prefix) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(self);
    // direction -1 = test at the start of the string.
    if (!PyUnicode_Tailmatch(self, prefix, 0, len, -1)) {
        return Py_NewRef(self);
    }
    return PyUnicode_Substring(self, PyUnicode_GET_LENGTH(prefix), len);
}
|
||||
|
||||
// str.removesuffix() primitive. Returns a new reference: a substring with
// the suffix dropped if it matches, otherwise the string itself.
PyObject *CPyStr_Removesuffix(PyObject *self, PyObject *suffix) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(self);
    // direction 1 = test at the end of the string.
    if (!PyUnicode_Tailmatch(self, suffix, 0, len, 1)) {
        return Py_NewRef(self);
    }
    return PyUnicode_Substring(self, 0, len - PyUnicode_GET_LENGTH(suffix));
}
|
||||
|
||||
/* This does a dodgy attempt to append in place */
// Steals the reference to o1: PyUnicode_Append replaces *first with the
// concatenation (appending in place when it owns the only reference) and
// sets it to NULL on error, which is what we return.
PyObject *CPyStr_Append(PyObject *o1, PyObject *o2) {
    PyUnicode_Append(&o1, o2);
    return o1;
}
|
||||
|
||||
// str slicing primitive (obj[start:end] with step 1). Fast path for exact
// str operands with short tagged indices; negative indices are adjusted
// relative to the length and clamped at 0, matching Python slice semantics.
// Falls back to the generic object slicing helper otherwise.
PyObject *CPyStr_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
    if (likely(PyUnicode_CheckExact(obj)
               && CPyTagged_CheckShort(start) && CPyTagged_CheckShort(end))) {
        Py_ssize_t startn = CPyTagged_ShortAsSsize_t(start);
        Py_ssize_t endn = CPyTagged_ShortAsSsize_t(end);
        if (startn < 0) {
            // Negative index counts from the end; clamp to 0 if still negative.
            startn += PyUnicode_GET_LENGTH(obj);
            if (startn < 0) {
                startn = 0;
            }
        }
        if (endn < 0) {
            endn += PyUnicode_GET_LENGTH(obj);
            if (endn < 0) {
                endn = 0;
            }
        }
        return PyUnicode_Substring(obj, startn, endn);
    }
    return CPyObject_GetSlice(obj, start, end);
}
|
||||
|
||||
/* Check if the given string is true (i.e. its length isn't zero) */
|
||||
bool CPyStr_IsTrue(PyObject *obj) {
|
||||
Py_ssize_t length = PyUnicode_GET_LENGTH(obj);
|
||||
return length != 0;
|
||||
}
|
||||
|
||||
// len(str) primitive returning a native Py_ssize_t.
// PyUnicode_READY is only relevant on Python versions with lazily-readied
// strings; returns -1 on failure (with an exception set by READY).
Py_ssize_t CPyStr_Size_size_t(PyObject *str) {
    if (PyUnicode_READY(str) != -1) {
        return PyUnicode_GET_LENGTH(str);
    }
    return -1;
}
|
||||
|
||||
// bytes.decode() primitive. 'encoding' and 'errors' may be NULL, meaning
// the PyUnicode_Decode defaults (UTF-8 / "strict"). Fast path reads the
// bytes object's fields directly; other buffer-like objects go through
// PyUnicode_FromEncodedObject. Returns a new str or NULL on error.
PyObject *CPy_Decode(PyObject *obj, PyObject *encoding, PyObject *errors) {
    const char *enc = NULL;
    const char *err = NULL;
    if (encoding) {
        enc = PyUnicode_AsUTF8AndSize(encoding, NULL);
        if (!enc) return NULL;
    }
    if (errors) {
        err = PyUnicode_AsUTF8AndSize(errors, NULL);
        if (!err) return NULL;
    }
    if (PyBytes_Check(obj)) {
        // Access the bytes payload and length directly from the object layout.
        return PyUnicode_Decode(((PyBytesObject *)obj)->ob_sval,
                                ((PyVarObject *)obj)->ob_size,
                                enc, err);
    } else {
        return PyUnicode_FromEncodedObject(obj, enc, err);
    }
}
|
||||
|
||||
// bytes.decode("utf-8") primitive (strict error handling).
// Exact bytes objects take the direct decoding path; everything else goes
// through the generic encoded-object API. Returns a new str or NULL on error.
PyObject *CPy_DecodeUTF8(PyObject *bytes) {
    if (!PyBytes_CheckExact(bytes)) {
        return PyUnicode_FromEncodedObject(bytes, "utf-8", "strict");
    }
    char *data = PyBytes_AsString(bytes); // Borrowed reference
    if (data == NULL) {
        return NULL;
    }
    return PyUnicode_DecodeUTF8(data, PyBytes_Size(bytes), "strict");
}
|
||||
|
||||
// bytes.decode("ascii") primitive (strict error handling).
// Exact bytes objects take the direct decoding path; everything else goes
// through the generic encoded-object API. Returns a new str or NULL on error.
// Fix: removed a stray double semicolon after the DecodeASCII return.
PyObject *CPy_DecodeASCII(PyObject *bytes) {
    if (PyBytes_CheckExact(bytes)) {
        char *buffer = PyBytes_AsString(bytes); // Borrowed reference
        if (buffer == NULL) {
            return NULL;
        }
        Py_ssize_t size = PyBytes_Size(bytes);
        return PyUnicode_DecodeASCII(buffer, size, "strict");
    } else {
        return PyUnicode_FromEncodedObject(bytes, "ascii", "strict");
    }
}
|
||||
|
||||
// bytes.decode("latin1") primitive (strict error handling).
// Exact bytes objects take the direct decoding path; everything else goes
// through the generic encoded-object API. Returns a new str or NULL on error.
PyObject *CPy_DecodeLatin1(PyObject *bytes) {
    if (!PyBytes_CheckExact(bytes)) {
        return PyUnicode_FromEncodedObject(bytes, "latin1", "strict");
    }
    char *data = PyBytes_AsString(bytes); // Borrowed reference
    if (data == NULL) {
        return NULL;
    }
    return PyUnicode_DecodeLatin1(data, PyBytes_Size(bytes), "strict");
}
|
||||
|
||||
// str.encode() primitive. 'encoding' and 'errors' may be NULL, meaning the
// PyUnicode_AsEncodedString defaults (UTF-8 / "strict"). Non-str input is
// rejected with a TypeError via PyErr_BadArgument. Returns new bytes or NULL.
PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors) {
    const char *enc = NULL;
    const char *err = NULL;
    if (encoding) {
        enc = PyUnicode_AsUTF8AndSize(encoding, NULL);
        if (!enc) return NULL;
    }
    if (errors) {
        err = PyUnicode_AsUTF8AndSize(errors, NULL);
        if (!err) return NULL;
    }
    if (PyUnicode_Check(obj)) {
        return PyUnicode_AsEncodedString(obj, enc, err);
    } else {
        PyErr_BadArgument();
        return NULL;
    }
}
|
||||
|
||||
// str.count() primitive counting from 'start' through the end of the string.
// Returns the number of non-overlapping occurrences, or -1 on error.
Py_ssize_t CPyStr_Count(PyObject *unicode, PyObject *substring, CPyTagged start) {
    Py_ssize_t begin = CPyTagged_AsSsize_t(start);
    if (begin == -1 && PyErr_Occurred()) {
        // Tagged value did not fit in a Py_ssize_t.
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return -1;
    }
    return PyUnicode_Count(unicode, substring, begin, PyUnicode_GET_LENGTH(unicode));
}
|
||||
|
||||
// str.count() primitive with explicit [start, end) bounds.
// Returns the number of non-overlapping occurrences, or -1 on error.
Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged start, CPyTagged end) {
    Py_ssize_t begin = CPyTagged_AsSsize_t(start);
    if (begin == -1 && PyErr_Occurred()) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return -1;
    }
    Py_ssize_t stop = CPyTagged_AsSsize_t(end);
    if (stop == -1 && PyErr_Occurred()) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return -1;
    }
    return PyUnicode_Count(unicode, substring, begin, stop);
}
|
||||
|
||||
|
||||
// ord() primitive for str arguments. Returns the single character's code
// point as a tagged int, or CPY_INT_TAG with a TypeError if the string's
// length is not exactly 1 (same message as CPython's ord()).
CPyTagged CPyStr_Ord(PyObject *obj) {
    Py_ssize_t s = PyUnicode_GET_LENGTH(obj);
    if (s == 1) {
        int kind = PyUnicode_KIND(obj);
        // Code points fit in 21 bits, so the tagged shift cannot overflow.
        return PyUnicode_READ(kind, PyUnicode_DATA(obj), 0) << 1;
    }
    PyErr_Format(
        PyExc_TypeError, "ord() expected a character, but a string of length %zd found", s);
    return CPY_INT_TAG;
}
|
||||
|
||||
// str * int primitive. Returns a new string, or NULL on error (including a
// 'count' that does not fit in a Py_ssize_t).
PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count) {
    Py_ssize_t n = CPyTagged_AsSsize_t(count);
    if (n == -1 && PyErr_Occurred()) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    return PySequence_Repeat(str, n);
}
|
||||
|
||||
|
||||
// str.isspace() primitive: true iff the string is non-empty and every code
// point is whitespace. ASCII strings use a table lookup; others use the
// general Unicode predicate.
bool CPyStr_IsSpace(PyObject *str) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(str);
    if (len == 0) return false;

    if (PyUnicode_IS_ASCII(str)) {
        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(str);
        for (Py_ssize_t i = 0; i < len; i++) {
            if (!_Py_ascii_whitespace[data[i]])
                return false;
        }
        return true;
    }

    int kind = PyUnicode_KIND(str);
    const void *data = PyUnicode_DATA(str);
    for (Py_ssize_t i = 0; i < len; i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        if (!Py_UNICODE_ISSPACE(ch))
            return false;
    }
    return true;
}
|
||||
|
||||
// str.isalnum() primitive: true iff the string is non-empty and every code
// point is alphanumeric. ASCII strings use the ctype-style fast predicate;
// others use the general Unicode predicate.
bool CPyStr_IsAlnum(PyObject *str) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(str);
    if (len == 0) return false;

    if (PyUnicode_IS_ASCII(str)) {
        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(str);
        for (Py_ssize_t i = 0; i < len; i++) {
            if (!Py_ISALNUM(data[i]))
                return false;
        }
        return true;
    }

    int kind = PyUnicode_KIND(str);
    const void *data = PyUnicode_DATA(str);
    for (Py_ssize_t i = 0; i < len; i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
        if (!Py_UNICODE_ISALNUM(ch))
            return false;
    }
    return true;
}
|
||||
|
||||
// ASCII-only case mappings used by the fast path in CPyStr_ChangeCase.
static inline int CPy_ASCII_Lower(unsigned char c) { return Py_TOLOWER(c); }
static inline int CPy_ASCII_Upper(unsigned char c) { return Py_TOUPPER(c); }
|
||||
|
||||
// Shared implementation of str.lower()/str.upper().
// ascii_func handles the 1-to-1 ASCII fast path. For non-ASCII strings the
// strategy differs by Python version (see the #if branches): on 3.13+ the
// named str method is called; earlier, unicode_func performs the full
// (possibly 1-to-N) case mapping. Returns a new string or NULL on error.
static inline PyObject *CPyStr_ChangeCase(PyObject *self,
                                          int (*ascii_func)(unsigned char),
#if CPY_3_13_FEATURES
                                          PyObject *method_name
#else
                                          int (*unicode_func)(Py_UCS4, Py_UCS4 *)
#endif
) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(self);
    if (len == 0) {
        // Empty string maps to itself; just add a reference.
        Py_INCREF(self);
        return self;
    }

    // ASCII fast path: 1-to-1, no expansion possible
    if (PyUnicode_IS_ASCII(self)) {
        PyObject *res = PyUnicode_New(len, 127);
        if (res == NULL) return NULL;
        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
        Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
        for (Py_ssize_t i = 0; i < len; i++) {
            res_data[i] = ascii_func(data[i]);
        }
        return res;
    }

#if CPY_3_13_FEATURES
    // On 3.13+, _PyUnicode_ToLowerFull/ToUpperFull are no longer exported,
    // so fall back to CPython's method implementation for non-ASCII strings.
    return PyObject_CallMethodNoArgs(self, method_name);
#else
    // General Unicode: unicode_func handles 1-to-N expansion.
    // Worst case: each codepoint expands to 3 (per Unicode standard).
    // The tmp buffer is short-lived, and PyUnicode_FromKindAndData
    // compacts the result to the optimal string kind automatically.
    int kind = PyUnicode_KIND(self);
    const void *data = PyUnicode_DATA(self);
    Py_UCS4 *tmp = PyMem_Malloc(sizeof(Py_UCS4) * len * 3);
    if (tmp == NULL) return PyErr_NoMemory();

    Py_UCS4 mapped[3];
    Py_ssize_t out_len = 0;
    for (Py_ssize_t i = 0; i < len; i++) {
        int n = unicode_func(PyUnicode_READ(kind, data, i), mapped);
        for (int j = 0; j < n; j++) {
            tmp[out_len++] = mapped[j];
        }
    }

    PyObject *res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, tmp, out_len);
    PyMem_Free(tmp);
    return res;
#endif
}
|
||||
|
||||
// str.lower() primitive; dispatches to CPyStr_ChangeCase with the
// version-appropriate non-ASCII strategy.
PyObject *CPyStr_Lower(PyObject *self) {
#if CPY_3_13_FEATURES
    return CPyStr_ChangeCase(self, CPy_ASCII_Lower, mypyc_interned_str.lower);
#else
    return CPyStr_ChangeCase(self, CPy_ASCII_Lower, _PyUnicode_ToLowerFull);
#endif
}
|
||||
|
||||
// str.upper() primitive; dispatches to CPyStr_ChangeCase with the
// version-appropriate non-ASCII strategy.
PyObject *CPyStr_Upper(PyObject *self) {
#if CPY_3_13_FEATURES
    return CPyStr_ChangeCase(self, CPy_ASCII_Upper, mypyc_interned_str.upper);
#else
    return CPyStr_ChangeCase(self, CPy_ASCII_Upper, _PyUnicode_ToUpperFull);
#endif
}
|
||||
|
||||
// str.isdigit() primitive: true iff the string is non-empty and every code
// point is a digit. The local macro instantiates the scan loop per string
// kind so each variant reads elements at their natural width.
bool CPyStr_IsDigit(PyObject *str) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(str);
    if (len == 0) return false;

// Scan the string's code units of type TYPE, returning false from the
// enclosing function on the first element that fails CHECK.
#define CHECK_ISDIGIT(TYPE, DATA, CHECK)          \
    {                                             \
        const TYPE *data = (const TYPE *)(DATA);  \
        for (Py_ssize_t i = 0; i < len; i++) {    \
            if (!CHECK(data[i]))                  \
                return false;                     \
        }                                         \
    }

    // ASCII fast path
    if (PyUnicode_IS_ASCII(str)) {
        CHECK_ISDIGIT(Py_UCS1, PyUnicode_1BYTE_DATA(str), Py_ISDIGIT);
        return true;
    }

    switch (PyUnicode_KIND(str)) {
        case PyUnicode_1BYTE_KIND:
            CHECK_ISDIGIT(Py_UCS1, PyUnicode_1BYTE_DATA(str), Py_UNICODE_ISDIGIT);
            break;
        case PyUnicode_2BYTE_KIND:
            CHECK_ISDIGIT(Py_UCS2, PyUnicode_2BYTE_DATA(str), Py_UNICODE_ISDIGIT);
            break;
        case PyUnicode_4BYTE_KIND:
            CHECK_ISDIGIT(Py_UCS4, PyUnicode_4BYTE_DATA(str), Py_UNICODE_ISDIGIT);
            break;
        default:
            Py_UNREACHABLE();
    }
    return true;

#undef CHECK_ISDIGIT
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,102 @@
|
|||
#ifndef LIBRT_STRINGS_H
|
||||
#define LIBRT_STRINGS_H
|
||||
|
||||
#ifndef MYPYC_EXPERIMENTAL
|
||||
|
||||
// No-op stand-in used when MYPYC_EXPERIMENTAL is not defined; the real
// capsule-importing version below is compiled instead when it is.
static int
import_librt_strings(void)
{
    // All librt.base64 features are experimental for now, so don't set up the API here
    return 0;
}
|
||||
|
||||
#else // MYPYC_EXPERIMENTAL
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <Python.h>
|
||||
#include "librt_strings_common.h"
|
||||
|
||||
// ABI version -- only an exact match is compatible. This will only be changed in
|
||||
// very exceptional cases (likely never) due to strict backward compatibility
|
||||
// requirements.
|
||||
#define LIBRT_STRINGS_ABI_VERSION 1
|
||||
|
||||
// API version -- more recent versions must maintain backward compatibility, i.e.
|
||||
// we can add new features but not remove or change existing features (unless
|
||||
// ABI version is changed, but see the comment above).
|
||||
#define LIBRT_STRINGS_API_VERSION 4
|
||||
|
||||
// Number of functions in the capsule API. If you add a new function, also increase
|
||||
// LIBRT_STRINGS_API_VERSION.
|
||||
#define LIBRT_STRINGS_API_LEN 14
|
||||
|
||||
static void *LibRTStrings_API[LIBRT_STRINGS_API_LEN];
|
||||
|
||||
// Mirror of librt.strings' StringWriter object layout; must match the
// definition on the librt side exactly, since capsule functions are called
// with pointers to these objects.
typedef struct {
    PyObject_HEAD
    char *buf;            // Beginning of the buffer
    char kind;            // Bytes per code point (1, 2 or 4)
    Py_ssize_t len;       // Current length (number of code points written)
    Py_ssize_t capacity;  // Total capacity of the buffer (number of code points)
    char data[WRITER_EMBEDDED_BUF_LEN];  // Default buffer
} StringWriterObject;
|
||||
|
||||
#define LibRTStrings_ABIVersion (*(int (*)(void)) LibRTStrings_API[0])
|
||||
#define LibRTStrings_APIVersion (*(int (*)(void)) LibRTStrings_API[1])
|
||||
#define LibRTStrings_BytesWriter_internal (*(PyObject* (*)(void)) LibRTStrings_API[2])
|
||||
#define LibRTStrings_BytesWriter_getvalue_internal (*(PyObject* (*)(PyObject *source)) LibRTStrings_API[3])
|
||||
#define LibRTStrings_BytesWriter_append_internal (*(char (*)(PyObject *source, uint8_t value)) LibRTStrings_API[4])
|
||||
#define LibRTStrings_ByteWriter_grow_buffer_internal (*(bool (*)(BytesWriterObject *obj, Py_ssize_t size)) LibRTStrings_API[5])
|
||||
#define LibRTStrings_BytesWriter_type_internal (*(PyTypeObject* (*)(void)) LibRTStrings_API[6])
|
||||
#define LibRTStrings_BytesWriter_truncate_internal (*(char (*)(PyObject *self, int64_t size)) LibRTStrings_API[7])
|
||||
#define LibRTStrings_StringWriter_internal (*(PyObject* (*)(void)) LibRTStrings_API[8])
|
||||
#define LibRTStrings_StringWriter_getvalue_internal (*(PyObject* (*)(PyObject *source)) LibRTStrings_API[9])
|
||||
#define LibRTStrings_string_append_slow_path (*(char (*)(StringWriterObject *obj, int32_t value)) LibRTStrings_API[10])
|
||||
#define LibRTStrings_StringWriter_type_internal (*(PyTypeObject* (*)(void)) LibRTStrings_API[11])
|
||||
#define LibRTStrings_StringWriter_write_internal (*(char (*)(PyObject *source, PyObject *value)) LibRTStrings_API[12])
|
||||
#define LibRTStrings_grow_string_buffer (*(bool (*)(StringWriterObject *obj, Py_ssize_t n)) LibRTStrings_API[13])
|
||||
|
||||
static int
|
||||
import_librt_strings(void)
|
||||
{
|
||||
PyObject *mod = PyImport_ImportModule("librt.strings");
|
||||
if (mod == NULL)
|
||||
return -1;
|
||||
Py_DECREF(mod); // we import just for the side effect of making the below work.
|
||||
void *capsule = PyCapsule_Import("librt.strings._C_API", 0);
|
||||
if (capsule == NULL)
|
||||
return -1;
|
||||
memcpy(LibRTStrings_API, capsule, sizeof(LibRTStrings_API));
|
||||
if (LibRTStrings_ABIVersion() != LIBRT_STRINGS_ABI_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err), "ABI version conflict for librt.strings, expected %d, found %d",
|
||||
LIBRT_STRINGS_ABI_VERSION,
|
||||
LibRTStrings_ABIVersion()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
if (LibRTStrings_APIVersion() < LIBRT_STRINGS_API_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err),
|
||||
"API version conflict for librt.strings, expected %d or newer, found %d (hint: upgrade librt)",
|
||||
LIBRT_STRINGS_API_VERSION,
|
||||
LibRTStrings_APIVersion()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Exact-type check against the BytesWriter type fetched through the capsule.
static inline bool CPyBytesWriter_Check(PyObject *obj) {
    return Py_TYPE(obj) == LibRTStrings_BytesWriter_type_internal();
}
|
||||
|
||||
// Exact-type check against the StringWriter type fetched through the capsule.
static inline bool CPyStringWriter_Check(PyObject *obj) {
    return Py_TYPE(obj) == LibRTStrings_StringWriter_type_internal();
}
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
||||
#endif // LIBRT_STRINGS_H
|
||||
|
|
@ -0,0 +1,352 @@
|
|||
#ifndef LIBRT_STRINGS_COMMON_H
|
||||
#define LIBRT_STRINGS_COMMON_H
|
||||
|
||||
#include <Python.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
// Byte-swap functions for endianness conversion (needed for both LE and BE operations)
|
||||
#if defined(_MSC_VER)
|
||||
# include <stdlib.h>
|
||||
# define BSWAP16(x) _byteswap_ushort(x)
|
||||
# define BSWAP32(x) _byteswap_ulong(x)
|
||||
# define BSWAP64(x) _byteswap_uint64(x)
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
# define BSWAP16(x) __builtin_bswap16(x)
|
||||
# define BSWAP32(x) __builtin_bswap32(x)
|
||||
# define BSWAP64(x) __builtin_bswap64(x)
|
||||
#else
|
||||
// Fallback byte-swap implementations for compilers without intrinsics
// (slower but portable).
static inline uint16_t BSWAP16(uint16_t x) {
    // Exchange the two bytes.
    return (uint16_t)((x << 8) | (x >> 8));
}
static inline uint32_t BSWAP32(uint32_t x) {
    // Move each byte to its mirrored position.
    return (x << 24)
         | ((x & 0xFF00U) << 8)
         | ((x >> 8) & 0xFF00U)
         | (x >> 24);
}
static inline uint64_t BSWAP64(uint64_t x) {
    // Swap the 32-bit halves, byte-reversing each via BSWAP32.
    uint64_t lo = BSWAP32((uint32_t)x);
    uint64_t hi = BSWAP32((uint32_t)(x >> 32));
    return (lo << 32) | hi;
}
||||
#endif
|
||||
|
||||
// Length of the default buffer embedded directly in a BytesWriter object
#define WRITER_EMBEDDED_BUF_LEN 256

// Growable byte buffer. 'buf' initially points at the embedded 'data'
// array; once the content outgrows it, 'buf' points at heap storage instead.
typedef struct {
    PyObject_HEAD
    char *buf;            // Beginning of the buffer
    Py_ssize_t len;       // Current length (number of bytes written)
    Py_ssize_t capacity;  // Total capacity of the buffer
    char data[WRITER_EMBEDDED_BUF_LEN];  // Default buffer
} BytesWriterObject;
|
||||
|
||||
// Write a 16-bit signed integer in little-endian format to BytesWriter,
// advancing the write position by 2 bytes.
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteI16LEUnsafe(BytesWriterObject *self, int16_t value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    // Host is big-endian: swap to little-endian byte order before storing.
    uint16_t swapped = BSWAP16((uint16_t)value);
    memcpy(self->buf + self->len, &swapped, 2);
#else
    memcpy(self->buf + self->len, &value, 2);
#endif
    self->len += 2;
}
|
||||
|
||||
// Write a 16-bit signed integer in big-endian format to BytesWriter,
// advancing the write position by 2 bytes.
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteI16BEUnsafe(BytesWriterObject *self, int16_t value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    memcpy(self->buf + self->len, &value, 2);
#else
    // Host is little-endian: swap to big-endian byte order before storing.
    uint16_t swapped = BSWAP16((uint16_t)value);
    memcpy(self->buf + self->len, &swapped, 2);
#endif
    self->len += 2;
}
|
||||
|
||||
// Read a 16-bit signed integer in little-endian format from bytes.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline int16_t
CPyBytes_ReadI16LEUnsafe(const unsigned char *data) {
    // Assemble the value byte-by-byte (LSB first); this is endian-independent,
    // and GCC/Clang fold the idiom into a single load on little-endian hosts.
    uint16_t value = (uint16_t)data[0] | (uint16_t)((uint16_t)data[1] << 8);
    return (int16_t)value;
}
|
||||
|
||||
// Read a 16-bit signed integer in big-endian format from bytes.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline int16_t
CPyBytes_ReadI16BEUnsafe(const unsigned char *data) {
    // memcpy is reliably optimized to a single load by GCC, Clang, and MSVC
    uint16_t value;
    memcpy(&value, data, 2);
#if PY_BIG_ENDIAN
    // Already in big-endian format, no swap needed
#else
    value = BSWAP16(value);
#endif
    return (int16_t)value;
}
|
||||
|
||||
// Write a 32-bit signed integer in little-endian format to BytesWriter,
// advancing the write position by 4 bytes.
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteI32LEUnsafe(BytesWriterObject *self, int32_t value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    // Host is big-endian: swap to little-endian byte order before storing.
    uint32_t swapped = BSWAP32((uint32_t)value);
    memcpy(self->buf + self->len, &swapped, 4);
#else
    memcpy(self->buf + self->len, &value, 4);
#endif
    self->len += 4;
}
|
||||
|
||||
// Write a 32-bit signed integer in big-endian format to BytesWriter,
// advancing the write position by 4 bytes.
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteI32BEUnsafe(BytesWriterObject *self, int32_t value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    memcpy(self->buf + self->len, &value, 4);
#else
    // Host is little-endian: swap to big-endian byte order before storing.
    uint32_t swapped = BSWAP32((uint32_t)value);
    memcpy(self->buf + self->len, &swapped, 4);
#endif
    self->len += 4;
}
|
||||
|
||||
// Read a 32-bit signed integer in little-endian format from bytes.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline int32_t
CPyBytes_ReadI32LEUnsafe(const unsigned char *data) {
    // Assemble the value byte-by-byte (LSB first); this is endian-independent,
    // and GCC/Clang fold the idiom into a single load on little-endian hosts.
    uint32_t value = (uint32_t)data[0]
                   | ((uint32_t)data[1] << 8)
                   | ((uint32_t)data[2] << 16)
                   | ((uint32_t)data[3] << 24);
    return (int32_t)value;
}
|
||||
|
||||
// Read a 32-bit signed integer in big-endian format from bytes.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline int32_t
CPyBytes_ReadI32BEUnsafe(const unsigned char *data) {
    // memcpy is reliably optimized to a single load by GCC, Clang, and MSVC
    uint32_t value;
    memcpy(&value, data, 4);
#if PY_BIG_ENDIAN
    // Already in big-endian format, no swap needed
#else
    value = BSWAP32(value);
#endif
    return (int32_t)value;
}
|
||||
|
||||
// Write a 64-bit signed integer in little-endian format to BytesWriter,
// advancing the write position by 8 bytes.
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteI64LEUnsafe(BytesWriterObject *self, int64_t value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    // Host is big-endian: swap to little-endian byte order before storing.
    uint64_t swapped = BSWAP64((uint64_t)value);
    memcpy(self->buf + self->len, &swapped, 8);
#else
    memcpy(self->buf + self->len, &value, 8);
#endif
    self->len += 8;
}
|
||||
|
||||
// Write a 64-bit signed integer in big-endian format to BytesWriter,
// advancing the write position by 8 bytes.
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteI64BEUnsafe(BytesWriterObject *self, int64_t value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    memcpy(self->buf + self->len, &value, 8);
#else
    // Host is little-endian: swap to big-endian byte order before storing.
    uint64_t swapped = BSWAP64((uint64_t)value);
    memcpy(self->buf + self->len, &swapped, 8);
#endif
    self->len += 8;
}
|
||||
|
||||
// Read a 64-bit signed integer in little-endian format from bytes.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline int64_t
CPyBytes_ReadI64LEUnsafe(const unsigned char *data) {
    // Assemble the value byte-by-byte (LSB first); this is endian-independent,
    // and GCC/Clang fold the idiom into a single load on little-endian hosts.
    uint64_t value = 0;
    for (int i = 7; i >= 0; i--) {
        value = (value << 8) | data[i];
    }
    return (int64_t)value;
}
|
||||
|
||||
// Read a 64-bit signed integer in big-endian format from bytes.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline int64_t
CPyBytes_ReadI64BEUnsafe(const unsigned char *data) {
    // memcpy is reliably optimized to a single load by GCC, Clang, and MSVC
    uint64_t value;
    memcpy(&value, data, 8);
#if PY_BIG_ENDIAN
    // Already in big-endian format, no swap needed
#else
    value = BSWAP64(value);
#endif
    return (int64_t)value;
}
|
||||
|
||||
// Write a 32-bit float in little-endian format to BytesWriter, advancing
// the write position by 4 bytes. The float's bit pattern is transferred via
// memcpy (no pointer-type punning, so no strict-aliasing issues).
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
static inline void
BytesWriter_WriteF32LEUnsafe(BytesWriterObject *self, float value) {
    // memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
#if PY_BIG_ENDIAN
    // Host is big-endian: swap the bit pattern to little-endian before storing.
    uint32_t bits;
    memcpy(&bits, &value, 4);
    bits = BSWAP32(bits);
    memcpy(self->buf + self->len, &bits, 4);
#else
    memcpy(self->buf + self->len, &value, 4);
#endif
    self->len += 4;
}
|
||||
|
||||
// Write a 32-bit float in big-endian format to BytesWriter.
|
||||
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
|
||||
static inline void
|
||||
BytesWriter_WriteF32BEUnsafe(BytesWriterObject *self, float value) {
|
||||
// memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
|
||||
#if PY_BIG_ENDIAN
|
||||
memcpy(self->buf + self->len, &value, 4);
|
||||
#else
|
||||
uint32_t bits;
|
||||
memcpy(&bits, &value, 4);
|
||||
bits = BSWAP32(bits);
|
||||
memcpy(self->buf + self->len, &bits, 4);
|
||||
#endif
|
||||
self->len += 4;
|
||||
}
|
||||
|
||||
// Read a 32-bit float stored in little-endian byte order.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline float
CPyBytes_ReadF32LEUnsafe(const unsigned char *data) {
    // memcpy type-puns safely and compiles to a single load.
    float result;
#if PY_BIG_ENDIAN
    uint32_t bits;
    memcpy(&bits, data, sizeof(bits));
    bits = BSWAP32(bits);
    memcpy(&result, &bits, sizeof(result));
#else
    memcpy(&result, data, sizeof(result));
#endif
    return result;
}
|
||||
|
||||
// Read a 32-bit float stored in big-endian byte order.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline float
CPyBytes_ReadF32BEUnsafe(const unsigned char *data) {
    // memcpy type-puns safely and compiles to a single load.
    float result;
#if PY_BIG_ENDIAN
    memcpy(&result, data, sizeof(result));
#else
    uint32_t bits;
    memcpy(&bits, data, sizeof(bits));
    bits = BSWAP32(bits);
    memcpy(&result, &bits, sizeof(result));
#endif
    return result;
}
|
||||
|
||||
// Write a 64-bit float (double) in little-endian format to BytesWriter.
|
||||
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
|
||||
static inline void
|
||||
BytesWriter_WriteF64LEUnsafe(BytesWriterObject *self, double value) {
|
||||
// memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
|
||||
#if PY_BIG_ENDIAN
|
||||
uint64_t bits;
|
||||
memcpy(&bits, &value, 8);
|
||||
bits = BSWAP64(bits);
|
||||
memcpy(self->buf + self->len, &bits, 8);
|
||||
#else
|
||||
memcpy(self->buf + self->len, &value, 8);
|
||||
#endif
|
||||
self->len += 8;
|
||||
}
|
||||
|
||||
// Write a 64-bit float (double) in big-endian format to BytesWriter.
|
||||
// NOTE: This does NOT check buffer capacity - caller must ensure space is available.
|
||||
static inline void
|
||||
BytesWriter_WriteF64BEUnsafe(BytesWriterObject *self, double value) {
|
||||
// memcpy is reliably optimized to a single store by GCC, Clang, and MSVC
|
||||
#if PY_BIG_ENDIAN
|
||||
memcpy(self->buf + self->len, &value, 8);
|
||||
#else
|
||||
uint64_t bits;
|
||||
memcpy(&bits, &value, 8);
|
||||
bits = BSWAP64(bits);
|
||||
memcpy(self->buf + self->len, &bits, 8);
|
||||
#endif
|
||||
self->len += 8;
|
||||
}
|
||||
|
||||
// Read a 64-bit float (double) stored in little-endian byte order.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline double
CPyBytes_ReadF64LEUnsafe(const unsigned char *data) {
    // memcpy type-puns safely and compiles to a single load.
    double result;
#if PY_BIG_ENDIAN
    uint64_t bits;
    memcpy(&bits, data, sizeof(bits));
    bits = BSWAP64(bits);
    memcpy(&result, &bits, sizeof(result));
#else
    memcpy(&result, data, sizeof(result));
#endif
    return result;
}
|
||||
|
||||
// Read a 64-bit float (double) stored in big-endian byte order.
// NOTE: This does NOT check bounds - caller must ensure valid index.
static inline double
CPyBytes_ReadF64BEUnsafe(const unsigned char *data) {
    // memcpy type-puns safely and compiles to a single load.
    double result;
#if PY_BIG_ENDIAN
    memcpy(&result, data, sizeof(result));
#else
    uint64_t bits;
    memcpy(&bits, data, sizeof(bits));
    bits = BSWAP64(bits);
    memcpy(&result, &bits, sizeof(result));
#endif
    return result;
}
|
||||
|
||||
#endif // LIBRT_STRINGS_COMMON_H
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
// Primitives related to librt.strings.StringWriter that get linked statically
|
||||
// with compiled modules, instead of being called via a capsule.
|
||||
|
||||
#include "stringwriter_extra_ops.h"
|
||||
|
||||
#ifdef MYPYC_EXPERIMENTAL
|
||||
|
||||
// All StringWriter operations are currently implemented as inline functions
|
||||
// in stringwriter_extra_ops.h, or use the exported capsule API directly.
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
#ifndef STRINGWRITER_EXTRA_OPS_H
|
||||
#define STRINGWRITER_EXTRA_OPS_H
|
||||
|
||||
#ifdef MYPYC_EXPERIMENTAL
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <Python.h>
|
||||
|
||||
#include "mypyc_util.h"
|
||||
#include "strings/librt_strings.h"
|
||||
|
||||
static inline CPyTagged
|
||||
CPyStringWriter_Len(PyObject *obj) {
|
||||
return (CPyTagged)((StringWriterObject *)obj)->len << 1;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
CPyStringWriter_EnsureSize(StringWriterObject *data, Py_ssize_t n) {
|
||||
if (likely(data->capacity - data->len >= n)) {
|
||||
return true;
|
||||
} else {
|
||||
return LibRTStrings_grow_string_buffer(data, n);
|
||||
}
|
||||
}
|
||||
|
||||
static inline char
|
||||
CPyStringWriter_Append(PyObject *obj, int32_t value) {
|
||||
StringWriterObject *self = (StringWriterObject *)obj;
|
||||
char kind = self->kind;
|
||||
|
||||
// Fast path: kind 1 (ASCII/Latin-1) with character < 256
|
||||
if (kind == 1 && (uint32_t)value < 256) {
|
||||
// Store length in local variable to enable additional optimizations
|
||||
Py_ssize_t len = self->len;
|
||||
if (!CPyStringWriter_EnsureSize(self, 1))
|
||||
return CPY_NONE_ERROR;
|
||||
self->buf[len] = (char)value;
|
||||
self->len = len + 1;
|
||||
return CPY_NONE;
|
||||
}
|
||||
|
||||
// Slow path: handles kind switching and other cases
|
||||
return LibRTStrings_string_append_slow_path(self, value);
|
||||
}
|
||||
|
||||
// If index is negative, convert to non-negative index (no range checking)
|
||||
static inline int64_t CPyStringWriter_AdjustIndex(PyObject *obj, int64_t index) {
|
||||
if (index < 0) {
|
||||
return index + ((StringWriterObject *)obj)->len;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
// Return true if index is a valid (already adjusted) position in the writer.
static inline bool CPyStringWriter_RangeCheck(PyObject *obj, int64_t index) {
    if (index < 0) {
        return false;
    }
    return index < ((StringWriterObject *)obj)->len;
}
|
||||
|
||||
// Return the code point at index (no bounds checking; caller validates).
static inline int32_t CPyStringWriter_GetItem(PyObject *obj, int64_t index) {
    StringWriterObject *self = (StringWriterObject *)obj;
    char *base = self->buf;

    // Element width depends on the writer's current kind (1/2/4 bytes).
    switch (self->kind) {
        case 1:
            return (uint8_t)base[index];
        case 2: {
            // memcpy avoids unaligned-access UB and compiles to one load.
            uint16_t ch;
            memcpy(&ch, base + index * 2, 2);
            return (int32_t)ch;
        }
        default: {
            uint32_t ch;
            memcpy(&ch, base + index * 4, 4);
            return (int32_t)ch;
        }
    }
}
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
||||
#endif
|
||||
140
venv/lib/python3.11/site-packages/mypyc/lib-rt/time/librt_time.c
Normal file
140
venv/lib/python3.11/site-packages/mypyc/lib-rt/time/librt_time.c
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <time.h>
|
||||
#include <stdint.h>
|
||||
#include "librt_time.h"
|
||||
#include "pythoncapi_compat.h"
|
||||
#include "mypyc_util.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#ifdef MYPYC_EXPERIMENTAL
|
||||
|
||||
// Internal function that returns a C double for mypyc primitives
// Returns high-precision time in seconds (like time.time())
// On failure (Unix gettimeofday fallback only), sets a Python OSError and
// returns CPY_FLOAT_ERROR; callers must treat that value as a possible
// error sentinel.
static double
time_time_internal(void) {
#ifdef _WIN32
    // Windows: Use GetSystemTimePreciseAsFileTime for ~100ns precision
    FILETIME ft;
    ULARGE_INTEGER large;

    GetSystemTimePreciseAsFileTime(&ft);
    large.LowPart = ft.dwLowDateTime;
    large.HighPart = ft.dwHighDateTime;

    // Windows FILETIME is 100-nanosecond intervals since January 1, 1601
    // 116444736000000000 = number of 100-ns intervals between 1601 and 1970
    // Convert directly to seconds: 100ns * 1e-9 = 1e-7
    int64_t intervals = large.QuadPart - 116444736000000000LL;
    return (double)intervals * 1e-7;

#else // Unix-like systems (Linux, macOS, BSD, etc.)

    // Try clock_gettime(CLOCK_REALTIME) for nanosecond precision
    // This is available on POSIX.1-2001 and later (widely available on modern systems)
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
    struct timespec ts;
    if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
        // Convert seconds and nanoseconds separately to avoid large integer operations
        return (double)ts.tv_sec + (double)ts.tv_nsec * 1e-9;
    }
    // Fall through to gettimeofday if clock_gettime failed
#endif

    // Fallback: gettimeofday for microsecond precision
    // This is widely available (POSIX.1-2001, BSD, etc.)
    struct timeval tv;
    if (unlikely(gettimeofday(&tv, NULL) != 0)) {
        PyErr_SetFromErrno(PyExc_OSError);
        return CPY_FLOAT_ERROR;
    }

    // Convert seconds and microseconds separately to avoid large integer operations
    return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
#endif
}
|
||||
|
||||
// Wrapper function for normal Python extension usage
|
||||
static PyObject*
|
||||
time_time(PyObject *self, PyObject *const *args, size_t nargs) {
|
||||
if (nargs != 0) {
|
||||
PyErr_SetString(PyExc_TypeError, "time() takes no arguments");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
double result = time_time_internal();
|
||||
if (result == CPY_FLOAT_ERROR) {
|
||||
return NULL;
|
||||
}
|
||||
return PyFloat_FromDouble(result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Module method table; time() is only exposed in experimental builds.
static PyMethodDef librt_time_module_methods[] = {
#ifdef MYPYC_EXPERIMENTAL
    {"time", (PyCFunction)time_time, METH_FASTCALL,
     PyDoc_STR("Return the current time in seconds since the Unix epoch as a floating point number.")},
#endif
    {NULL, NULL, 0, NULL}  /* sentinel */
};
|
||||
|
||||
#ifdef MYPYC_EXPERIMENTAL
|
||||
|
||||
// ABI version accessor, exported via the capsule so importers can detect
// incompatible layout changes (see import_librt_time in librt_time.h).
static int
time_abi_version(void) {
    return LIBRT_TIME_ABI_VERSION;
}
|
||||
|
||||
// API version accessor, exported via the capsule; importers require a value
// at least as new as the version they were compiled against.
static int
time_api_version(void) {
    return LIBRT_TIME_API_VERSION;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Module exec slot: publishes the internal C API as a PyCapsule under
// "librt.time._C_API" so compiled modules can bind it at import time.
static int
librt_time_module_exec(PyObject *m)
{
#ifdef MYPYC_EXPERIMENTAL
    // Export mypyc internal C API via capsule
    // NOTE: 'static' storage is required - the capsule keeps this pointer
    // alive for the lifetime of the module.
    static void *time_api[LIBRT_TIME_API_LEN] = {
        (void *)time_abi_version,
        (void *)time_api_version,
        (void *)time_time_internal,
    };
    PyObject *c_api_object = PyCapsule_New((void *)time_api, "librt.time._C_API", NULL);
    // PyModule_Add steals the reference; a NULL capsule (allocation failure)
    // is expected to propagate as -1 with the exception already set.
    if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
        return -1;
    }
#endif
    return 0;
}
|
||||
|
||||
// Multi-phase initialization (PEP 489) slots; declares free-threaded
// (no-GIL) support when the target CPython defines Py_MOD_GIL_NOT_USED.
static PyModuleDef_Slot librt_time_module_slots[] = {
    {Py_mod_exec, librt_time_module_exec},
#ifdef Py_MOD_GIL_NOT_USED
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
#endif
    {0, NULL}  /* sentinel */
};
|
||||
|
||||
// Module definition; m_size = 0 means the module keeps no per-module state.
static PyModuleDef librt_time_module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "time",
    .m_doc = "Fast time() function optimized for mypyc",
    .m_size = 0,
    .m_methods = librt_time_module_methods,
    .m_slots = librt_time_module_slots,
};
|
||||
|
||||
// Module entry point: returns the definition for multi-phase initialization;
// actual setup happens in librt_time_module_exec.
PyMODINIT_FUNC
PyInit_time(void)
{
    return PyModuleDef_Init(&librt_time_module);
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
#ifndef LIBRT_TIME_H
|
||||
#define LIBRT_TIME_H
|
||||
|
||||
#ifndef MYPYC_EXPERIMENTAL
|
||||
|
||||
// Non-experimental builds: no capsule binding is needed, succeed trivially.
static int
import_librt_time(void)
{
    // All librt.time features are experimental for now, so don't set up the API here
    return 0;
}
|
||||
|
||||
#else // MYPYC_EXPERIMENTAL
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
#define LIBRT_TIME_ABI_VERSION 1
|
||||
#define LIBRT_TIME_API_VERSION 1
|
||||
#define LIBRT_TIME_API_LEN 3
|
||||
|
||||
static void *LibRTTime_API[LIBRT_TIME_API_LEN];
|
||||
|
||||
#define LibRTTime_ABIVersion (*(int (*)(void)) LibRTTime_API[0])
|
||||
#define LibRTTime_APIVersion (*(int (*)(void)) LibRTTime_API[1])
|
||||
#define LibRTTime_time (*(double (*)(void)) LibRTTime_API[2])
|
||||
|
||||
static int
|
||||
import_librt_time(void)
|
||||
{
|
||||
PyObject *mod = PyImport_ImportModule("librt.time");
|
||||
if (mod == NULL)
|
||||
return -1;
|
||||
Py_DECREF(mod); // we import just for the side effect of making the below work.
|
||||
void *capsule = PyCapsule_Import("librt.time._C_API", 0);
|
||||
if (capsule == NULL)
|
||||
return -1;
|
||||
memcpy(LibRTTime_API, capsule, sizeof(LibRTTime_API));
|
||||
if (LibRTTime_ABIVersion() != LIBRT_TIME_ABI_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err), "ABI version conflict for librt.time, expected %d, found %d",
|
||||
LIBRT_TIME_ABI_VERSION,
|
||||
LibRTTime_ABIVersion()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
if (LibRTTime_APIVersion() < LIBRT_TIME_API_VERSION) {
|
||||
char err[128];
|
||||
snprintf(err, sizeof(err),
|
||||
"API version conflict for librt.time, expected %d or newer, found %d (hint: upgrade librt)",
|
||||
LIBRT_TIME_API_VERSION,
|
||||
LibRTTime_APIVersion()
|
||||
);
|
||||
PyErr_SetString(PyExc_ValueError, err);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
||||
#endif // LIBRT_TIME_H
|
||||
62
venv/lib/python3.11/site-packages/mypyc/lib-rt/tuple_ops.c
Normal file
62
venv/lib/python3.11/site-packages/mypyc/lib-rt/tuple_ops.c
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
// Tuple primitive operations
|
||||
//
|
||||
// These are registered in mypyc.primitives.tuple_ops.
|
||||
|
||||
#include <Python.h>
|
||||
#include "CPy.h"
|
||||
|
||||
// Tuple indexing with Python semantics: negative indices count from the end,
// out-of-range raises IndexError, non-short tagged indices raise OverflowError.
// Returns a new reference, or NULL with an exception set.
PyObject *CPySequenceTuple_GetItem(PyObject *tuple, CPyTagged index) {
    if (!CPyTagged_CheckShort(index)) {
        PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
        return NULL;
    }
    Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
    Py_ssize_t size = PyTuple_GET_SIZE(tuple);
    if (n < 0) {
        // Negative index: wrap around from the end.
        n += size;
    }
    if (n < 0 || n >= size) {
        PyErr_SetString(PyExc_IndexError, "tuple index out of range");
        return NULL;
    }
    // Promote the borrowed reference to an owned one for the caller.
    PyObject *item = PyTuple_GET_ITEM(tuple, n);
    Py_INCREF(item);
    return item;
}
|
||||
|
||||
// Tuple slicing. Fast path handles exact tuples with short tagged bounds;
// anything else is delegated to the generic slicing helper.
PyObject *CPySequenceTuple_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
    if (unlikely(!PyTuple_CheckExact(obj)
                 || !CPyTagged_CheckShort(start)
                 || !CPyTagged_CheckShort(end))) {
        return CPyObject_GetSlice(obj, start, end);
    }
    Py_ssize_t startn = CPyTagged_ShortAsSsize_t(start);
    Py_ssize_t endn = CPyTagged_ShortAsSsize_t(end);
    // Negative bounds wrap around from the end; PyTuple_GetSlice clamps the rest.
    if (startn < 0) {
        startn += PyTuple_GET_SIZE(obj);
    }
    if (endn < 0) {
        endn += PyTuple_GET_SIZE(obj);
    }
    return PyTuple_GetSlice(obj, startn, endn);
}
|
||||
|
||||
// No error checking
|
||||
PyObject *CPySequenceTuple_GetItemUnsafe(PyObject *tuple, Py_ssize_t index)
|
||||
{
|
||||
PyObject *result = PyTuple_GET_ITEM(tuple, index);
|
||||
Py_INCREF(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// PyTuple_SET_ITEM does no error checking,
// and should only be used to fill in brand new tuples.
// NOTE: steals the reference to 'value'.
void CPySequenceTuple_SetItemUnsafe(PyObject *tuple, Py_ssize_t index, PyObject *value)
{
    PyTuple_SET_ITEM(tuple, index, value);
}
|
||||
1026
venv/lib/python3.11/site-packages/mypyc/lib-rt/vecs/librt_vecs.c
Normal file
1026
venv/lib/python3.11/site-packages/mypyc/lib-rt/vecs/librt_vecs.c
Normal file
File diff suppressed because it is too large
Load diff
873
venv/lib/python3.11/site-packages/mypyc/lib-rt/vecs/librt_vecs.h
Normal file
873
venv/lib/python3.11/site-packages/mypyc/lib-rt/vecs/librt_vecs.h
Normal file
|
|
@ -0,0 +1,873 @@
|
|||
#ifndef VEC_H_INCL
|
||||
#define VEC_H_INCL
|
||||
|
||||
// Header for the implementation of librt.vecs, which defines the 'vec' type.
|
||||
// Refer to librt_vecs.c for more detailed information.
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef MYPYC_EXPERIMENTAL
|
||||
|
||||
// Non-experimental builds: no capsule binding is needed, succeed trivially.
static int
import_librt_vecs(void)
{
    // All librt.vecs features are experimental for now, so don't set up the API here
    return 0;
}
|
||||
|
||||
#else // MYPYC_EXPERIMENTAL
|
||||
|
||||
// Magic (native) integer return value on exception. Caller must also
|
||||
// use PyErr_Occurred() since this overlaps with valid integer values.
|
||||
#define MYPYC_INT_ERROR -113
|
||||
|
||||
// Item type constants for supported packed/specialized item types; must be
|
||||
// even but not a multiple of 4 (2 + 4 * n). Each of these has a corresponding
|
||||
// distinct implementation C extension class. For example, vec[i64] has a
|
||||
// different runtime type than vec[i32]. All other item types use generic
|
||||
// implementations.
|
||||
#define VEC_ITEM_TYPE_I64 2
|
||||
#define VEC_ITEM_TYPE_I32 6
|
||||
#define VEC_ITEM_TYPE_I16 10
|
||||
#define VEC_ITEM_TYPE_U8 14
|
||||
#define VEC_ITEM_TYPE_FLOAT 18
|
||||
#define VEC_ITEM_TYPE_BOOL 22
|
||||
|
||||
// Nonzero iff item_type is one of the packed VEC_ITEM_TYPE_* constants
// (values of the form 2 + 4*n, i.e. bit 1 set); tagged type-object pointers
// never have that bit set. Returns 0 or 2, so treat the result as a boolean.
static inline size_t Vec_IsMagicItemType(size_t item_type) {
    return item_type & 2;
}
|
||||
|
||||
|
||||
// Buffer objects
|
||||
|
||||
|
||||
// vecbuf[i64]
|
||||
typedef struct _VecI64BufObject {
|
||||
PyObject_VAR_HEAD
|
||||
int64_t items[1];
|
||||
} VecI64BufObject;
|
||||
|
||||
// vecbuf[i32]
|
||||
typedef struct _VecI32BufObject {
|
||||
PyObject_VAR_HEAD
|
||||
int32_t items[1];
|
||||
} VecI32BufObject;
|
||||
|
||||
// vecbuf[i16]
|
||||
typedef struct _VecI16BufObject {
|
||||
PyObject_VAR_HEAD
|
||||
int16_t items[1];
|
||||
} VecI16BufObject;
|
||||
|
||||
// vecbuf[u8]
|
||||
typedef struct _VecU8BufObject {
|
||||
PyObject_VAR_HEAD
|
||||
uint8_t items[1];
|
||||
} VecU8BufObject;
|
||||
|
||||
// vecbuf[float]
|
||||
typedef struct _VecFloatBufObject {
|
||||
PyObject_VAR_HEAD
|
||||
double items[1];
|
||||
} VecFloatBufObject;
|
||||
|
||||
// vecbuf[bool]
|
||||
typedef struct _VecBoolBufObject {
|
||||
PyObject_VAR_HEAD
|
||||
char items[1];
|
||||
} VecBoolBufObject;
|
||||
|
||||
// Simple generic vecbuf: vecbuf[t] when t is a type object
|
||||
typedef struct _VecTBufObject {
|
||||
PyObject_VAR_HEAD
|
||||
// Tagged pointer to PyTypeObject *. The lowest bit is 1 for optional item type.
|
||||
size_t item_type;
|
||||
PyObject *items[1];
|
||||
} VecTBufObject;
|
||||
|
||||
typedef struct _VecNestedBufItem {
|
||||
Py_ssize_t len;
|
||||
PyObject *buf;
|
||||
} VecNestedBufItem;
|
||||
|
||||
// Nested vec type: vec[vec[...]], vec[vec[...] | None], etc.
|
||||
typedef struct _VecNestedBufObject {
|
||||
PyObject_VAR_HEAD
|
||||
// Tagged pointer to PyTypeObject *. Lowest bit is set for optional item type.
|
||||
// The second lowest bit is set for a packed item type (VEC_ITEM_TYPE_*).
|
||||
size_t item_type;
|
||||
// Number of nested vec types (of any kind, at least 1)
|
||||
size_t depth;
|
||||
VecNestedBufItem items[1];
|
||||
} VecNestedBufObject;
|
||||
|
||||
|
||||
// Unboxed vec objects
|
||||
|
||||
|
||||
typedef struct _VecI64 {
|
||||
Py_ssize_t len;
|
||||
VecI64BufObject *buf;
|
||||
} VecI64;
|
||||
|
||||
typedef struct _VecI32 {
|
||||
Py_ssize_t len;
|
||||
VecI32BufObject *buf;
|
||||
} VecI32;
|
||||
|
||||
typedef struct _VecI16 {
|
||||
Py_ssize_t len;
|
||||
VecI16BufObject *buf;
|
||||
} VecI16;
|
||||
|
||||
typedef struct _VecU8 {
|
||||
Py_ssize_t len;
|
||||
VecU8BufObject *buf;
|
||||
} VecU8;
|
||||
|
||||
typedef struct _VecFloat {
|
||||
Py_ssize_t len;
|
||||
VecFloatBufObject *buf;
|
||||
} VecFloat;
|
||||
|
||||
typedef struct _VecBool {
|
||||
Py_ssize_t len;
|
||||
VecBoolBufObject *buf;
|
||||
} VecBool;
|
||||
|
||||
typedef struct _VecT {
|
||||
Py_ssize_t len;
|
||||
VecTBufObject *buf;
|
||||
} VecT;
|
||||
|
||||
typedef struct _VecNested {
|
||||
Py_ssize_t len;
|
||||
VecNestedBufObject *buf;
|
||||
} VecNested;
|
||||
|
||||
|
||||
// Boxed vec objects
|
||||
|
||||
|
||||
// Arbitrary boxed vec object (only shared bits)
|
||||
typedef struct _VecObject {
|
||||
PyObject_HEAD
|
||||
Py_ssize_t len;
|
||||
} VecObject;
|
||||
|
||||
// Base vec type object (for isinstance checks)
|
||||
// This is an abstract base type that all specialized vec types inherit from.
|
||||
// It cannot be instantiated directly - only used for isinstance(x, vec).
|
||||
typedef struct _VecBaseObject {
|
||||
PyObject_HEAD
|
||||
} VecBaseObject;
|
||||
|
||||
// Boxed vec[i64]
|
||||
typedef struct _VecI64Object {
|
||||
PyObject_HEAD
|
||||
VecI64 vec;
|
||||
} VecI64Object;
|
||||
|
||||
// Boxed vec[i32]
|
||||
typedef struct _VecI32Object {
|
||||
PyObject_HEAD
|
||||
VecI32 vec;
|
||||
} VecI32Object;
|
||||
|
||||
// Boxed vec[i16]
|
||||
typedef struct _VecI16Object {
|
||||
PyObject_HEAD
|
||||
VecI16 vec;
|
||||
} VecI16Object;
|
||||
|
||||
// Boxed vec[u8]
|
||||
typedef struct _VecU8Object {
|
||||
PyObject_HEAD
|
||||
VecU8 vec;
|
||||
} VecU8Object;
|
||||
|
||||
// Boxed vec[float]
|
||||
typedef struct _VecFloatObject {
|
||||
PyObject_HEAD
|
||||
VecFloat vec;
|
||||
} VecFloatObject;
|
||||
|
||||
// Boxed vec[bool]
|
||||
typedef struct _VecBoolObject {
|
||||
PyObject_HEAD
|
||||
VecBool vec;
|
||||
} VecBoolObject;
|
||||
|
||||
// Simple boxed generic vecbuf: vecbuf[t] when t is a type object
|
||||
typedef struct _VecTObject {
|
||||
PyObject_HEAD
|
||||
VecT vec;
|
||||
} VecTObject;
|
||||
|
||||
// Extended generic vec type: vec[t | None], vec[vec[...]], etc.
|
||||
typedef struct _VecNestedObject {
|
||||
PyObject_HEAD
|
||||
VecNested vec;
|
||||
} VecNestedObject;
|
||||
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2V88
|
||||
#define MYPYC_DECLARED_tuple_T2V88
|
||||
typedef struct tuple_T2V88 {
|
||||
VecI64 f0;
|
||||
int64_t f1;
|
||||
} tuple_T2V88;
|
||||
static tuple_T2V88 tuple_undefined_T2V88 = { { -1, NULL } , 0 };
|
||||
#endif
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2V44
|
||||
#define MYPYC_DECLARED_tuple_T2V44
|
||||
typedef struct tuple_T2V44 {
|
||||
VecI32 f0;
|
||||
int32_t f1;
|
||||
} tuple_T2V44;
|
||||
static tuple_T2V44 tuple_undefined_T2V44 = { { -1, NULL } , 0 };
|
||||
#endif
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2V22
|
||||
#define MYPYC_DECLARED_tuple_T2V22
|
||||
typedef struct tuple_T2V22 {
|
||||
VecI16 f0;
|
||||
int16_t f1;
|
||||
} tuple_T2V22;
|
||||
static tuple_T2V22 tuple_undefined_T2V22 = { { -1, NULL } , 0 };
|
||||
#endif
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2VU1U1
|
||||
#define MYPYC_DECLARED_tuple_T2VU1U1
|
||||
typedef struct tuple_T2VU1U1 {
|
||||
VecU8 f0;
|
||||
uint8_t f1;
|
||||
} tuple_T2VU1U1;
|
||||
static tuple_T2VU1U1 tuple_undefined_T2VU1U1 = { { -1, NULL } , 0 };
|
||||
#endif
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2VFF
|
||||
#define MYPYC_DECLARED_tuple_T2VFF
|
||||
typedef struct tuple_T2VFF {
|
||||
VecFloat f0;
|
||||
double f1;
|
||||
} tuple_T2VFF;
|
||||
static tuple_T2VFF tuple_undefined_T2VFF = { { -1, NULL } , 0.0 };
|
||||
#endif
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2VCC
|
||||
#define MYPYC_DECLARED_tuple_T2VCC
|
||||
typedef struct tuple_T2VCC {
|
||||
VecBool f0;
|
||||
char f1;
|
||||
} tuple_T2VCC;
|
||||
static tuple_T2VCC tuple_undefined_T2VCC = { { -1, NULL } , 0 };
|
||||
#endif
|
||||
|
||||
typedef tuple_T2V88 VecI64PopResult;
|
||||
typedef tuple_T2V44 VecI32PopResult;
|
||||
typedef tuple_T2V22 VecI16PopResult;
|
||||
typedef tuple_T2VU1U1 VecU8PopResult;
|
||||
typedef tuple_T2VFF VecFloatPopResult;
|
||||
typedef tuple_T2VCC VecBoolPopResult;
|
||||
|
||||
// vec[i64] operations + type objects (stored in a capsule)
|
||||
typedef struct _VecI64API {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecI64 (*alloc)(Py_ssize_t, Py_ssize_t);
|
||||
PyObject *(*box)(VecI64);
|
||||
VecI64 (*unbox)(PyObject *);
|
||||
VecI64 (*convert_from_nested)(VecNestedBufItem);
|
||||
VecI64 (*append)(VecI64, int64_t);
|
||||
VecI64PopResult (*pop)(VecI64, Py_ssize_t);
|
||||
VecI64 (*remove)(VecI64, int64_t);
|
||||
// TODO: Py_ssize_t
|
||||
VecI64 (*slice)(VecI64, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, int64_t);
|
||||
// iter?
|
||||
} VecI64API;
|
||||
|
||||
// vec[i32] operations + type objects (stored in a capsule)
|
||||
typedef struct _VecI32API {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecI32 (*alloc)(Py_ssize_t, Py_ssize_t);
|
||||
PyObject *(*box)(VecI32);
|
||||
VecI32 (*unbox)(PyObject *);
|
||||
VecI32 (*convert_from_nested)(VecNestedBufItem);
|
||||
VecI32 (*append)(VecI32, int32_t);
|
||||
VecI32PopResult (*pop)(VecI32, Py_ssize_t);
|
||||
VecI32 (*remove)(VecI32, int32_t);
|
||||
// TODO: Py_ssize_t
|
||||
VecI32 (*slice)(VecI32, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, int32_t);
|
||||
// iter?
|
||||
} VecI32API;
|
||||
|
||||
// vec[i16] operations + type objects (stored in a capsule)
|
||||
typedef struct _VecI16API {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecI16 (*alloc)(Py_ssize_t, Py_ssize_t);
|
||||
PyObject *(*box)(VecI16);
|
||||
VecI16 (*unbox)(PyObject *);
|
||||
VecI16 (*convert_from_nested)(VecNestedBufItem);
|
||||
VecI16 (*append)(VecI16, int16_t);
|
||||
VecI16PopResult (*pop)(VecI16, Py_ssize_t);
|
||||
VecI16 (*remove)(VecI16, int16_t);
|
||||
// TODO: Py_ssize_t
|
||||
VecI16 (*slice)(VecI16, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, int16_t);
|
||||
// iter?
|
||||
} VecI16API;
|
||||
|
||||
// vec[u8] operations + type objects (stored in a capsule)
|
||||
typedef struct _VecU8API {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecU8 (*alloc)(Py_ssize_t, Py_ssize_t);
|
||||
PyObject *(*box)(VecU8);
|
||||
VecU8 (*unbox)(PyObject *);
|
||||
VecU8 (*convert_from_nested)(VecNestedBufItem);
|
||||
VecU8 (*append)(VecU8, uint8_t);
|
||||
VecU8PopResult (*pop)(VecU8, Py_ssize_t);
|
||||
VecU8 (*remove)(VecU8, uint8_t);
|
||||
// TODO: Py_ssize_t
|
||||
VecU8 (*slice)(VecU8, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, uint8_t);
|
||||
// iter?
|
||||
} VecU8API;
|
||||
|
||||
// vec[float] operations + type objects (stored in a capsule)
|
||||
typedef struct _VecFloatAPI {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecFloat (*alloc)(Py_ssize_t, Py_ssize_t);
|
||||
PyObject *(*box)(VecFloat);
|
||||
VecFloat (*unbox)(PyObject *);
|
||||
VecFloat (*convert_from_nested)(VecNestedBufItem);
|
||||
VecFloat (*append)(VecFloat, double);
|
||||
VecFloatPopResult (*pop)(VecFloat, Py_ssize_t);
|
||||
VecFloat (*remove)(VecFloat, double);
|
||||
// TODO: Py_ssize_t
|
||||
VecFloat (*slice)(VecFloat, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, double);
|
||||
// iter?
|
||||
} VecFloatAPI;
|
||||
|
||||
// vec[bool] operations + type objects (stored in a capsule)
|
||||
typedef struct _VecBoolAPI {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecBool (*alloc)(Py_ssize_t, Py_ssize_t);
|
||||
PyObject *(*box)(VecBool);
|
||||
VecBool (*unbox)(PyObject *);
|
||||
VecBool (*convert_from_nested)(VecNestedBufItem);
|
||||
VecBool (*append)(VecBool, char);
|
||||
VecBoolPopResult (*pop)(VecBool, Py_ssize_t);
|
||||
VecBool (*remove)(VecBool, char);
|
||||
// TODO: Py_ssize_t
|
||||
VecBool (*slice)(VecBool, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, char);
|
||||
// iter?
|
||||
} VecBoolAPI;
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2VOO
|
||||
#define MYPYC_DECLARED_tuple_T2VOO
|
||||
typedef struct tuple_T2VOO {
|
||||
VecT f0;
|
||||
PyObject *f1;
|
||||
} tuple_T2VOO;
|
||||
static tuple_T2VOO tuple_undefined_T2VOO = { { -1, NULL } , NULL };
|
||||
#endif
|
||||
|
||||
typedef tuple_T2VOO VecTPopResult;
|
||||
|
||||
// vec[T] operations + type objects (stored in a capsule)
|
||||
//
|
||||
// T is a class type or class type | None
|
||||
typedef struct _VecTAPI {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecT (*alloc)(Py_ssize_t, Py_ssize_t, size_t);
|
||||
PyObject *(*box)(VecT, size_t);
|
||||
VecT (*unbox)(PyObject *, size_t);
|
||||
VecT (*convert_from_nested)(VecNestedBufItem);
|
||||
VecT (*append)(VecT, PyObject *, size_t);
|
||||
VecTPopResult (*pop)(VecT, Py_ssize_t);
|
||||
VecT (*remove)(VecT, PyObject *);
|
||||
// TODO: Py_ssize_t
|
||||
VecT (*slice)(VecT, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, PyObject *);
|
||||
// iter?
|
||||
} VecTAPI;
|
||||
|
||||
|
||||
#ifndef MYPYC_DECLARED_tuple_T2VvVi
|
||||
#define MYPYC_DECLARED_tuple_T2VvVi
|
||||
typedef struct tuple_T2VvVi {
|
||||
VecNested f0;
|
||||
VecNestedBufItem f1;
|
||||
} tuple_T2VvVi;
|
||||
static tuple_T2VvVi tuple_undefined_T2VvVi = { { -1, NULL } , { -1, NULL } };
|
||||
#endif
|
||||
|
||||
typedef tuple_T2VvVi VecNestedPopResult;
|
||||
|
||||
// Nested vec operations + type objects (stored in a capsule)
|
||||
typedef struct _VecNestedAPI {
|
||||
PyTypeObject *boxed_type;
|
||||
PyTypeObject *buf_type;
|
||||
VecNested (*alloc)(Py_ssize_t, Py_ssize_t, size_t, size_t depth);
|
||||
PyObject *(*box)(VecNested);
|
||||
VecNested (*unbox)(PyObject *, size_t, size_t depth);
|
||||
VecNested (*convert_from_nested)(VecNestedBufItem);
|
||||
VecNested (*append)(VecNested, VecNestedBufItem);
|
||||
VecNestedPopResult (*pop)(VecNested, Py_ssize_t);
|
||||
VecNested (*remove)(VecNested, VecNestedBufItem);
|
||||
// TODO: Py_ssize_t
|
||||
VecNested (*slice)(VecNested, int64_t, int64_t);
|
||||
// PyObject *(*extend)(PyObject *, PyObject *);
|
||||
// PyObject *(*concat)(PyObject *, PyObject *);
|
||||
// bool (*contains)(PyObject *, PyObject *);
|
||||
// iter?
|
||||
} VecNestedAPI;
|
||||
|
||||
typedef struct {
|
||||
VecTAPI *t;
|
||||
VecNestedAPI *nested;
|
||||
VecI64API *i64;
|
||||
VecI32API *i32;
|
||||
VecI16API *i16;
|
||||
VecU8API *u8;
|
||||
VecFloatAPI *float_;
|
||||
VecBoolAPI *bool_;
|
||||
PyTypeObject *(*get_vec_type)(void); // Function to get base VecType for isinstance checks
|
||||
} VecCapsule;
|
||||
|
||||
// Number of items a buffer object can hold (stored in ob_size).
#define VEC_BUF_SIZE(b) ((b)->ob_base.ob_size)
// item_type packs a PyTypeObject pointer with the low bit used as an
// "optional item" flag (see VecT_ItemCheck); mask it off to get the pointer.
#define VEC_ITEM_TYPE(t) ((PyTypeObject *)((t) & ~1))
#define VEC_BUF_ITEM_TYPE(b) VEC_ITEM_TYPE((b)->item_type)
// Capacity of a vec value (its buffer's ob_size).
#define VEC_CAP(v) ((v).buf->ob_base.ob_size)
// Vec error convention: a negative length marks an error result.
#define VEC_IS_ERROR(v) ((v).len < 0)
// Refcount the underlying buffer (XINCREF/XDECREF: buf may be NULL on error).
#define VEC_DECREF(v) Py_XDECREF((v).buf)
#define VEC_INCREF(v) Py_XINCREF((v).buf)
|
||||
|
||||
// Type objects
|
||||
|
||||
// Buffer type objects that store vec items
|
||||
extern PyTypeObject VecI64BufType;
|
||||
extern PyTypeObject VecI32BufType;
|
||||
extern PyTypeObject VecI16BufType;
|
||||
extern PyTypeObject VecU8BufType;
|
||||
extern PyTypeObject VecFloatBufType;
|
||||
extern PyTypeObject VecBoolBufType;
|
||||
extern PyTypeObject VecTBufType;
|
||||
extern PyTypeObject VecNestedBufType;
|
||||
|
||||
// Wrapper type objects for boxed vec values
|
||||
extern PyTypeObject VecI64Type;
|
||||
extern PyTypeObject VecI32Type;
|
||||
extern PyTypeObject VecI16Type;
|
||||
extern PyTypeObject VecU8Type;
|
||||
extern PyTypeObject VecFloatType;
|
||||
extern PyTypeObject VecBoolType;
|
||||
extern PyTypeObject VecTType;
|
||||
extern PyTypeObject VecNestedType;
|
||||
|
||||
// Type objects corresponding to the 'i64', 'i32', 'i16, and 'u8' types
|
||||
extern PyTypeObject *LibRTVecs_I64TypeObj;
|
||||
extern PyTypeObject *LibRTVecs_I32TypeObj;
|
||||
extern PyTypeObject *LibRTVecs_I16TypeObj;
|
||||
extern PyTypeObject *LibRTVecs_U8TypeObj;
|
||||
|
||||
extern VecI64API Vec_I64API;
|
||||
extern VecI32API Vec_I32API;
|
||||
extern VecI16API Vec_I16API;
|
||||
extern VecU8API Vec_U8API;
|
||||
extern VecFloatAPI Vec_FloatAPI;
|
||||
extern VecBoolAPI Vec_BoolAPI;
|
||||
extern VecTAPI Vec_TAPI;
|
||||
extern VecNestedAPI Vec_NestedAPI;
|
||||
|
||||
// Reject a float where an integer item is expected.
// Returns 1 with a TypeError set if o is a float, 0 otherwise.
static inline int Vec_CheckFloatError(PyObject *o) {
    if (!PyFloat_Check(o))
        return 0;
    PyErr_SetString(PyExc_TypeError, "integer argument expected, got float");
    return 1;
}
|
||||
|
||||
// vec[i64] operations

// Exact type check for boxed vec[i64] wrappers (no subclass support).
static inline int VecI64_Check(PyObject *o) {
    return o->ob_type == &VecI64Type;
}

// Box a single i64 item as a Python int.
static inline PyObject *VecI64_BoxItem(int64_t x) {
    return PyLong_FromLongLong(x);
}

// Unbox a Python int to int64_t. Floats are rejected explicitly; returns -1
// with an exception set on error (use VecI64_IsUnboxError to detect it).
static inline int64_t VecI64_UnboxItem(PyObject *o) {
    if (Vec_CheckFloatError(o))
        return -1;
    return PyLong_AsLongLong(o);
}

// True if x is the error result of VecI64_UnboxItem. The PyErr check is
// required because -1 is also a valid item value.
static inline int VecI64_IsUnboxError(int64_t x) {
    return x == -1 && PyErr_Occurred();
}

PyObject *VecI64_Box(VecI64);
VecI64 VecI64_Append(VecI64, int64_t x);
VecI64 VecI64_Remove(VecI64, int64_t x);
VecI64PopResult VecI64_Pop(VecI64 v, Py_ssize_t index);
|
||||
|
||||
// vec[i32] operations

// Exact type check for boxed vec[i32] wrappers (no subclass support).
static inline int VecI32_Check(PyObject *o) {
    return o->ob_type == &VecI32Type;
}

// Box a single i32 item as a Python int.
static inline PyObject *VecI32_BoxItem(int32_t x) {
    return PyLong_FromLongLong(x);
}

// Unbox a Python int to int32_t. Floats are rejected explicitly; values
// outside the i32 range raise OverflowError. Returns -1 with an exception set
// on error (use VecI32_IsUnboxError to detect it).
// NOTE(review): on platforms where long is 32 bits the explicit range check is
// dead and PyLong_AsLong reports the overflow itself -- confirm intended.
static inline int32_t VecI32_UnboxItem(PyObject *o) {
    if (Vec_CheckFloatError(o))
        return -1;
    long x = PyLong_AsLong(o);
    if (x > INT32_MAX || x < INT32_MIN) {
        PyErr_SetString(PyExc_OverflowError, "Python int too large to convert to i32");
        return -1;
    }
    return x;
}

// True if x is the error result of VecI32_UnboxItem. The PyErr check is
// required because -1 is also a valid item value.
static inline int VecI32_IsUnboxError(int32_t x) {
    return x == -1 && PyErr_Occurred();
}

PyObject *VecI32_Box(VecI32);
VecI32 VecI32_Append(VecI32, int32_t x);
VecI32 VecI32_Remove(VecI32, int32_t x);
VecI32PopResult VecI32_Pop(VecI32 v, Py_ssize_t index);
|
||||
|
||||
// vec[i16] operations
|
||||
|
||||
static inline int VecI16_Check(PyObject *o) {
|
||||
return o->ob_type == &VecI16Type;
|
||||
}
|
||||
|
||||
static inline PyObject *VecI16_BoxItem(int16_t x) {
|
||||
return PyLong_FromLongLong(x);
|
||||
}
|
||||
|
||||
static inline int16_t VecI16_UnboxItem(PyObject *o) {
|
||||
if (Vec_CheckFloatError(o))
|
||||
return -1;
|
||||
long x = PyLong_AsLong(o);
|
||||
if (x >= 32768 || x < -32768) {
|
||||
PyErr_SetString(PyExc_OverflowError, "Python int too large to convert to i16");
|
||||
return -1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline int VecI16_IsUnboxError(int16_t x) {
|
||||
return x == -1 && PyErr_Occurred();
|
||||
}
|
||||
|
||||
PyObject *VecI16_Box(VecI16);
|
||||
VecI16 VecI16_Append(VecI16, int16_t x);
|
||||
VecI16 VecI16_Remove(VecI16, int16_t x);
|
||||
VecI16PopResult VecI16_Pop(VecI16 v, Py_ssize_t index);
|
||||
|
||||
// vec[u8] operations
|
||||
|
||||
static inline int VecU8_Check(PyObject *o) {
|
||||
return o->ob_type == &VecU8Type;
|
||||
}
|
||||
|
||||
static inline PyObject *VecU8_BoxItem(uint8_t x) {
|
||||
return PyLong_FromUnsignedLong(x);
|
||||
}
|
||||
|
||||
static inline uint8_t VecU8_UnboxItem(PyObject *o) {
|
||||
if (Vec_CheckFloatError(o))
|
||||
return -1;
|
||||
unsigned long x = PyLong_AsUnsignedLong(o);
|
||||
if (x <= 255)
|
||||
return x;
|
||||
else if (x == (unsigned long)-1)
|
||||
return 239;
|
||||
else {
|
||||
PyErr_SetString(PyExc_OverflowError, "Python int too large to convert to u8");
|
||||
return 239;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int VecU8_IsUnboxError(uint8_t x) {
|
||||
return x == 239 && PyErr_Occurred();
|
||||
}
|
||||
|
||||
PyObject *VecU8_Box(VecU8);
|
||||
VecU8 VecU8_Append(VecU8, uint8_t x);
|
||||
VecU8 VecU8_Remove(VecU8, uint8_t x);
|
||||
VecU8PopResult VecU8_Pop(VecU8 v, Py_ssize_t index);
|
||||
|
||||
// vec[float] operations

// Exact type check for boxed vec[float] wrappers (no subclass support).
static inline int VecFloat_Check(PyObject *o) {
    return o->ob_type == &VecFloatType;
}

// Box a single float item as a Python float.
static inline PyObject *VecFloat_BoxItem(double x) {
    return PyFloat_FromDouble(x);
}

// Unbox a Python number to double via PyFloat_AsDouble; returns -1.0 with an
// exception set on error (use VecFloat_IsUnboxError to detect it).
static inline double VecFloat_UnboxItem(PyObject *o) {
    return PyFloat_AsDouble(o);
}

// True if x is the error result of VecFloat_UnboxItem. The PyErr check is
// required because -1.0 is also a valid item value.
static inline int VecFloat_IsUnboxError(double x) {
    return x == -1.0 && PyErr_Occurred();
}

PyObject *VecFloat_Box(VecFloat);
VecFloat VecFloat_Append(VecFloat, double x);
VecFloat VecFloat_Remove(VecFloat, double x);
VecFloatPopResult VecFloat_Pop(VecFloat v, Py_ssize_t index);
|
||||
|
||||
// vec[bool] operations
|
||||
|
||||
static inline int VecBool_Check(PyObject *o) {
|
||||
return o->ob_type == &VecBoolType;
|
||||
}
|
||||
|
||||
static inline PyObject *VecBool_BoxItem(char x) {
|
||||
if (x == 1) {
|
||||
Py_INCREF(Py_True);
|
||||
return Py_True;
|
||||
} else {
|
||||
Py_INCREF(Py_False);
|
||||
return Py_False;
|
||||
}
|
||||
}
|
||||
|
||||
static inline char VecBool_UnboxItem(PyObject *o) {
|
||||
if (o == Py_False) {
|
||||
return 0;
|
||||
} else if (o == Py_True) {
|
||||
return 1;
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError, "bool value expected");
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int VecBool_IsUnboxError(char x) {
|
||||
return x == 2;
|
||||
}
|
||||
|
||||
PyObject *VecBool_Box(VecBool);
|
||||
VecBool VecBool_Append(VecBool, char x);
|
||||
VecBool VecBool_Remove(VecBool, char x);
|
||||
VecBoolPopResult VecBool_Pop(VecBool v, Py_ssize_t index);
|
||||
|
||||
// vec[t] operations

// Exact type check for boxed vec[t] wrappers (no subclass support).
static inline int VecT_Check(PyObject *o) {
    return o->ob_type == &VecTType;
}

// Validate that `item` may be stored in a vec with the encoded item_type:
// the low bit of item_type flags an optional element type (None accepted),
// the remaining bits are the required PyTypeObject (subclasses accepted via
// PyObject_TypeCheck). Returns 1 if valid, else sets TypeError and returns 0.
// NOTE(review): the `v` parameter is unused here -- confirm it is kept only
// for signature uniformity.
static inline int VecT_ItemCheck(VecT v, PyObject *item, size_t item_type) {
    if (PyObject_TypeCheck(item, VEC_ITEM_TYPE(item_type))) {
        return 1;
    } else if ((item_type & 1) && item == Py_None) {
        return 1;
    } else {
        // TODO: better error message
        PyErr_SetString(PyExc_TypeError, "invalid item type");
        return 0;
    }
}

VecT VecT_New(Py_ssize_t size, Py_ssize_t cap, size_t item_type);
PyObject *VecT_FromIterable(size_t item_type, PyObject *iterable);
PyObject *VecT_Box(VecT vec, size_t item_type);
VecT VecT_Append(VecT vec, PyObject *x, size_t item_type);
VecT VecT_Remove(VecT vec, PyObject *x);
VecTPopResult VecT_Pop(VecT v, Py_ssize_t index);
|
||||
|
||||
// Nested vec operations

// Exact type check for boxed nested-vec wrappers (no subclass support).
static inline int VecNested_Check(PyObject *o) {
    return o->ob_type == &VecNestedType;
}

VecNested VecNested_New(Py_ssize_t size, Py_ssize_t cap, size_t item_type, size_t depth);
PyObject *VecNested_FromIterable(size_t item_type, size_t depth, PyObject *iterable);
PyObject *VecNested_Box(VecNested);
VecNested VecNested_Append(VecNested vec, VecNestedBufItem x);
VecNested VecNested_Remove(VecNested vec, VecNestedBufItem x);
VecNestedPopResult VecNested_Pop(VecNested v, Py_ssize_t index);
|
||||
|
||||
// Return 0 on success, -1 on error. Store unboxed item in *unboxed if successful.
// Return a *borrowed* reference.
// Unbox a boxed vec `item` into the (len, buf) pair expected as an element of
// nested vec `v`. For depth == 1 the elements are leaf vecs, so the boxed
// object's wrapper type must match v's element type tag; for depth > 1 the
// element must be a nested vec of depth one less with the same element type.
// No reference is taken on item's buffer (caller must INCREF if it stores it).
static inline int VecNested_UnboxItem(VecNested v, PyObject *item, VecNestedBufItem *unboxed) {
    size_t depth = v.buf->depth;
    if (depth == 1) {
        if (item->ob_type == &VecTType) {
            // NOTE(review): a VecTType object is cast to VecNestedObject here;
            // presumably the wrapper object layouts share a common (len, buf)
            // prefix -- confirm against the object struct definitions.
            VecNestedObject *o = (VecNestedObject *)item;
            if (o->vec.buf->item_type == v.buf->item_type) {
                unboxed->len = o->vec.len;
                unboxed->buf = (PyObject *)o->vec.buf;
                return 0;
            }
        } else if (item->ob_type == &VecI64Type && v.buf->item_type == VEC_ITEM_TYPE_I64) {
            VecI64Object *o = (VecI64Object *)item;
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        } else if (item->ob_type == &VecU8Type && v.buf->item_type == VEC_ITEM_TYPE_U8) {
            VecU8Object *o = (VecU8Object *)item;
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        } else if (item->ob_type == &VecFloatType && v.buf->item_type == VEC_ITEM_TYPE_FLOAT) {
            VecFloatObject *o = (VecFloatObject *)item;
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        } else if (item->ob_type == &VecI32Type && v.buf->item_type == VEC_ITEM_TYPE_I32) {
            VecI32Object *o = (VecI32Object *)item;
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        } else if (item->ob_type == &VecI16Type && v.buf->item_type == VEC_ITEM_TYPE_I16) {
            VecI16Object *o = (VecI16Object *)item;
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        } else if (item->ob_type == &VecBoolType && v.buf->item_type == VEC_ITEM_TYPE_BOOL) {
            VecBoolObject *o = (VecBoolObject *)item;
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        }
    } else if (item->ob_type == &VecNestedType) {
        // depth > 1: the element is itself a nested vec, one level shallower.
        VecNestedObject *o = (VecNestedObject *)item;
        if (o->vec.buf->depth == v.buf->depth - 1
            && o->vec.buf->item_type == v.buf->item_type) {
            unboxed->len = o->vec.len;
            unboxed->buf = (PyObject *)o->vec.buf;
            return 0;
        }
    }
    // TODO: better error message
    PyErr_SetString(PyExc_TypeError, "invalid item type");
    return -1;
}
|
||||
|
||||
// Box an element of nested vec v as a Python object: a negative length marks
// an absent item and yields None; otherwise the (len, buf) pair is wrapped in
// the appropriate boxed vec type (nested when depth > 1, else dispatched on
// the element type tag). Takes a new reference on the item's buffer.
static inline PyObject *VecNested_BoxItem(VecNested v, VecNestedBufItem item) {
    if (item.len < 0)
        Py_RETURN_NONE;
    Py_XINCREF(item.buf);
    if (v.buf->depth > 1) {
        // Item is a nested vec
        VecNested inner = { .len = item.len, .buf = (VecNestedBufObject *)item.buf };
        return VecNested_Box(inner);
    }
    // Item is a non-nested (leaf) vec; dispatch on the element type tag.
    size_t tag = v.buf->item_type;
    if (tag == VEC_ITEM_TYPE_I64) {
        VecI64 leaf = { .len = item.len, .buf = (VecI64BufObject *)item.buf };
        return VecI64_Box(leaf);
    }
    if (tag == VEC_ITEM_TYPE_U8) {
        VecU8 leaf = { .len = item.len, .buf = (VecU8BufObject *)item.buf };
        return VecU8_Box(leaf);
    }
    if (tag == VEC_ITEM_TYPE_FLOAT) {
        VecFloat leaf = { .len = item.len, .buf = (VecFloatBufObject *)item.buf };
        return VecFloat_Box(leaf);
    }
    if (tag == VEC_ITEM_TYPE_I32) {
        VecI32 leaf = { .len = item.len, .buf = (VecI32BufObject *)item.buf };
        return VecI32_Box(leaf);
    }
    if (tag == VEC_ITEM_TYPE_I16) {
        VecI16 leaf = { .len = item.len, .buf = (VecI16BufObject *)item.buf };
        return VecI16_Box(leaf);
    }
    if (tag == VEC_ITEM_TYPE_BOOL) {
        VecBool leaf = { .len = item.len, .buf = (VecBoolBufObject *)item.buf };
        return VecBool_Box(leaf);
    }
    // Generic vec[t]
    VecT leaf = { .len = item.len, .buf = (VecTBufObject *)item.buf };
    return VecT_Box(leaf, tag);
}
|
||||
|
||||
// Misc helpers

// Render a vec type description from its item_type tag and nesting depth
// as a Python str (presumably used in reprs/error messages -- confirm).
PyObject *Vec_TypeToStr(size_t item_type, size_t depth);
// Shared repr() implementation for boxed vec objects.
PyObject *Vec_GenericRepr(PyObject *vec, size_t item_type, size_t depth, int verbose);
// Rich comparison over two (len, items) object arrays.
PyObject *Vec_GenericRichcompare(Py_ssize_t *len, PyObject **items,
                                 Py_ssize_t *other_len, PyObject **other_items,
                                 int op);
// Remove the first occurrence of item from an object array in place.
int Vec_GenericRemove(Py_ssize_t *len, PyObject **items, PyObject *item);
// Pop helpers over an object array; the wrapper parses the args tuple.
PyObject *Vec_GenericPopWrapper(Py_ssize_t *len, PyObject **items, PyObject *args);
PyObject *Vec_GenericPop(Py_ssize_t *len, PyObject **items, Py_ssize_t index);
|
||||
|
||||
// Global API pointers initialized by import_librt_vecs()
// NOTE(review): these are `static` in a header, so every translation unit
// including this file has its own copies and must call import_librt_vecs()
// itself before use -- confirm this is the intended capsule-client pattern.
static VecCapsule *VecApi;
static VecI64API VecI64Api;
static VecI32API VecI32Api;
static VecI16API VecI16Api;
static VecU8API VecU8Api;
static VecFloatAPI VecFloatApi;
static VecBoolAPI VecBoolApi;
static VecTAPI VecTApi;
static VecNestedAPI VecNestedApi;
|
||||
|
||||
// Import librt.vecs and populate the file-local API tables from its
// "_C_API" capsule. Returns 0 on success, -1 with a Python exception set
// on failure. Must be called before using any Vec*Api table above.
static int
import_librt_vecs(void)
{
    PyObject *mod = PyImport_ImportModule("librt.vecs");
    if (mod == NULL)
        return -1;
    Py_DECREF(mod); // we import just for the side effect of making the below work.
    VecApi = PyCapsule_Import("librt.vecs._C_API", 0);
    if (!VecApi)
        return -1;
    // Copy the per-type API structs by value so later calls need only one
    // indirection through the local tables.
    VecI64Api = *VecApi->i64;
    VecI32Api = *VecApi->i32;
    VecI16Api = *VecApi->i16;
    VecU8Api = *VecApi->u8;
    VecFloatApi = *VecApi->float_;
    VecBoolApi = *VecApi->bool_;
    VecTApi = *VecApi->t;
    VecNestedApi = *VecApi->nested;
    return 0;
}
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
||||
#endif // VEC_H_INCL
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#ifdef MYPYC_EXPERIMENTAL
|
||||
#define VEC VecBool
|
||||
#define VEC_TYPE VecBoolType
|
||||
#define VEC_OBJECT VecBoolObject
|
||||
#define BUF_OBJECT VecBoolBufObject
|
||||
#define BUF_TYPE VecBoolBufType
|
||||
#define NAME(suffix) VecBool##suffix
|
||||
#define FUNC(suffix) VecBool_##suffix
|
||||
#define ITEM_TYPE_STR "bool"
|
||||
#define ITEM_TYPE_MAGIC VEC_ITEM_TYPE_BOOL
|
||||
#define ITEM_C_TYPE char
|
||||
#define FEATURES Vec_BoolAPI
|
||||
|
||||
#define BOX_ITEM VecBool_BoxItem
|
||||
#define UNBOX_ITEM VecBool_UnboxItem
|
||||
#define IS_UNBOX_ERROR VecBool_IsUnboxError
|
||||
|
||||
#include "vec_template.c"
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#ifdef MYPYC_EXPERIMENTAL
|
||||
#define VEC VecFloat
|
||||
#define VEC_TYPE VecFloatType
|
||||
#define VEC_OBJECT VecFloatObject
|
||||
#define BUF_OBJECT VecFloatBufObject
|
||||
#define BUF_TYPE VecFloatBufType
|
||||
#define NAME(suffix) VecFloat##suffix
|
||||
#define FUNC(suffix) VecFloat_##suffix
|
||||
#define ITEM_TYPE_STR "float"
|
||||
#define ITEM_TYPE_MAGIC VEC_ITEM_TYPE_FLOAT
|
||||
#define ITEM_C_TYPE double
|
||||
#define FEATURES Vec_FloatAPI
|
||||
|
||||
#define BOX_ITEM VecFloat_BoxItem
|
||||
#define UNBOX_ITEM VecFloat_UnboxItem
|
||||
#define IS_UNBOX_ERROR VecFloat_IsUnboxError
|
||||
|
||||
#include "vec_template.c"
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#ifdef MYPYC_EXPERIMENTAL
|
||||
#define VEC VecI16
|
||||
#define VEC_TYPE VecI16Type
|
||||
#define VEC_OBJECT VecI16Object
|
||||
#define BUF_OBJECT VecI16BufObject
|
||||
#define BUF_TYPE VecI16BufType
|
||||
#define NAME(suffix) VecI16##suffix
|
||||
#define FUNC(suffix) VecI16_##suffix
|
||||
#define ITEM_TYPE_STR "i16"
|
||||
#define ITEM_TYPE_MAGIC VEC_ITEM_TYPE_I16
|
||||
#define ITEM_C_TYPE int16_t
|
||||
#define FEATURES Vec_I16API
|
||||
|
||||
#define BOX_ITEM VecI16_BoxItem
|
||||
#define UNBOX_ITEM VecI16_UnboxItem
|
||||
#define IS_UNBOX_ERROR VecI16_IsUnboxError
|
||||
|
||||
#include "vec_template.c"
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#ifdef MYPYC_EXPERIMENTAL
|
||||
#define VEC VecI32
|
||||
#define VEC_TYPE VecI32Type
|
||||
#define VEC_OBJECT VecI32Object
|
||||
#define BUF_OBJECT VecI32BufObject
|
||||
#define BUF_TYPE VecI32BufType
|
||||
#define NAME(suffix) VecI32##suffix
|
||||
#define FUNC(suffix) VecI32_##suffix
|
||||
#define ITEM_TYPE_STR "i32"
|
||||
#define ITEM_TYPE_MAGIC VEC_ITEM_TYPE_I32
|
||||
#define ITEM_C_TYPE int32_t
|
||||
#define FEATURES Vec_I32API
|
||||
|
||||
#define BOX_ITEM VecI32_BoxItem
|
||||
#define UNBOX_ITEM VecI32_UnboxItem
|
||||
#define IS_UNBOX_ERROR VecI32_IsUnboxError
|
||||
|
||||
#include "vec_template.c"
|
||||
|
||||
#endif // MYPYC_EXPERIMENTAL
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue