summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/websockets/speedups.c
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.11/site-packages/websockets/speedups.c')
-rw-r--r--venv/lib/python3.11/site-packages/websockets/speedups.c223
1 files changed, 223 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/websockets/speedups.c b/venv/lib/python3.11/site-packages/websockets/speedups.c
new file mode 100644
index 0000000..a195904
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/websockets/speedups.c
@@ -0,0 +1,223 @@
+/* C implementation of performance sensitive functions. */
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdint.h> /* uint8_t, uint32_t, uint64_t */
+
+#if __ARM_NEON
+#include <arm_neon.h>
+#elif __SSE2__
+#include <emmintrin.h>
+#endif
+
+static const Py_ssize_t MASK_LEN = 4;
+
+/* Similar to PyBytes_AsStringAndSize, but accepts more types */
+
+static int
+_PyBytesLike_AsStringAndSize(PyObject *obj, PyObject **tmp, char **buffer, Py_ssize_t *length)
+{
+ // This supports bytes, bytearrays, and memoryview objects,
+ // which are common data structures for handling byte streams.
+ // websockets.framing.prepare_data() returns only these types.
+ // If *tmp isn't NULL, the caller gets a new reference.
+ if (PyBytes_Check(obj))
+ {
+ *tmp = NULL;
+ *buffer = PyBytes_AS_STRING(obj);
+ *length = PyBytes_GET_SIZE(obj);
+ }
+ else if (PyByteArray_Check(obj))
+ {
+ *tmp = NULL;
+ *buffer = PyByteArray_AS_STRING(obj);
+ *length = PyByteArray_GET_SIZE(obj);
+ }
+ else if (PyMemoryView_Check(obj))
+ {
+ *tmp = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
+ if (*tmp == NULL)
+ {
+ return -1;
+ }
+ Py_buffer *mv_buf;
+ mv_buf = PyMemoryView_GET_BUFFER(*tmp);
+ *buffer = mv_buf->buf;
+ *length = mv_buf->len;
+ }
+ else
+ {
+ PyErr_Format(
+ PyExc_TypeError,
+ "expected a bytes-like object, %.200s found",
+ Py_TYPE(obj)->tp_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* C implementation of websockets.utils.apply_mask */
+
+static PyObject *
+apply_mask(PyObject *self, PyObject *args, PyObject *kwds)
+{
+
+ // In order to support various bytes-like types, accept any Python object.
+
+ static char *kwlist[] = {"data", "mask", NULL};
+ PyObject *input_obj;
+ PyObject *mask_obj;
+
+ // A pointer to a char * + length will be extracted from the data and mask
+ // arguments, possibly via a Py_buffer.
+
+ PyObject *input_tmp = NULL;
+ char *input;
+ Py_ssize_t input_len;
+ PyObject *mask_tmp = NULL;
+ char *mask;
+ Py_ssize_t mask_len;
+
+ // Initialize a PyBytesObject then get a pointer to the underlying char *
+ // in order to avoid an extra memory copy in PyBytes_FromStringAndSize.
+
+ PyObject *result = NULL;
+ char *output;
+
+ // Other variables.
+
+ Py_ssize_t i = 0;
+
+ // Parse inputs.
+
+ if (!PyArg_ParseTupleAndKeywords(
+ args, kwds, "OO", kwlist, &input_obj, &mask_obj))
+ {
+ goto exit;
+ }
+
+ if (_PyBytesLike_AsStringAndSize(input_obj, &input_tmp, &input, &input_len) == -1)
+ {
+ goto exit;
+ }
+
+ if (_PyBytesLike_AsStringAndSize(mask_obj, &mask_tmp, &mask, &mask_len) == -1)
+ {
+ goto exit;
+ }
+
+ if (mask_len != MASK_LEN)
+ {
+ PyErr_SetString(PyExc_ValueError, "mask must contain 4 bytes");
+ goto exit;
+ }
+
+ // Create output.
+
+ result = PyBytes_FromStringAndSize(NULL, input_len);
+ if (result == NULL)
+ {
+ goto exit;
+ }
+
+ // Since we just created result, we don't need error checks.
+ output = PyBytes_AS_STRING(result);
+
+ // Perform the masking operation.
+
+ // Apparently GCC cannot figure out the following optimizations by itself.
+
+ // We need a new scope for MSVC 2010 (non C99 friendly)
+ {
+#if __ARM_NEON
+
+ // With NEON support, XOR by blocks of 16 bytes = 128 bits.
+
+ Py_ssize_t input_len_128 = input_len & ~15;
+ uint8x16_t mask_128 = vreinterpretq_u8_u32(vdupq_n_u32(*(uint32_t *)mask));
+
+ for (; i < input_len_128; i += 16)
+ {
+ uint8x16_t in_128 = vld1q_u8((uint8_t *)(input + i));
+ uint8x16_t out_128 = veorq_u8(in_128, mask_128);
+ vst1q_u8((uint8_t *)(output + i), out_128);
+ }
+
+#elif __SSE2__
+
+ // With SSE2 support, XOR by blocks of 16 bytes = 128 bits.
+
+ // Since we cannot control the 16-bytes alignment of input and output
+ // buffers, we rely on loadu/storeu rather than load/store.
+
+ Py_ssize_t input_len_128 = input_len & ~15;
+ __m128i mask_128 = _mm_set1_epi32(*(uint32_t *)mask);
+
+ for (; i < input_len_128; i += 16)
+ {
+ __m128i in_128 = _mm_loadu_si128((__m128i *)(input + i));
+ __m128i out_128 = _mm_xor_si128(in_128, mask_128);
+ _mm_storeu_si128((__m128i *)(output + i), out_128);
+ }
+
+#else
+
+ // Without SSE2 support, XOR by blocks of 8 bytes = 64 bits.
+
+ // We assume the memory allocator aligns everything on 8 bytes boundaries.
+
+ Py_ssize_t input_len_64 = input_len & ~7;
+ uint32_t mask_32 = *(uint32_t *)mask;
+ uint64_t mask_64 = ((uint64_t)mask_32 << 32) | (uint64_t)mask_32;
+
+ for (; i < input_len_64; i += 8)
+ {
+ *(uint64_t *)(output + i) = *(uint64_t *)(input + i) ^ mask_64;
+ }
+
+#endif
+ }
+
+ // XOR the remainder of the input byte by byte.
+
+ for (; i < input_len; i++)
+ {
+ output[i] = input[i] ^ mask[i & (MASK_LEN - 1)];
+ }
+
+exit:
+ Py_XDECREF(input_tmp);
+ Py_XDECREF(mask_tmp);
+ return result;
+
+}
+
+static PyMethodDef speedups_methods[] = {
+ {
+ "apply_mask",
+ (PyCFunction)apply_mask,
+ METH_VARARGS | METH_KEYWORDS,
+ "Apply masking to the data of a WebSocket message.",
+ },
+ {NULL, NULL, 0, NULL}, /* Sentinel */
+};
+
+static struct PyModuleDef speedups_module = {
+ PyModuleDef_HEAD_INIT,
+ "websocket.speedups", /* m_name */
+ "C implementation of performance sensitive functions.",
+ /* m_doc */
+ -1, /* m_size */
+ speedups_methods, /* m_methods */
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
+PyMODINIT_FUNC
+PyInit_speedups(void)
+{
+ return PyModule_Create(&speedups_module);
+}