diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/pgvector')
37 files changed, 1482 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pgvector/asyncpg/__init__.py b/.venv/lib/python3.12/site-packages/pgvector/asyncpg/__init__.py new file mode 100644 index 00000000..543b8823 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/asyncpg/__init__.py @@ -0,0 +1,9 @@ +from .register import register_vector +from ..utils import Vector, HalfVector, SparseVector + +__all__ = [ + 'register_vector', + 'Vector', + 'HalfVector', + 'SparseVector' +] diff --git a/.venv/lib/python3.12/site-packages/pgvector/asyncpg/register.py b/.venv/lib/python3.12/site-packages/pgvector/asyncpg/register.py new file mode 100644 index 00000000..a3880585 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/asyncpg/register.py @@ -0,0 +1,31 @@ +from ..utils import Vector, HalfVector, SparseVector + + +async def register_vector(conn, schema='public'): + await conn.set_type_codec( + 'vector', + schema=schema, + encoder=Vector._to_db_binary, + decoder=Vector._from_db_binary, + format='binary' + ) + + try: + await conn.set_type_codec( + 'halfvec', + schema=schema, + encoder=HalfVector._to_db_binary, + decoder=HalfVector._from_db_binary, + format='binary' + ) + + await conn.set_type_codec( + 'sparsevec', + schema=schema, + encoder=SparseVector._to_db_binary, + decoder=SparseVector._from_db_binary, + format='binary' + ) + except ValueError as e: + if not str(e).startswith('unknown type:'): + raise e diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/__init__.py b/.venv/lib/python3.12/site-packages/pgvector/django/__init__.py new file mode 100644 index 00000000..09978a92 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/__init__.py @@ -0,0 +1,26 @@ +from .bit import BitField +from .extensions import VectorExtension +from .functions import L2Distance, MaxInnerProduct, CosineDistance, L1Distance, HammingDistance, JaccardDistance +from .halfvec import HalfVectorField +from .indexes import IvfflatIndex, HnswIndex +from .sparsevec import 
SparseVectorField +from .vector import VectorField +from ..utils import HalfVector, SparseVector + +__all__ = [ + 'VectorExtension', + 'VectorField', + 'HalfVectorField', + 'BitField', + 'SparseVectorField', + 'IvfflatIndex', + 'HnswIndex', + 'L2Distance', + 'MaxInnerProduct', + 'CosineDistance', + 'L1Distance', + 'HammingDistance', + 'JaccardDistance', + 'HalfVector', + 'SparseVector' +] diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/bit.py b/.venv/lib/python3.12/site-packages/pgvector/django/bit.py new file mode 100644 index 00000000..2cc847ad --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/bit.py @@ -0,0 +1,32 @@ +from django import forms +from django.db.models import Field + + +# https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ +class BitField(Field): + description = 'Bit string' + + def __init__(self, *args, length=None, **kwargs): + self.length = length + super().__init__(*args, **kwargs) + + def deconstruct(self): + name, path, args, kwargs = super().deconstruct() + if self.length is not None: + kwargs['length'] = self.length + return name, path, args, kwargs + + def db_type(self, connection): + if self.length is None: + return 'bit' + return 'bit(%d)' % self.length + + def formfield(self, **kwargs): + return super().formfield(form_class=BitFormField, **kwargs) + + +class BitFormField(forms.CharField): + def to_python(self, value): + if isinstance(value, str) and value == '': + return None + return super().to_python(value) diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/extensions.py b/.venv/lib/python3.12/site-packages/pgvector/django/extensions.py new file mode 100644 index 00000000..0573f72b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/extensions.py @@ -0,0 +1,6 @@ +from django.contrib.postgres.operations import CreateExtension + + +class VectorExtension(CreateExtension): + def __init__(self): + self.name = 'vector' diff --git 
a/.venv/lib/python3.12/site-packages/pgvector/django/functions.py b/.venv/lib/python3.12/site-packages/pgvector/django/functions.py new file mode 100644 index 00000000..da9fbf83 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/functions.py @@ -0,0 +1,55 @@ +from django.db.models import FloatField, Func, Value +from ..utils import Vector, HalfVector, SparseVector + + +class DistanceBase(Func): + output_field = FloatField() + + def __init__(self, expression, vector, **extra): + if not hasattr(vector, 'resolve_expression'): + if isinstance(vector, HalfVector): + vector = Value(HalfVector._to_db(vector)) + elif isinstance(vector, SparseVector): + vector = Value(SparseVector._to_db(vector)) + else: + vector = Value(Vector._to_db(vector)) + super().__init__(expression, vector, **extra) + + +class BitDistanceBase(Func): + output_field = FloatField() + + def __init__(self, expression, vector, **extra): + if not hasattr(vector, 'resolve_expression'): + vector = Value(vector) + super().__init__(expression, vector, **extra) + + +class L2Distance(DistanceBase): + function = '' + arg_joiner = ' <-> ' + + +class MaxInnerProduct(DistanceBase): + function = '' + arg_joiner = ' <#> ' + + +class CosineDistance(DistanceBase): + function = '' + arg_joiner = ' <=> ' + + +class L1Distance(DistanceBase): + function = '' + arg_joiner = ' <+> ' + + +class HammingDistance(BitDistanceBase): + function = '' + arg_joiner = ' <~> ' + + +class JaccardDistance(BitDistanceBase): + function = '' + arg_joiner = ' <%%> ' diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/halfvec.py b/.venv/lib/python3.12/site-packages/pgvector/django/halfvec.py new file mode 100644 index 00000000..6b59a7fa --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/halfvec.py @@ -0,0 +1,60 @@ +from django import forms +from django.db.models import Field +from ..utils import HalfVector + + +# https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ +class 
HalfVectorField(Field): + description = 'Half vector' + empty_strings_allowed = False + + def __init__(self, *args, dimensions=None, **kwargs): + self.dimensions = dimensions + super().__init__(*args, **kwargs) + + def deconstruct(self): + name, path, args, kwargs = super().deconstruct() + if self.dimensions is not None: + kwargs['dimensions'] = self.dimensions + return name, path, args, kwargs + + def db_type(self, connection): + if self.dimensions is None: + return 'halfvec' + return 'halfvec(%d)' % self.dimensions + + def from_db_value(self, value, expression, connection): + return HalfVector._from_db(value) + + def to_python(self, value): + if value is None or isinstance(value, HalfVector): + return value + elif isinstance(value, str): + return HalfVector._from_db(value) + else: + return HalfVector(value) + + def get_prep_value(self, value): + return HalfVector._to_db(value) + + def value_to_string(self, obj): + return self.get_prep_value(self.value_from_object(obj)) + + def formfield(self, **kwargs): + return super().formfield(form_class=HalfVectorFormField, **kwargs) + + +class HalfVectorWidget(forms.TextInput): + def format_value(self, value): + if isinstance(value, HalfVector): + value = value.to_list() + return super().format_value(value) + + +class HalfVectorFormField(forms.CharField): + widget = HalfVectorWidget + + def to_python(self, value): + if isinstance(value, str) and value == '': + return None + return super().to_python(value) diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/indexes.py b/.venv/lib/python3.12/site-packages/pgvector/django/indexes.py new file mode 100644 index 00000000..5bec0eba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/indexes.py @@ -0,0 +1,46 @@ +from django.contrib.postgres.indexes import PostgresIndex + + +class IvfflatIndex(PostgresIndex): + suffix = 'ivfflat' + + def __init__(self, *expressions, lists=None, **kwargs): + self.lists = lists + super().__init__(*expressions, **kwargs) + 
+ def deconstruct(self): + path, args, kwargs = super().deconstruct() + if self.lists is not None: + kwargs['lists'] = self.lists + return path, args, kwargs + + def get_with_params(self): + with_params = [] + if self.lists is not None: + with_params.append('lists = %d' % self.lists) + return with_params + + +class HnswIndex(PostgresIndex): + suffix = 'hnsw' + + def __init__(self, *expressions, m=None, ef_construction=None, **kwargs): + self.m = m + self.ef_construction = ef_construction + super().__init__(*expressions, **kwargs) + + def deconstruct(self): + path, args, kwargs = super().deconstruct() + if self.m is not None: + kwargs['m'] = self.m + if self.ef_construction is not None: + kwargs['ef_construction'] = self.ef_construction + return path, args, kwargs + + def get_with_params(self): + with_params = [] + if self.m is not None: + with_params.append('m = %d' % self.m) + if self.ef_construction is not None: + with_params.append('ef_construction = %d' % self.ef_construction) + return with_params diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/sparsevec.py b/.venv/lib/python3.12/site-packages/pgvector/django/sparsevec.py new file mode 100644 index 00000000..d0d2d073 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/sparsevec.py @@ -0,0 +1,55 @@ +from django import forms +from django.db.models import Field +from ..utils import SparseVector + + +# https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ +class SparseVectorField(Field): + description = 'Sparse vector' + empty_strings_allowed = False + + def __init__(self, *args, dimensions=None, **kwargs): + self.dimensions = dimensions + super().__init__(*args, **kwargs) + + def deconstruct(self): + name, path, args, kwargs = super().deconstruct() + if self.dimensions is not None: + kwargs['dimensions'] = self.dimensions + return name, path, args, kwargs + + def db_type(self, connection): + if self.dimensions is None: + return 'sparsevec' + return 'sparsevec(%d)' % 
self.dimensions + + def from_db_value(self, value, expression, connection): + return SparseVector._from_db(value) + + def to_python(self, value): + return SparseVector._from_db(value) + + def get_prep_value(self, value): + return SparseVector._to_db(value) + + def value_to_string(self, obj): + return self.get_prep_value(self.value_from_object(obj)) + + def formfield(self, **kwargs): + return super().formfield(form_class=SparseVectorFormField, **kwargs) + + +class SparseVectorWidget(forms.TextInput): + def format_value(self, value): + if isinstance(value, SparseVector): + value = value.to_text() + return super().format_value(value) + + +class SparseVectorFormField(forms.CharField): + widget = SparseVectorWidget + + def to_python(self, value): + if isinstance(value, str) and value == '': + return None + return super().to_python(value) diff --git a/.venv/lib/python3.12/site-packages/pgvector/django/vector.py b/.venv/lib/python3.12/site-packages/pgvector/django/vector.py new file mode 100644 index 00000000..a89d5408 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/django/vector.py @@ -0,0 +1,73 @@ +from django import forms +from django.db.models import Field +import numpy as np +from ..utils import Vector + + +# https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ +class VectorField(Field): + description = 'Vector' + empty_strings_allowed = False + + def __init__(self, *args, dimensions=None, **kwargs): + self.dimensions = dimensions + super().__init__(*args, **kwargs) + + def deconstruct(self): + name, path, args, kwargs = super().deconstruct() + if self.dimensions is not None: + kwargs['dimensions'] = self.dimensions + return name, path, args, kwargs + + def db_type(self, connection): + if self.dimensions is None: + return 'vector' + return 'vector(%d)' % self.dimensions + + def from_db_value(self, value, expression, connection): + return Vector._from_db(value) + + def to_python(self, value): + if isinstance(value, list): + return 
np.array(value, dtype=np.float32) + return Vector._from_db(value) + + def get_prep_value(self, value): + return Vector._to_db(value) + + def value_to_string(self, obj): + return self.get_prep_value(self.value_from_object(obj)) + + def validate(self, value, model_instance): + if isinstance(value, np.ndarray): + value = value.tolist() + super().validate(value, model_instance) + + def run_validators(self, value): + if isinstance(value, np.ndarray): + value = value.tolist() + super().run_validators(value) + + def formfield(self, **kwargs): + return super().formfield(form_class=VectorFormField, **kwargs) + + +class VectorWidget(forms.TextInput): + def format_value(self, value): + if isinstance(value, np.ndarray): + value = value.tolist() + return super().format_value(value) + + +class VectorFormField(forms.CharField): + widget = VectorWidget + + def has_changed(self, initial, data): + if isinstance(initial, np.ndarray): + initial = initial.tolist() + return super().has_changed(initial, data) + + def to_python(self, value): + if isinstance(value, str) and value == '': + return None + return super().to_python(value) diff --git a/.venv/lib/python3.12/site-packages/pgvector/peewee/__init__.py b/.venv/lib/python3.12/site-packages/pgvector/peewee/__init__.py new file mode 100644 index 00000000..945e0dca --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/peewee/__init__.py @@ -0,0 +1,14 @@ +from .bit import FixedBitField +from .halfvec import HalfVectorField +from .sparsevec import SparseVectorField +from .vector import VectorField +from ..utils import HalfVector, SparseVector + +__all__ = [ + 'VectorField', + 'HalfVectorField', + 'FixedBitField', + 'SparseVectorField', + 'HalfVector', + 'SparseVector' +] diff --git a/.venv/lib/python3.12/site-packages/pgvector/peewee/bit.py b/.venv/lib/python3.12/site-packages/pgvector/peewee/bit.py new file mode 100644 index 00000000..ee5f12fe --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/peewee/bit.py @@ -0,0 
+1,21 @@ +from peewee import Expression, Field + + +class FixedBitField(Field): + field_type = 'bit' + + def __init__(self, max_length=None, *args, **kwargs): + self.max_length = max_length + super(FixedBitField, self).__init__(*args, **kwargs) + + def get_modifiers(self): + return self.max_length and [self.max_length] or None + + def _distance(self, op, vector): + return Expression(lhs=self, op=op, rhs=self.to_value(vector)) + + def hamming_distance(self, vector): + return self._distance('<~>', vector) + + def jaccard_distance(self, vector): + return self._distance('<%%>', vector) diff --git a/.venv/lib/python3.12/site-packages/pgvector/peewee/halfvec.py b/.venv/lib/python3.12/site-packages/pgvector/peewee/halfvec.py new file mode 100644 index 00000000..deaa14da --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/peewee/halfvec.py @@ -0,0 +1,34 @@ +from peewee import Expression, Field +from ..utils import HalfVector + + +class HalfVectorField(Field): + field_type = 'halfvec' + + def __init__(self, dimensions=None, *args, **kwargs): + self.dimensions = dimensions + super(HalfVectorField, self).__init__(*args, **kwargs) + + def get_modifiers(self): + return self.dimensions and [self.dimensions] or None + + def db_value(self, value): + return HalfVector._to_db(value) + + def python_value(self, value): + return HalfVector._from_db(value) + + def _distance(self, op, vector): + return Expression(lhs=self, op=op, rhs=self.to_value(vector)) + + def l2_distance(self, vector): + return self._distance('<->', vector) + + def max_inner_product(self, vector): + return self._distance('<#>', vector) + + def cosine_distance(self, vector): + return self._distance('<=>', vector) + + def l1_distance(self, vector): + return self._distance('<+>', vector) diff --git a/.venv/lib/python3.12/site-packages/pgvector/peewee/sparsevec.py b/.venv/lib/python3.12/site-packages/pgvector/peewee/sparsevec.py new file mode 100644 index 00000000..67f7d1b9 --- /dev/null +++ 
b/.venv/lib/python3.12/site-packages/pgvector/peewee/sparsevec.py @@ -0,0 +1,34 @@ +from peewee import Expression, Field +from ..utils import SparseVector + + +class SparseVectorField(Field): + field_type = 'sparsevec' + + def __init__(self, dimensions=None, *args, **kwargs): + self.dimensions = dimensions + super(SparseVectorField, self).__init__(*args, **kwargs) + + def get_modifiers(self): + return self.dimensions and [self.dimensions] or None + + def db_value(self, value): + return SparseVector._to_db(value) + + def python_value(self, value): + return SparseVector._from_db(value) + + def _distance(self, op, vector): + return Expression(lhs=self, op=op, rhs=self.to_value(vector)) + + def l2_distance(self, vector): + return self._distance('<->', vector) + + def max_inner_product(self, vector): + return self._distance('<#>', vector) + + def cosine_distance(self, vector): + return self._distance('<=>', vector) + + def l1_distance(self, vector): + return self._distance('<+>', vector) diff --git a/.venv/lib/python3.12/site-packages/pgvector/peewee/vector.py b/.venv/lib/python3.12/site-packages/pgvector/peewee/vector.py new file mode 100644 index 00000000..22a87e53 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/peewee/vector.py @@ -0,0 +1,34 @@ +from peewee import Expression, Field +from ..utils import Vector + + +class VectorField(Field): + field_type = 'vector' + + def __init__(self, dimensions=None, *args, **kwargs): + self.dimensions = dimensions + super(VectorField, self).__init__(*args, **kwargs) + + def get_modifiers(self): + return self.dimensions and [self.dimensions] or None + + def db_value(self, value): + return Vector._to_db(value) + + def python_value(self, value): + return Vector._from_db(value) + + def _distance(self, op, vector): + return Expression(lhs=self, op=op, rhs=self.to_value(vector)) + + def l2_distance(self, vector): + return self._distance('<->', vector) + + def max_inner_product(self, vector): + return 
self._distance('<#>', vector) + + def cosine_distance(self, vector): + return self._distance('<=>', vector) + + def l1_distance(self, vector): + return self._distance('<+>', vector) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg/__init__.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg/__init__.py new file mode 100644 index 00000000..9007c374 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg/__init__.py @@ -0,0 +1,11 @@ +from .register import register_vector, register_vector_async +from ..utils import Bit, HalfVector, SparseVector, Vector + +__all__ = [ + 'register_vector', + 'register_vector_async', + 'Vector', + 'HalfVector', + 'Bit', + 'SparseVector' +] diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg/bit.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg/bit.py new file mode 100644 index 00000000..f8eeb610 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg/bit.py @@ -0,0 +1,31 @@ +from psycopg.adapt import Dumper +from psycopg.pq import Format +from ..utils import Bit + + +class BitDumper(Dumper): + + format = Format.TEXT + + def dump(self, obj): + return Bit._to_db(obj).encode('utf8') + + +class BitBinaryDumper(BitDumper): + + format = Format.BINARY + + def dump(self, obj): + return Bit._to_db_binary(obj) + + +def register_bit_info(context, info): + info.register(context) + + # add oid to anonymous class for set_types + text_dumper = type('', (BitDumper,), {'oid': info.oid}) + binary_dumper = type('', (BitBinaryDumper,), {'oid': info.oid}) + + adapters = context.adapters + adapters.register_dumper(Bit, text_dumper) + adapters.register_dumper(Bit, binary_dumper) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg/halfvec.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg/halfvec.py new file mode 100644 index 00000000..351d2cb7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg/halfvec.py @@ -0,0 +1,53 @@ +from psycopg.adapt import 
Loader, Dumper +from psycopg.pq import Format +from ..utils import HalfVector + + +class HalfVectorDumper(Dumper): + + format = Format.TEXT + + def dump(self, obj): + return HalfVector._to_db(obj).encode('utf8') + + +class HalfVectorBinaryDumper(HalfVectorDumper): + + format = Format.BINARY + + def dump(self, obj): + return HalfVector._to_db_binary(obj) + + +class HalfVectorLoader(Loader): + + format = Format.TEXT + + def load(self, data): + if isinstance(data, memoryview): + data = bytes(data) + return HalfVector._from_db(data.decode('utf8')) + + +class HalfVectorBinaryLoader(HalfVectorLoader): + + format = Format.BINARY + + def load(self, data): + if isinstance(data, memoryview): + data = bytes(data) + return HalfVector._from_db_binary(data) + + +def register_halfvec_info(context, info): + info.register(context) + + # add oid to anonymous class for set_types + text_dumper = type('', (HalfVectorDumper,), {'oid': info.oid}) + binary_dumper = type('', (HalfVectorBinaryDumper,), {'oid': info.oid}) + + adapters = context.adapters + adapters.register_dumper(HalfVector, text_dumper) + adapters.register_dumper(HalfVector, binary_dumper) + adapters.register_loader(info.oid, HalfVectorLoader) + adapters.register_loader(info.oid, HalfVectorBinaryLoader) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg/register.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg/register.py new file mode 100644 index 00000000..b93fd3ee --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg/register.py @@ -0,0 +1,37 @@ +from psycopg.types import TypeInfo +from .bit import register_bit_info +from .halfvec import register_halfvec_info +from .sparsevec import register_sparsevec_info +from .vector import register_vector_info + + +def register_vector(context): + info = TypeInfo.fetch(context, 'vector') + register_vector_info(context, info) + + info = TypeInfo.fetch(context, 'bit') + register_bit_info(context, info) + + info = TypeInfo.fetch(context, 'halfvec') 
+ if info is not None: + register_halfvec_info(context, info) + + info = TypeInfo.fetch(context, 'sparsevec') + if info is not None: + register_sparsevec_info(context, info) + + +async def register_vector_async(context): + info = await TypeInfo.fetch(context, 'vector') + register_vector_info(context, info) + + info = await TypeInfo.fetch(context, 'bit') + register_bit_info(context, info) + + info = await TypeInfo.fetch(context, 'halfvec') + if info is not None: + register_halfvec_info(context, info) + + info = await TypeInfo.fetch(context, 'sparsevec') + if info is not None: + register_sparsevec_info(context, info) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg/sparsevec.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg/sparsevec.py new file mode 100644 index 00000000..435fd067 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg/sparsevec.py @@ -0,0 +1,53 @@ +from psycopg.adapt import Loader, Dumper +from psycopg.pq import Format +from ..utils import SparseVector + + +class SparseVectorDumper(Dumper): + + format = Format.TEXT + + def dump(self, obj): + return SparseVector._to_db(obj).encode('utf8') + + +class SparseVectorBinaryDumper(SparseVectorDumper): + + format = Format.BINARY + + def dump(self, obj): + return SparseVector._to_db_binary(obj) + + +class SparseVectorLoader(Loader): + + format = Format.TEXT + + def load(self, data): + if isinstance(data, memoryview): + data = bytes(data) + return SparseVector._from_db(data.decode('utf8')) + + +class SparseVectorBinaryLoader(SparseVectorLoader): + + format = Format.BINARY + + def load(self, data): + if isinstance(data, memoryview): + data = bytes(data) + return SparseVector._from_db_binary(data) + + +def register_sparsevec_info(context, info): + info.register(context) + + # add oid to anonymous class for set_types + text_dumper = type('', (SparseVectorDumper,), {'oid': info.oid}) + binary_dumper = type('', (SparseVectorBinaryDumper,), {'oid': info.oid}) + + adapters = 
context.adapters + adapters.register_dumper(SparseVector, text_dumper) + adapters.register_dumper(SparseVector, binary_dumper) + adapters.register_loader(info.oid, SparseVectorLoader) + adapters.register_loader(info.oid, SparseVectorBinaryLoader) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg/vector.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg/vector.py new file mode 100644 index 00000000..0f62ca98 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg/vector.py @@ -0,0 +1,58 @@ +import psycopg +from psycopg.adapt import Loader, Dumper +from psycopg.pq import Format +from ..utils import Vector + + +class VectorDumper(Dumper): + + format = Format.TEXT + + def dump(self, obj): + return Vector._to_db(obj).encode('utf8') + + +class VectorBinaryDumper(VectorDumper): + + format = Format.BINARY + + def dump(self, obj): + return Vector._to_db_binary(obj) + + +class VectorLoader(Loader): + + format = Format.TEXT + + def load(self, data): + if isinstance(data, memoryview): + data = bytes(data) + return Vector._from_db(data.decode('utf8')) + + +class VectorBinaryLoader(VectorLoader): + + format = Format.BINARY + + def load(self, data): + if isinstance(data, memoryview): + data = bytes(data) + return Vector._from_db_binary(data) + + +def register_vector_info(context, info): + if info is None: + raise psycopg.ProgrammingError('vector type not found in the database') + info.register(context) + + # add oid to anonymous class for set_types + text_dumper = type('', (VectorDumper,), {'oid': info.oid}) + binary_dumper = type('', (VectorBinaryDumper,), {'oid': info.oid}) + + adapters = context.adapters + adapters.register_dumper('numpy.ndarray', text_dumper) + adapters.register_dumper('numpy.ndarray', binary_dumper) + adapters.register_dumper(Vector, text_dumper) + adapters.register_dumper(Vector, binary_dumper) + adapters.register_loader(info.oid, VectorLoader) + adapters.register_loader(info.oid, VectorBinaryLoader) diff --git 
a/.venv/lib/python3.12/site-packages/pgvector/psycopg2/__init__.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/__init__.py new file mode 100644 index 00000000..7c952951 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/__init__.py @@ -0,0 +1,8 @@ +from .register import register_vector +from ..utils import HalfVector, SparseVector + +__all__ = [ + 'register_vector', + 'HalfVector', + 'SparseVector' +] diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg2/halfvec.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/halfvec.py new file mode 100644 index 00000000..b50e89b6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/halfvec.py @@ -0,0 +1,25 @@ +from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type +from ..utils import HalfVector + + +class HalfvecAdapter: + def __init__(self, value): + self._value = value + + def getquoted(self): + return adapt(HalfVector._to_db(self._value)).getquoted() + + +def cast_halfvec(value, cur): + return HalfVector._from_db(value) + + +def register_halfvec_info(oid, array_oid, scope): + halfvec = new_type((oid,), 'HALFVEC', cast_halfvec) + register_type(halfvec, scope) + + if array_oid is not None: + halfvecarray = new_array_type((array_oid,), 'HALFVECARRAY', halfvec) + register_type(halfvecarray, scope) + + register_adapter(HalfVector, HalfvecAdapter) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg2/register.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/register.py new file mode 100644 index 00000000..77528520 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/register.py @@ -0,0 +1,29 @@ +import psycopg2 +from psycopg2.extensions import cursor +from .halfvec import register_halfvec_info +from .sparsevec import register_sparsevec_info +from .vector import register_vector_info + + +# TODO make globally False by default in 0.4.0 +# note: register_adapter is always global 
+# TODO make arrays True by default in 0.4.0 +def register_vector(conn_or_curs=None, globally=True, arrays=False): + conn = conn_or_curs if hasattr(conn_or_curs, 'cursor') else conn_or_curs.connection + cur = conn.cursor(cursor_factory=cursor) + scope = None if globally else conn_or_curs + + # use to_regtype to get first matching type in search path + cur.execute("SELECT typname, oid FROM pg_type WHERE oid IN (to_regtype('vector'), to_regtype('_vector'), to_regtype('halfvec'), to_regtype('_halfvec'), to_regtype('sparsevec'), to_regtype('_sparsevec'))") + type_info = dict(cur.fetchall()) + + if 'vector' not in type_info: + raise psycopg2.ProgrammingError('vector type not found in the database') + + register_vector_info(type_info['vector'], type_info['_vector'] if arrays else None, scope) + + if 'halfvec' in type_info: + register_halfvec_info(type_info['halfvec'], type_info['_halfvec'] if arrays else None, scope) + + if 'sparsevec' in type_info: + register_sparsevec_info(type_info['sparsevec'], type_info['_sparsevec'] if arrays else None, scope) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg2/sparsevec.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/sparsevec.py new file mode 100644 index 00000000..a542807b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/sparsevec.py @@ -0,0 +1,25 @@ +from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type +from ..utils import SparseVector + + +class SparsevecAdapter: + def __init__(self, value): + self._value = value + + def getquoted(self): + return adapt(SparseVector._to_db(self._value)).getquoted() + + +def cast_sparsevec(value, cur): + return SparseVector._from_db(value) + + +def register_sparsevec_info(oid, array_oid, scope): + sparsevec = new_type((oid,), 'SPARSEVEC', cast_sparsevec) + register_type(sparsevec, scope) + + if array_oid is not None: + sparsevecarray = new_array_type((array_oid,), 'SPARSEVECARRAY', sparsevec) + 
register_type(sparsevecarray, scope) + + register_adapter(SparseVector, SparsevecAdapter) diff --git a/.venv/lib/python3.12/site-packages/pgvector/psycopg2/vector.py b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/vector.py new file mode 100644 index 00000000..9861f01f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/psycopg2/vector.py @@ -0,0 +1,26 @@ +import numpy as np +from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type +from ..utils import Vector + + +class VectorAdapter: + def __init__(self, value): + self._value = value + + def getquoted(self): + return adapt(Vector._to_db(self._value)).getquoted() + + +def cast_vector(value, cur): + return Vector._from_db(value) + + +def register_vector_info(oid, array_oid, scope): + vector = new_type((oid,), 'VECTOR', cast_vector) + register_type(vector, scope) + + if array_oid is not None: + vectorarray = new_array_type((array_oid,), 'VECTORARRAY', vector) + register_type(vectorarray, scope) + + register_adapter(np.ndarray, VectorAdapter) diff --git a/.venv/lib/python3.12/site-packages/pgvector/sqlalchemy/__init__.py b/.venv/lib/python3.12/site-packages/pgvector/sqlalchemy/__init__.py new file mode 100644 index 00000000..4955eeb9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/sqlalchemy/__init__.py @@ -0,0 +1,19 @@ +from .bit import BIT +from .functions import avg, sum +from .halfvec import HALFVEC +from .sparsevec import SPARSEVEC +from .vector import VECTOR +from .vector import VECTOR as Vector +from ..utils import HalfVector, SparseVector + +__all__ = [ + 'Vector', + 'VECTOR', + 'HALFVEC', + 'BIT', + 'SPARSEVEC', + 'HalfVector', + 'SparseVector', + 'avg', + 'sum' +] diff --git a/.venv/lib/python3.12/site-packages/pgvector/sqlalchemy/bit.py b/.venv/lib/python3.12/site-packages/pgvector/sqlalchemy/bit.py new file mode 100644 index 00000000..0f83f3c6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pgvector/sqlalchemy/bit.py @@ -0,0 
class BIT(UserDefinedType):
    """SQLAlchemy user-defined type emitting ``BIT`` or ``BIT(n)`` column specs."""

    cache_ok = True

    def __init__(self, length=None):
        # Deliberately starts the MRO lookup *after* UserDefinedType, exactly
        # as the original code does.
        super(UserDefinedType, self).__init__()
        self.length = length

    def get_col_spec(self, **kw):
        """Return the DDL fragment for this column type."""
        return 'BIT' if self.length is None else 'BIT(%d)' % self.length

    class comparator_factory(UserDefinedType.Comparator):
        """Adds distance operators, each typed as ``Float``, to BIT expressions."""

        def hamming_distance(self, other):
            """Build a ``<~>`` expression against ``other``."""
            operator = self.op('<~>', return_type=Float)
            return operator(other)

        def jaccard_distance(self, other):
            """Build a ``<%>`` expression against ``other``."""
            operator = self.op('<%>', return_type=Float)
            return operator(other)
class SPARSEVEC(UserDefinedType):
    """SQLAlchemy user-defined type for ``SPARSEVEC`` / ``SPARSEVEC(n)`` columns."""

    cache_ok = True
    # Shared String type whose literal processor is reused to quote literals.
    _string = String()

    def __init__(self, dim=None):
        # Deliberately starts the MRO lookup *after* UserDefinedType, exactly
        # as the original code does.
        super(UserDefinedType, self).__init__()
        self.dim = dim

    def get_col_spec(self, **kw):
        """Return the DDL fragment for this column type."""
        return 'SPARSEVEC' if self.dim is None else 'SPARSEVEC(%d)' % self.dim

    def bind_processor(self, dialect):
        """Return a callable converting bound values with ``SparseVector._to_db``."""
        return lambda value: SparseVector._to_db(value, self.dim)

    def literal_processor(self, dialect):
        """Return a callable rendering values as quoted SQL string literals."""
        quote = self._string._cached_literal_processor(dialect)
        return lambda value: quote(SparseVector._to_db(value, self.dim))

    def result_processor(self, dialect, coltype):
        """Return a callable converting result values with ``SparseVector._from_db``."""
        return lambda value: SparseVector._from_db(value)

    class comparator_factory(UserDefinedType.Comparator):
        """Adds distance operators, each typed as ``Float``, to SPARSEVEC expressions."""

        def l2_distance(self, other):
            """Build a ``<->`` expression against ``other``."""
            operator = self.op('<->', return_type=Float)
            return operator(other)

        def max_inner_product(self, other):
            """Build a ``<#>`` expression against ``other``."""
            operator = self.op('<#>', return_type=Float)
            return operator(other)

        def cosine_distance(self, other):
            """Build a ``<=>`` expression against ``other``."""
            operator = self.op('<=>', return_type=Float)
            return operator(other)

        def l1_distance(self, other):
            """Build a ``<+>`` expression against ``other``."""
            operator = self.op('<+>', return_type=Float)
            return operator(other)
class Bit:
    """One-dimensional boolean array with text and binary bit-string conversions."""

    def __init__(self, value):
        """Wrap ``value`` as a 1-D boolean numpy array.

        A ``str`` is parsed with ``from_text``; any other input is converted
        with ``np.asarray(value, dtype=bool)``.

        Raises:
            ValueError: If a non-string ``value`` is not one-dimensional.
        """
        if isinstance(value, str):
            self._value = self.from_text(value)._value
            return

        # TODO change in 0.4.0
        # TODO raise if dtype not bool or uint8; for now uint8 input is cast
        # element-wise to bool instead of being expanded with np.unpackbits.
        bits = np.asarray(value, dtype=bool)
        if bits.ndim != 1:
            raise ValueError('expected ndim to be 1')
        self._value = bits

    def __repr__(self):
        return f'Bit({self.to_text()})'

    def to_list(self):
        """Return the bits as a list of Python bools."""
        return self._value.tolist()

    def to_numpy(self):
        """Return the underlying boolean array (not a copy)."""
        return self._value

    def to_text(self):
        """Return the bits as a string of ``'0'`` and ``'1'`` characters."""
        return ''.join('1' if bit else '0' for bit in self._value)

    def to_binary(self):
        """Return a 4-byte big-endian bit count followed by the packed bits."""
        header = pack('>i', len(self._value))
        body = np.packbits(self._value).tobytes()
        return header + body

    @classmethod
    def from_text(cls, value):
        """Parse a bit string; every character other than ``'0'`` counts as set."""
        flags = [char != '0' for char in value]
        return cls(np.asarray(flags, dtype=bool))

    @classmethod
    def from_binary(cls, value):
        """Parse the format produced by ``to_binary``."""
        (bit_count,) = unpack_from('>i', value)
        packed = np.frombuffer(value, dtype=np.uint8, offset=4)
        # count= trims the padding bits of the final byte.
        unpacked = np.unpackbits(packed, count=bit_count)
        return cls(unpacked.astype(bool))

    @classmethod
    def _to_db(cls, value):
        """Return the text form of ``value``; only ``Bit`` instances are accepted."""
        if isinstance(value, cls):
            return value.to_text()
        raise ValueError('expected bit')

    @classmethod
    def _to_db_binary(cls, value):
        """Return the binary form of ``value``; only ``Bit`` instances are accepted."""
        if isinstance(value, cls):
            return value.to_binary()
        raise ValueError('expected bit')
# Sentinel meaning "the dimensions argument was not supplied".
NO_DEFAULT = object()


class SparseVector:
    """Sparse vector held as a dimension count plus parallel index/value sequences.

    Indices are zero-based in memory; the text form handled by ``to_text`` and
    ``from_text`` is one-based.
    """

    def __init__(self, value, dimensions=NO_DEFAULT, /):
        """Build a sparse vector from a scipy sparse object, a dict, or a dense sequence.

        Args:
            value: An object whose class is defined under ``scipy.sparse.``, a
                ``{index: value}`` dict, or a dense one-dimensional sequence.
            dimensions: Total number of dimensions. Required for dict input and
                rejected for every other kind of input.

        Raises:
            ValueError: If ``dimensions`` is missing for a dict, supplied for
                any other input, or the scipy input is not a single row.
        """
        if value.__class__.__module__.startswith('scipy.sparse.'):
            if dimensions is not NO_DEFAULT:
                raise ValueError('extra argument')

            self._from_sparse(value)
        elif isinstance(value, dict):
            if dimensions is NO_DEFAULT:
                raise ValueError('missing dimensions')

            self._from_dict(value, dimensions)
        else:
            if dimensions is not NO_DEFAULT:
                raise ValueError('extra argument')

            self._from_dense(value)

    def __repr__(self):
        elements = dict(zip(self._indices, self._values))
        return f'SparseVector({elements}, {self._dim})'

    def dimensions(self):
        """Return the total number of dimensions."""
        return self._dim

    def indices(self):
        """Return the zero-based indices of the stored elements."""
        return self._indices

    def values(self):
        """Return the stored values, parallel to ``indices()``."""
        return self._values

    def to_coo(self):
        """Return a ``scipy.sparse.coo_array`` of shape ``(1, dimensions)``."""
        # Imported lazily so scipy is only needed when this method is used.
        from scipy.sparse import coo_array

        coords = ([0] * len(self._indices), self._indices)
        return coo_array((self._values, coords), shape=(1, self._dim))

    def to_list(self):
        """Return the dense form as a list, with ``0.0`` for absent elements."""
        vec = [0.0] * self._dim
        for i, v in zip(self._indices, self._values):
            vec[i] = v
        return vec

    def to_numpy(self):
        """Return the dense form as a ``float32`` numpy array."""
        vec = np.zeros(self._dim, dtype=np.float32)
        # Convert to an index array explicitly: from_binary stores a tuple,
        # which numpy would otherwise read as a multi-dimensional index.
        vec[np.asarray(self._indices, dtype=np.intp)] = self._values
        return vec

    def to_text(self):
        """Return the text form ``{index:value,...}/dimensions`` with one-based indices."""
        pairs = [f'{int(i) + 1}:{float(v)}' for i, v in zip(self._indices, self._values)]
        return '{' + ','.join(pairs) + '}/' + str(int(self._dim))

    def to_binary(self):
        """Return the big-endian binary form.

        Layout: three int32 fields (dimensions, element count, zero), then the
        int32 indices, then the float32 values.
        """
        nnz = len(self._indices)
        return pack(f'>iii{nnz}i{nnz}f', self._dim, nnz, 0, *self._indices, *self._values)

    def _from_dict(self, d, dim):
        """Populate from an ``{index: value}`` dict, dropping zeros and sorting by index."""
        elements = sorted((i, v) for i, v in d.items() if v != 0)

        self._dim = int(dim)
        self._indices = [int(i) for i, _ in elements]
        self._values = [float(v) for _, v in elements]

    def _from_sparse(self, value):
        """Populate from a scipy sparse object that is 1-D or a single-row 2-D.

        Raises:
            ValueError: If the input is neither 1-D nor of shape ``(1, n)``.
        """
        value = value.tocoo()

        if value.ndim == 1:
            self._dim = value.shape[0]
        elif value.ndim == 2 and value.shape[0] == 1:
            self._dim = value.shape[1]
        else:
            raise ValueError('expected ndim to be 1')

        if hasattr(value, 'coords'):
            # scipy 1.13+: coords holds one array per axis. The last axis is
            # the element position for both 1-D and (1, n) input; the first
            # axis of a (1, n) input is the all-zero row coordinate.
            self._indices = value.coords[-1].tolist()
        else:
            self._indices = value.col.tolist()
        self._values = value.data.tolist()

    def _from_dense(self, value):
        """Populate from a dense sequence, keeping only the non-zero elements."""
        self._dim = len(value)
        self._indices = [i for i, v in enumerate(value) if v != 0]
        self._values = [float(value[i]) for i in self._indices]

    @classmethod
    def from_text(cls, value):
        """Parse the text form produced by ``to_text``."""
        # maxsplit=1 matches the two-target unpacking below.
        elements, dim = value.split('/', 1)
        indices = []
        values = []
        # split on empty string returns single element list
        if len(elements) > 2:
            for e in elements[1:-1].split(','):
                i, v = e.split(':', 1)
                indices.append(int(i) - 1)
                values.append(float(v))
        return cls._from_parts(int(dim), indices, values)

    @classmethod
    def from_binary(cls, value):
        """Parse the binary form produced by ``to_binary``.

        The resulting indices and values are tuples, as returned by
        ``struct.unpack_from``.
        """
        dim, nnz, unused = unpack_from('>iii', value)
        # The header is 12 bytes; the values follow the nnz 4-byte indices.
        indices = unpack_from(f'>{nnz}i', value, 12)
        values = unpack_from(f'>{nnz}f', value, 12 + nnz * 4)
        return cls._from_parts(int(dim), indices, values)

    @classmethod
    def _from_parts(cls, dim, indices, values):
        """Build an instance directly from its fields, bypassing ``__init__``."""
        vec = cls.__new__(cls)
        vec._dim = dim
        vec._indices = indices
        vec._values = values
        return vec

    @classmethod
    def _to_db(cls, value, dim=None):
        """Convert ``value`` to text form, passing ``None`` through.

        Raises:
            ValueError: If ``dim`` is given and does not match the vector.
        """
        if value is None:
            return value

        if not isinstance(value, cls):
            value = cls(value)

        if dim is not None and value.dimensions() != dim:
            raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions()))

        return value.to_text()

    @classmethod
    def _to_db_binary(cls, value):
        """Convert ``value`` to binary form, passing ``None`` through."""
        if value is None:
            return value

        if not isinstance(value, cls):
            value = cls(value)

        return value.to_binary()

    @classmethod
    def _from_db(cls, value):
        """Parse a text value; ``None`` and existing instances pass through."""
        if value is None or isinstance(value, cls):
            return value

        return cls.from_text(value)

    @classmethod
    def _from_db_binary(cls, value):
        """Parse a binary value; ``None`` and existing instances pass through."""
        if value is None or isinstance(value, cls):
            return value

        return cls.from_binary(value)
= unpack_from('>HH', value) + return cls(np.frombuffer(value, dtype='>f4', count=dim, offset=4)) + + @classmethod + def _to_db(cls, value, dim=None): + if value is None: + return value + + if not isinstance(value, cls): + value = cls(value) + + if dim is not None and value.dimensions() != dim: + raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions())) + + return value.to_text() + + @classmethod + def _to_db_binary(cls, value): + if value is None: + return value + + if not isinstance(value, cls): + value = cls(value) + + return value.to_binary() + + @classmethod + def _from_db(cls, value): + if value is None or isinstance(value, np.ndarray): + return value + + return cls.from_text(value).to_numpy().astype(np.float32) + + @classmethod + def _from_db_binary(cls, value): + if value is None or isinstance(value, np.ndarray): + return value + + return cls.from_binary(value).to_numpy().astype(np.float32) |