parent
bcdcf165d2
commit
2bb5ce4a0e
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,112 @@
|
||||
"""测试股票K线接口返回的字段"""
|
||||
import requests
|
||||
import json
|
||||
|
||||
# API configuration: base URL of the service under test and the key that is
# sent in the X-API-Key request header.
BASE_URL = "http://localhost:8080/v1"
API_KEY = "demo-api-key-2024"
|
||||
|
||||
# 测试获取股票K线
|
||||
def test_stock_klines():
    """Call the stock K-line endpoint and check the fields of the first item.

    Sends ``GET {BASE_URL}/stock/klines/000001.SZ`` for daily bars between
    20260301 and 20260310, prints the first returned item and reports which
    of the expected fields are present in it.

    Returns:
        bool: True only when the API answers with ``code == 0``, at least one
        item is returned and none of the expected fields is missing from the
        first item. False otherwise, including network and JSON errors.
    """
    url = f"{BASE_URL}/stock/klines/000001.SZ"
    headers = {"X-API-Key": API_KEY}
    params = {"start": "20260301", "end": "20260310", "freq": "1d"}
    separator = "=" * 60
    rule = "-" * 60

    # (label, key) pairs printed for the first item; the keys also feed the
    # completeness check below so the two lists cannot drift apart.
    basic_fields = [
        ("时间戳", "time"),
        ("开盘价", "open"),
        ("最高价", "high"),
        ("最低价", "low"),
        ("收盘价", "close"),
        ("成交量", "volume"),
        ("成交额", "amount"),
    ]
    extended_fields = [
        ("交易日", "trade_date"),
        ("是否涨停", "is_limit_up"),
        ("是否跌停", "is_limit_down"),
        ("总市值", "total_market_cap"),
        ("流通市值", "float_market_cap"),
        ("机构持仓占比", "inst_holding_ratio"),
        ("可交易日数", "trading_days"),
        ("创建时间", "created_at"),
    ]
    expected_fields = (
        ["symbol"]
        + [key for _, key in basic_fields]
        + [key for _, key in extended_fields]
    )

    print(f"\n{separator}")
    print(f"测试接口: GET {url}")
    print(separator)

    try:
        # Without a timeout requests waits indefinitely on an unresponsive
        # server, which would hang the whole script.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"请求异常: {e}")
        return False

    if not isinstance(data, dict) or data.get("code") != 0:
        message = data.get("message") if isinstance(data, dict) else data
        print(f"请求失败: {message}")
        return False

    # Tolerate "data": null / "items": null instead of crashing on .get().
    kline_data = data.get("data")
    if not isinstance(kline_data, dict):
        kline_data = {}
    items = kline_data.get("items") or []

    print(f"\n标的: {kline_data.get('symbol')}")
    print(f"周期: {kline_data.get('freq')}")
    print(f"数据条数: {len(items)}")
    print(f"\n{separator}")

    if not items:
        print("没有获取到数据")
        return False

    # Show every field of the first item.
    first_item = items[0]
    print("\n第一条数据详情:")
    print(rule)
    for label, key in basic_fields:
        print(f"{label}: {first_item.get(key)}")

    print("\n扩展字段:")
    for label, key in extended_fields:
        print(f" {label}: {first_item.get(key)}")

    # Verify that every expected field is present.
    print(f"\n{separator}")
    print("字段完整性检查:")
    print(rule)

    missing_fields = []
    for field in expected_fields:
        if field in first_item:
            print(f" ✓ {field}")
        else:
            print(f" ✗ {field} (缺失)")
            missing_fields.append(field)

    if missing_fields:
        # A missing field is a failed check, so the caller must see False.
        print(f"\n缺失字段: {', '.join(missing_fields)}")
        return False

    print("\n所有字段都存在!")
    return True
|
||||
|
||||
if __name__ == "__main__":
    # Banner, run the single check, then a one-line verdict.
    print("\n" + "=" * 60)
    print("股票K线接口字段测试")
    print("=" * 60)

    success = test_stock_klines()

    print(f"\n{'=' * 60}")
    print("测试完成!" if success else "测试失败!")
    print("=" * 60 + "\n")
|
||||
@ -0,0 +1,133 @@
|
||||
"""测试K线数据扩展字段获取"""
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from app.adapters.amazingdata_adapter import AmazingDataAdapter
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
async def test_klines_with_extended_fields():
    """Fetch daily K-lines through AmazingDataAdapter and verify extended fields.

    Connection settings come from the environment: ``AMAZINGDATA_USERNAME``
    and ``AMAZINGDATA_PASSWORD`` are required (no credentials are kept in
    source), ``AMAZINGDATA_HOST`` and ``AMAZINGDATA_PORT`` are optional.

    Returns:
        bool: True when K-lines were returned and every field check on the
        first one passed. False when credentials are missing, no data came
        back, a check failed or any exception was raised.
    """
    separator = "=" * 60
    rule = "-" * 60

    print("\n" + separator)
    print("测试K线数据扩展字段")
    print(separator)

    # Credentials must be supplied by the environment; shipping real account
    # data as a fallback value would leak it to everyone with repo access.
    username = os.getenv("AMAZINGDATA_USERNAME")
    password = os.getenv("AMAZINGDATA_PASSWORD")
    if not username or not password:
        print("\n✗ 测试失败: 请先设置环境变量 AMAZINGDATA_USERNAME 和 AMAZINGDATA_PASSWORD")
        return False

    config = {
        "username": username,
        "password": password,
        "host": os.getenv("AMAZINGDATA_HOST", "140.206.44.234"),
        "port": int(os.getenv("AMAZINGDATA_PORT", "8600")),
        "local_path": "./amazing_data_cache/",
        "use_local_cache": True,
    }

    adapter = AmazingDataAdapter()
    all_passed = False

    try:
        # Connect the adapter.
        print("\n[1/3] 正在连接 AmazingData...")
        await adapter.connect(config)
        print("✓ 连接成功")

        try:
            # Fetch the K-lines.
            symbol = "000001.SZ"  # Ping An Bank
            start_date = "20260301"
            end_date = "20260310"

            print(f"\n[2/3] 正在获取 {symbol} 的K线数据 ({start_date} ~ {end_date})...")
            klines = await adapter.fetch_klines(symbol, start_date, end_date, "1d")
            print(f"✓ 获取到 {len(klines)} 条K线数据")

            if klines:
                # Show the first bar in full.
                print("\n[3/3] 数据字段验证")
                print(rule)

                k = klines[0]
                print(f"\n标的代码: {k.symbol}")
                print(f"交易日: {k.trade_date}")
                print(f"时间戳: {datetime.fromtimestamp(k.time)}")

                print("\n基础行情:")
                print(f" 开盘价: {k.open}")
                print(f" 最高价: {k.high}")
                print(f" 最低价: {k.low}")
                print(f" 收盘价: {k.close}")
                print(f" 成交量: {k.volume}")
                print(f" 成交额: {k.amount}")

                print("\n扩展字段:")
                print(f" 是否涨停: {k.is_limit_up} {'✓' if k.is_limit_up is not None else '✗'}")
                print(f" 是否跌停: {k.is_limit_down} {'✓' if k.is_limit_down is not None else '✗'}")
                print(f" 总市值: {k.total_market_cap:,.0f} 元" if k.total_market_cap else " 总市值: None ✗")
                print(f" 流通市值: {k.float_market_cap:,.0f} 元" if k.float_market_cap else " 流通市值: None ✗")
                print(f" 机构持仓占比: {k.inst_holding_ratio}%" if k.inst_holding_ratio else " 机构持仓占比: None")
                print(f" 可交易日数: {k.trading_days} {'✓' if k.trading_days else '✗'}")

                # Field completeness checks.
                print(f"\n{separator}")
                print("字段完整性检查:")
                print(rule)

                checks = [
                    ("symbol", k.symbol is not None),
                    ("time", k.time > 0),
                    ("open", k.open > 0),
                    ("high", k.high > 0),
                    ("low", k.low > 0),
                    ("close", k.close > 0),
                    ("volume", k.volume > 0),
                    ("amount", k.amount > 0),
                    ("trade_date", k.trade_date is not None),
                    ("is_limit_up", k.is_limit_up is not None),
                    ("is_limit_down", k.is_limit_down is not None),
                    ("total_market_cap", k.total_market_cap is not None and k.total_market_cap > 0),
                    ("float_market_cap", k.float_market_cap is not None and k.float_market_cap > 0),
                    ("trading_days", k.trading_days is not None and k.trading_days > 0),
                ]

                for field, check in checks:
                    print(f" {'✓' if check else '✗'} {field}")
                passed = sum(1 for _, check in checks if check)
                print(f"\n通过: {passed}/{len(checks)}")

                # The overall result follows the checks instead of being
                # unconditionally successful.
                all_passed = passed == len(checks)

                # Limit-up / limit-down flags for the first three bars.
                print(f"\n{separator}")
                print("涨跌停判断示例:")
                print(rule)
                for bar in klines[:3]:
                    if bar.is_limit_up:
                        limit_status = "📈 涨停"
                    elif bar.is_limit_down:
                        limit_status = "📉 跌停"
                    else:
                        limit_status = "—"
                    print(f" {bar.trade_date}: 收盘{bar.close} {limit_status}")
            else:
                print("没有获取到数据")
        finally:
            # Release the connection even when fetching or verification
            # raised; previously the adapter stayed open on every failure.
            await adapter.close()

    except Exception as e:
        # Script-level boundary: report the failure with a traceback and turn
        # it into a False result for the caller's exit code.
        print(f"\n✗ 测试失败: {e}")
        import traceback

        traceback.print_exc()
        return False

    print(f"\n{separator}")
    print("测试完成!" if all_passed else "测试失败!")
    print(separator + "\n")
    return all_passed
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the coroutine to completion and map its boolean result onto the
    # process exit status (0 on success, 1 on failure).
    success = asyncio.run(test_klines_with_extended_fields())
    if success:
        sys.exit(0)
    sys.exit(1)
|
||||
@ -0,0 +1 @@
|
||||
pip
|
||||
@ -0,0 +1,186 @@
|
||||
Metadata-Version: 2.4
|
||||
Name: blosc2
|
||||
Version: 4.1.2
|
||||
Summary: A fast & compressed ndarray library with a flexible compute engine.
|
||||
Author-Email: Blosc Development Team <blosc@blosc.org>
|
||||
Maintainer-Email: Blosc Development Team <blosc@blosc.org>
|
||||
License-Expression: BSD-3-Clause
|
||||
License-File: LICENSE.txt
|
||||
Classifier: Development Status :: 6 - Mature
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Intended Audience :: Information Technology
|
||||
Classifier: Intended Audience :: Science/Research
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Operating System :: Microsoft :: Windows
|
||||
Classifier: Operating System :: Unix
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: 3.14
|
||||
Project-URL: homepage, https://github.com/Blosc/python-blosc2
|
||||
Project-URL: documentation, https://www.blosc.org/python-blosc2/python-blosc2.html
|
||||
Requires-Python: >=3.10
|
||||
Requires-Dist: numpy>=1.26
|
||||
Requires-Dist: ndindex
|
||||
Requires-Dist: msgpack
|
||||
Requires-Dist: numexpr>=2.14.1; platform_machine != "wasm32"
|
||||
Requires-Dist: requests
|
||||
Description-Content-Type: text/x-rst
|
||||
|
||||
=============
|
||||
Python-Blosc2
|
||||
=============
|
||||
|
||||
A fast & compressed ndarray library with a flexible compute engine
|
||||
==================================================================
|
||||
|
||||
:Author: The Blosc development team
|
||||
:Contact: blosc@blosc.org
|
||||
:Github: https://github.com/Blosc/python-blosc2
|
||||
:Actions: |actions|
|
||||
:PyPi: |version|
|
||||
:NumFOCUS: |numfocus|
|
||||
:Code of Conduct: |Contributor Covenant|
|
||||
|
||||
.. |version| image:: https://img.shields.io/pypi/v/blosc2.svg
|
||||
:target: https://pypi.python.org/pypi/blosc2
|
||||
.. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg
|
||||
:target: https://github.com/Blosc/community/blob/master/code_of_conduct.md
|
||||
.. |numfocus| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A
|
||||
:target: https://numfocus.org
|
||||
.. |actions| image:: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml/badge.svg
|
||||
:target: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml
|
||||
|
||||
|
||||
What is Python-Blosc2?
|
||||
=======================
|
||||
|
||||
Python-Blosc2 is a high-performance compressed ndarray library with a flexible
|
||||
compute engine, using `C-Blosc2 <https://www.blosc.org/c-blosc2/c-blosc2.html>`_
|
||||
as its compression backend. It allows complex calculations on compressed data,
|
||||
whether stored in memory, on disk, or over the network (e.g., via
|
||||
`Caterva2 <https://github.com/ironArray/Caterva2>`_). It uses the
|
||||
`C-Blosc2 simple and open format
|
||||
<https://github.com/Blosc/c-blosc2/blob/main/README_FORMAT.rst>`_ for storing
|
||||
compressed data.
|
||||
|
||||
More info: https://www.blosc.org/python-blosc2/getting_started/overview.html
|
||||
|
||||
Installing
|
||||
==========
|
||||
|
||||
Binary packages are available for major OSes (Win, Mac, Linux) and platforms.
|
||||
Install from PyPI using ``pip``:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
pip install blosc2 --upgrade
|
||||
|
||||
Conda users can install from conda-forge:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
conda install -c conda-forge python-blosc2
|
||||
|
||||
Documentation
|
||||
=============
|
||||
|
||||
The documentation is available here:
|
||||
|
||||
https://blosc.org/python-blosc2/python-blosc2.html
|
||||
|
||||
You can find examples at:
|
||||
|
||||
https://github.com/Blosc/python-blosc2/tree/main/examples
|
||||
|
||||
A tutorial from PyData Global 2025 is available at:
|
||||
|
||||
https://github.com/Blosc/PyData-Global-2025-Tutorial
|
||||
|
||||
(`Click here <https://www.youtube.com/watch?v=tUvSI3EpTBQ&list=PLGVZCDnMOq0qmerwB1eITnr5AfYRGm0DF&index=81>`_ to watch the video recording of the tutorial)
|
||||
|
||||
It contains Jupyter notebooks explaining the main features of Python-Blosc2.
|
||||
|
||||
License
|
||||
=======
|
||||
|
||||
This software is licensed under a 3-Clause BSD license. A copy of the
|
||||
python-blosc2 license can be found in
|
||||
`LICENSE.txt <https://github.com/Blosc/python-blosc2/tree/main/LICENSE.txt>`_.
|
||||
|
||||
Discussion forum
|
||||
================
|
||||
|
||||
Discussion about this package is welcome at:
|
||||
|
||||
https://github.com/Blosc/python-blosc2/discussions
|
||||
|
||||
Social feeds
|
||||
------------
|
||||
|
||||
Stay informed about the latest developments by following us in
|
||||
`Mastodon <https://fosstodon.org/@Blosc2>`_,
|
||||
`Bluesky <https://bsky.app/profile/blosc.org>`_ or
|
||||
`LinkedIn <https://www.linkedin.com/company/88381936/admin/dashboard/>`_.
|
||||
|
||||
Thanks
|
||||
======
|
||||
|
||||
Blosc2 is supported by the `NumFOCUS foundation <https://numfocus.org>`_, the
|
||||
`LEAPS-INNOV project <https://www.leaps-innov.eu>`_
|
||||
and `ironArray SLU <https://ironarray.io>`_, among many other donors.
|
||||
This support has allowed the following people to contribute in an important way
|
||||
to the core development of the Blosc2 library:
|
||||
|
||||
- Francesc Alted
|
||||
- Marta Iborra
|
||||
- Luke Shaw
|
||||
- Aleix Alcacer
|
||||
- Oscar Guiñón
|
||||
- Juan David Ibáñez
|
||||
- Ivan Vilata i Balaguer
|
||||
- Oumaima Ech.Chdig
|
||||
- Ricardo Sales Piquer
|
||||
|
||||
In addition, other people have participated in the project in different
|
||||
aspects:
|
||||
|
||||
- Jan Sellner, contributed the mmap support for NDArray/SChunk objects.
|
||||
- Dimitri Papadopoulos, contributed a large bunch of improvements to
|
||||
many aspects of the project. His attention to detail is remarkable.
|
||||
- And many others that have contributed with bug reports, suggestions and
|
||||
improvements.
|
||||
|
||||
Developed using JetBrains IDEs.
|
||||
|
||||
.. image:: https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg
|
||||
:target: https://jb.gg/OpenSource
|
||||
:alt: JetBrains logo.
|
||||
|
||||
Citing Blosc
|
||||
============
|
||||
|
||||
You can cite our work on the various libraries under the Blosc umbrella as follows:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
@ONLINE{blosc,
|
||||
author = {{Blosc Development Team}},
|
||||
title = "{A fast, compressed and persistent data store library}",
|
||||
year = {2009-2025},
|
||||
note = {https://blosc.org}
|
||||
}
|
||||
|
||||
Support Blosc for a Sustainable Future
|
||||
======================================
|
||||
|
||||
If you find Blosc useful and want to support its development, please consider
|
||||
making a `donation or contract to the Blosc Development Team
|
||||
<https://www.blosc.org/pages/blosc-in-depth/#support-blosc>`_.
|
||||
Thank you!
|
||||
|
||||
|
||||
**Compress Better, Compute Bigger**
|
||||
@ -0,0 +1,80 @@
|
||||
blosc2-4.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
blosc2-4.1.2.dist-info/METADATA,sha256=PfwyhaAAq9E0pNey0uX8XLczfOfGhPqQntCe2eKZqCA,6231
|
||||
blosc2-4.1.2.dist-info/RECORD,,
|
||||
blosc2-4.1.2.dist-info/WHEEL,sha256=Iwzd8cFJYd34Bw6rlN9JB4hgsVKLIBCVy637sxMRVyo,106
|
||||
blosc2-4.1.2.dist-info/entry_points.txt,sha256=AQn8qWJhx7sMxZxwNAn9AGT77UMZpT8ZHmZoHsUY4Tw,29
|
||||
blosc2-4.1.2.dist-info/licenses/LICENSE.txt,sha256=AstwCmS9owvusCU1ghx9pxk64zven0poXe4JQkjWzxg,1655
|
||||
blosc2/__init__.py,sha256=wfFw2HAsNRTp9qjTQAA-sr5zFnhkONa3EjrqGO2wUR8,21137
|
||||
blosc2/__pycache__/__init__.cpython-311.pyc,,
|
||||
blosc2/__pycache__/_wasm_jit.cpython-311.pyc,,
|
||||
blosc2/__pycache__/c2array.cpython-311.pyc,,
|
||||
blosc2/__pycache__/core.cpython-311.pyc,,
|
||||
blosc2/__pycache__/dict_store.cpython-311.pyc,,
|
||||
blosc2/__pycache__/dsl_kernel.cpython-311.pyc,,
|
||||
blosc2/__pycache__/embed_store.cpython-311.pyc,,
|
||||
blosc2/__pycache__/exceptions.cpython-311.pyc,,
|
||||
blosc2/__pycache__/fft.cpython-311.pyc,,
|
||||
blosc2/__pycache__/info.cpython-311.pyc,,
|
||||
blosc2/__pycache__/lazyexpr.cpython-311.pyc,,
|
||||
blosc2/__pycache__/linalg.cpython-311.pyc,,
|
||||
blosc2/__pycache__/ndarray.cpython-311.pyc,,
|
||||
blosc2/__pycache__/proxy.cpython-311.pyc,,
|
||||
blosc2/__pycache__/schunk.cpython-311.pyc,,
|
||||
blosc2/__pycache__/storage.cpython-311.pyc,,
|
||||
blosc2/__pycache__/tree_store.cpython-311.pyc,,
|
||||
blosc2/__pycache__/utils.cpython-311.pyc,,
|
||||
blosc2/__pycache__/version.cpython-311.pyc,,
|
||||
blosc2/_wasm_jit.py,sha256=H_bnZVwr2oQjZUaosoR6ykzxYBO1_HX9dfPSUaTS_Vc,19119
|
||||
blosc2/blosc2_ext.cp311-win_amd64.pyd,sha256=Pv7H9dTzotXxPSWlqwBNqpcmLClOQada7ixN-xSH5UM,2499072
|
||||
blosc2/blosc2_ext.pyx,sha256=wfKd0JLxGIld1U9ZrooUbgpTsPIF0KpFMxOF8mBzz04,146229
|
||||
blosc2/c2array.py,sha256=8X5OAjiPEQk42t7yQTP5JTlHNh-u1Sb-ScXElv8mgFc,16505
|
||||
blosc2/core.py,sha256=2smXa_RYq4vA44kZ1thA3gGKE6kW1Ol7REH2ibQGhBA,71651
|
||||
blosc2/dict_store.py,sha256=75JnpeJ9BV65TgWgwtUQydrFAt6Op3dXpecTcMggoik,23149
|
||||
blosc2/dsl_kernel.py,sha256=XzpIRx4VmcCXUio22xsb4XUabv2VvCLdhrwLmXAMHdY,46111
|
||||
blosc2/embed_store.py,sha256=LOudUZvwMO1vWEmrGUay6tdaVNpQIMCJX6e_p2Lx3PA,13349
|
||||
blosc2/exceptions.py,sha256=ZK-SPzS527CEDL7-dw1Xl8wWHt93KW2vp9sg3wIp4BI,557
|
||||
blosc2/fft.py,sha256=V_8c_-w-DOS61IWfUcx6UgeDlME35PGAyxY5ASgDnG4,979
|
||||
blosc2/include/b2nd.h,sha256=vLj3QPdmG2xzeS0f6f1xZzoeypEiApfB0MvUGjVZdLA,24902
|
||||
blosc2/include/blosc2.h,sha256=Ii9DzbSgjVdhA2Sz6oHmT8InCZEFuip9DbR_aotdFzA,102880
|
||||
blosc2/include/blosc2/blosc2-common.h,sha256=5yQGyQpkwbT-bBzurP5p155ZAWv3GKzyx43ZwilSyQM,2719
|
||||
blosc2/include/blosc2/blosc2-export.h,sha256=hsi3IiPDgyWVIhpuV8oNhgN4EYWIA3j_EvRBrIqZxck,1855
|
||||
blosc2/include/blosc2/blosc2-stdio.h,sha256=jGU3e3cgXbxW7t5E1TOt5y-Rga1MSWLk6ebo93UOkKI,4650
|
||||
blosc2/include/blosc2/codecs-registry.h,sha256=-bikcAq5rZOZzA9lZkDrvsGTTmuX4c3qPMH_qG0iPPw,2043
|
||||
blosc2/include/blosc2/filters-registry.h,sha256=TzV0nTUifbKSK6fs4NkD_CtcOHb7m5W18weVO5n4KxU,1833
|
||||
blosc2/include/blosc2/tuners-registry.h,sha256=uQ3TWsbf0QGXHP5qYRv9iqmpXcb7z5MUSmnsF7MVcv4,842
|
||||
blosc2/info.py,sha256=5eaD2K3jfyLT1y0WAQQ7tgyJmfwN5FVJ9rUInWfLfbU,2085
|
||||
blosc2/lazyexpr.py,sha256=QYfuhvxDpYfFMcPZ9zm264wH2I8qDMkxF5of5Cre62g,188780
|
||||
blosc2/lib/blosc2.lib,sha256=UxKvQdGCBE42neuY7QTq9AF1YImoo_tGjQs7G4u0khY,35744
|
||||
blosc2/lib/cmake/blosc2/Blosc2Config.cmake,sha256=64up3utTNCiNvhLYfBhRLIJw0ZbtOmyZ8LY9UOMd-3E,4351
|
||||
blosc2/lib/cmake/blosc2/Blosc2ConfigVersion.cmake,sha256=z0xZRkTV_dFBNx6fg9lJrm40x_zFwQ8JdQ_DuoiVNV0,2830
|
||||
blosc2/lib/cmake/blosc2/Blosc2Targets-release.cmake,sha256=qqvx-rb3WsAOPd_grd3xFiSJj9IE67qxwYXKsL49bn0,1771
|
||||
blosc2/lib/cmake/blosc2/Blosc2Targets.cmake,sha256=ero0ELloxyV-njwSbFuxyQHw02lqPUCUF8V3rz57-lI,4787
|
||||
blosc2/lib/cmake/blosc2/Modules/FindIPP.cmake,sha256=CuPjgtqtJ9AZ7QAKA7z5nzKBPjTEsRv20oQiN-mdoPQ,2165
|
||||
blosc2/lib/cmake/blosc2/Modules/FindLZ4.cmake,sha256=nGGvXB3GSR4Mbct1z4xv8Y3HToo6NUnpNZMGCkCTvmg,288
|
||||
blosc2/lib/cmake/blosc2/Modules/FindSIMD.cmake,sha256=VZEaOjgaV6XHvMLxnp7ahOF_wbOEEN3MPmjO_JGHpEw,2268
|
||||
blosc2/lib/cmake/blosc2/Modules/FindZLIB_NG.cmake,sha256=Ehvjx61hvBcSUdJ6SEKgQfip2X1JlCIk-owmQ8EzRWg,2105
|
||||
blosc2/lib/cmake/blosc2/Modules/FindZSTD.cmake,sha256=pxIgMvnUjLVP-3rntB-70f-tzzfuXtBUwBtOotvoQOo,212
|
||||
blosc2/lib/cmake/blosc2/Modules/toolchain-aarch64.cmake,sha256=Kmg8bElwPy5FBF5LW2N8veU-7igNTyscDMbG_J14ajc,985
|
||||
blosc2/lib/cmake/blosc2/Modules/toolchain-armhf.cmake,sha256=2bRK-4gla53SFgDV6dTuqXn6g7Wpbb_fDcc7hFd5kHo,1022
|
||||
blosc2/lib/cmake/blosc2/Modules/toolchain-armsf.cmake,sha256=RkIP5XvfdbwfzIYZJgcopLv6D4ABWOLUDOuFB1VqanI,1128
|
||||
blosc2/lib/libblosc2.dll,sha256=ZWwatYusKIPBNQqVKbeHQpviAYSO_pV9l0mRXcDdZ1k,1338368
|
||||
blosc2/lib/libblosc2.lib,sha256=gtCUPjUM8vgkydQD7Uq1JFGEBenCtErcM-SdbV1a0GA,2626708
|
||||
blosc2/lib/pkgconfig/blosc2.pc,sha256=EoRO6O8iLRBVas-RCp5koYWFiIFZzNNYgkdmMUK03qY,497
|
||||
blosc2/lib/tcc.dll,sha256=k7MezEkky81NA7O5aQTydJ18_elH_gNSIONioBGywNw,350720
|
||||
blosc2/linalg.py,sha256=H8muaJ96U4rMCL81MNxYe_-i7Fqs7bHRo0bhVsZMIZA,31541
|
||||
blosc2/me_jit_glue.js,sha256=9uyc98wu83S9PW1WJaL9KDtA5z9hTlTxg2_maHNnx7Y,23314
|
||||
blosc2/ndarray.py,sha256=dAzB6bAFA-_4sdzPcr-f58OCtlaTKPUfmTTIZqRRTfI,228824
|
||||
blosc2/proxy.py,sha256=Om2lB-w31njTJhycqXEiTv8YHmgiYGEXgIsVmKvD35c,29361
|
||||
blosc2/schunk.py,sha256=MgwCrzLI-Td9bE8Tduh42uxf_TD7xid5FOp2RkSCGds,65044
|
||||
blosc2/storage.py,sha256=k7DHIzbLphHHE_xytaQ6eawuqzoT6FtF_80rRJp_pQU,10525
|
||||
blosc2/tree_store.py,sha256=khU10mInKHJfSQhai7l_cae2V7ds7vlNmmx4tzxK6Bw,27200
|
||||
blosc2/utils.py,sha256=li0oyxsDiBZLtpBdkUKoFB-pEYwRA_kF2-GtWYw7-B4,37192
|
||||
blosc2/version.py,sha256=L7zCFLtbyYe-_iP0M_1XaXP8Dexs06fJx0XFT65XNyk,58
|
||||
include/libtcc.h,sha256=OEtbE6mBTCh2v8EeBtL5RVrI10pr_IaHbE51hy598XY,5096
|
||||
lib/tcc.dll,sha256=k7MezEkky81NA7O5aQTydJ18_elH_gNSIONioBGywNw,350720
|
||||
share/miniexpr/licenses/LICENSE,sha256=amIGietrCmqJGJurywOAM2NHty7iPWtEMhyfRprJuMU,1568
|
||||
share/miniexpr/licenses/LICENSE-LIBTCC,sha256=xOx_Td8HPikwzbvz1mnOo_J1-wLIws4bNI_Qe4059ws,27111
|
||||
share/miniexpr/licenses/LICENSE-SLEEF,sha256=vrjkLp1rQoTgMwTQWoGgdVIAqWX8jQpeCuoehM-AXW4,1361
|
||||
share/miniexpr/licenses/LICENSE-TINYEXPR,sha256=wcAtE2y-ldvajJ6rOIdTGJc2ZRHk2GXCKximY2q2VZE,897
|
||||
share/miniexpr/licenses/THIRD_PARTY_NOTICES.md,sha256=rIjDpvDa2FXLO-zTDl7ThLkhfcrkmqf-Obl5_9Cu4FI,867
|
||||
share/miniexpr/third_party/tinycc/COPYING,sha256=mxg-fwNWw5jMCmXEotLNVvIUmo4kQmTE0mrFnp2to-g,26932
|
||||
@ -0,0 +1,5 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: scikit-build-core 0.12.1
|
||||
Root-Is-Purelib: false
|
||||
Tag: cp311-cp311-win_amd64
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
[array_api]
|
||||
blosc2 = blosc2
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
For Blosc - A blocking, shuffling and lossless compression library
|
||||
|
||||
Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@ -0,0 +1,940 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
# Hey Ruff, please ignore the next violations
|
||||
# ruff: noqa: E402 - Module level import not at top of file
|
||||
# ruff: noqa: F401 - `var` imported but unused
|
||||
|
||||
import contextlib
|
||||
import importlib.util
|
||||
import os
|
||||
import platform
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
_HAS_NUMBA = False
|
||||
try:
|
||||
import numba
|
||||
|
||||
_HAS_NUMBA = True
|
||||
except ImportError:
|
||||
pass
|
||||
# Do the platform check once at module level
|
||||
IS_WASM = platform.machine() == "wasm32"
|
||||
# IS_WASM = True # for testing (comment this line out for production)
|
||||
"""
|
||||
Flag for WebAssembly platform.
|
||||
"""
|
||||
|
||||
if not IS_WASM:
|
||||
import numexpr
|
||||
|
||||
from .version import __array_api_version__, __version__
|
||||
|
||||
|
||||
def _configure_libtcc_runtime_path():
    """Best-effort setup of ``ME_DSL_JIT_LIBTCC_PATH`` for a bundled libtcc.

    Does nothing on WASM, when the variable is already set, or when the
    ``blosc2.blosc2_ext`` extension cannot be located. Otherwise the
    directory of the extension, its ``lib`` subdirectory and the sibling
    ``lib`` directory are searched in that order for the platform's libtcc
    file names, and the first existing file is exported through the
    environment variable.
    """
    if IS_WASM or os.environ.get("ME_DSL_JIT_LIBTCC_PATH"):
        return

    ext_spec = importlib.util.find_spec("blosc2.blosc2_ext")
    if ext_spec is None or not ext_spec.origin:
        return

    base = Path(ext_spec.origin).resolve().parent
    search_dirs = (base, base / "lib", base.parent / "lib")

    # Shared-library file names per platform; anything that is neither
    # macOS nor Windows falls back to the ``.so`` names.
    lib_names = {
        "Darwin": ("libtcc.dylib",),
        "Windows": ("tcc.dll", "libtcc.dll"),
    }.get(platform.system(), ("libtcc.so", "libtcc.so.1"))

    for folder in search_dirs:
        for lib_name in lib_names:
            lib_path = folder / lib_name
            if lib_path.is_file():
                os.environ["ME_DSL_JIT_LIBTCC_PATH"] = str(lib_path)
                return
|
||||
|
||||
|
||||
_configure_libtcc_runtime_path()
|
||||
|
||||
_WASM_MINIEXPR_ENABLED = not IS_WASM
|
||||
|
||||
__version__ = __version__
|
||||
__array_api_version__ = __array_api_version__
|
||||
"""
|
||||
Python-Blosc2 version.
|
||||
"""
|
||||
|
||||
|
||||
class Codec(Enum):
    """
    Codecs that can be selected for compression.
    """

    BLOSCLZ = 0
    LZ4 = 1
    LZ4HC = 2
    ZLIB = 4
    ZSTD = 5
    NDLZ = 32
    ZFP_ACC = 33
    ZFP_PREC = 34
    ZFP_RATE = 35
    #: Requires the plugin installed via ``pip install blosc2-openhtj2k``
    OPENHTJ2K = 36
    #: Requires the plugin installed via ``pip install blosc2-grok``
    GROK = 37
    #: Requires the plugin installed via ``pip install blosc2-openzl``
    OPENZL = 38
|
||||
|
||||
|
||||
class Filter(Enum):
    """
    Supported filters.

    The integer given for a filter in ``filters_meta`` is interpreted as follows:

    - NOFILTER: ignored
    - SHUFFLE: number of byte streams for the shuffle (0 means the typesize of the array)
    - BITSHUFFLE: ignored
    - DELTA: ignored (bitwise XOR)
    - TRUNC_PREC: number of bits to which a float is truncated
    - NDCELL: cellshape (e.g. for a 3-dim dataset, meta = 4 means a 4x4x4 cellshape)
    - NDMEAN: cellshape (e.g. for a 3-dim dataset, meta = 4 means a 4x4x4 cellshape)
    - BYTEDELTA: number of byte streams for the delta
    - INT_TRUNC: number of bits to which an integer is truncated

    With TRUNC_PREC and INT_TRUNC a positive value is the number of bits to keep, while a negative value is the number of bits to zero.

    NDCELL is explained `here <https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndcell/README.md>`_ and NDMEAN `here <https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndmean/README.md>`_.
    """

    NOFILTER = 0
    SHUFFLE = 1
    BITSHUFFLE = 2
    DELTA = 3
    TRUNC_PREC = 4
    NDCELL = 32
    NDMEAN = 33
    BYTEDELTA = 35
    INT_TRUNC = 36
|
||||
|
||||
|
||||
class SplitMode(Enum):
    """
    Supported split modes.
    """

    ALWAYS_SPLIT = 1
    NEVER_SPLIT = 2
    AUTO_SPLIT = 3
    FORWARD_COMPAT_SPLIT = 4
|
||||
|
||||
|
||||
class SpecialValue(Enum):
    """
    Special values that a chunk may hold.
    """

    NOT_SPECIAL = 0
    ZERO = 1
    NAN = 2
    VALUE = 3
    UNINIT = 4
|
||||
|
||||
|
||||
class Tuner(Enum):
    """
    Supported tuners.
    """

    #: The 'simple' tuner, which is the default in the Blosc2 library
    STUNE = 0
    #: A more sophisticated tuner, able to pick different codecs/filters for different
    #: chunks (see `here <https://github.com/Blosc/blosc2_btune/>`_ for more info);
    #: it has to be installed with ``pip install blosc2-btune``
    BTUNE = 32
|
||||
|
||||
|
||||
class FPAccuracy(Enum):
    """
    Accuracy modes for floating point functions in Blosc2 lazy expressions.

    They only matter when floating point dtypes are computed with miniexpr.
    """

    #: Floating point functions use 1.0 ULPs (Units in the Last Place)
    HIGH = 1
    #: Floating point functions use 3.5 ULPs (Units in the Last Place)
    MEDIUM = 2
    #: The default accuracy, an alias of MEDIUM, which should be enough for most applications.
    DEFAULT = MEDIUM
|
||||
|
||||
|
||||
from .blosc2_ext import (
|
||||
DEFINED_CODECS_STOP,
|
||||
EXTENDED_HEADER_LENGTH,
|
||||
GLOBAL_REGISTERED_CODECS_STOP,
|
||||
MAX_BLOCKSIZE,
|
||||
MAX_BUFFERSIZE,
|
||||
MAX_DIM,
|
||||
MAX_OVERHEAD,
|
||||
MAX_TYPESIZE,
|
||||
MIN_HEADER_LENGTH,
|
||||
USER_REGISTERED_CODECS_STOP,
|
||||
VERSION_DATE,
|
||||
VERSION_STRING,
|
||||
)
|
||||
|
||||
DEFINED_CODECS_STOP = DEFINED_CODECS_STOP
|
||||
"""
|
||||
Maximum possible Blosc2-defined codec id."""
|
||||
|
||||
GLOBAL_REGISTERED_CODECS_STOP = GLOBAL_REGISTERED_CODECS_STOP
|
||||
"""
|
||||
Maximum possible Blosc2 global registered codec id."""
|
||||
|
||||
USER_REGISTERED_CODECS_STOP = USER_REGISTERED_CODECS_STOP
|
||||
"""
|
||||
Maximum possible Blosc2 user registered codec id."""
|
||||
|
||||
EXTENDED_HEADER_LENGTH = EXTENDED_HEADER_LENGTH
|
||||
"""
|
||||
Blosc2 extended header length in bytes."""
|
||||
|
||||
MAX_BUFFERSIZE = MAX_BUFFERSIZE
|
||||
"""
|
||||
Maximum buffer size in bytes for a Blosc2 chunk."""
|
||||
|
||||
MAX_FAST_PATH_SIZE = 2**30
|
||||
"""
|
||||
Maximum size in bytes for a fast path evaluation.
|
||||
"""
|
||||
|
||||
MAX_OVERHEAD = MAX_OVERHEAD
|
||||
"""
|
||||
Maximum overhead during compression (in bytes). This is
|
||||
equal to :py:obj:`blosc2.EXTENDED_HEADER_LENGTH <EXTENDED_HEADER_LENGTH>`."""
|
||||
|
||||
MAX_TYPESIZE = MAX_TYPESIZE
|
||||
"""
|
||||
Blosc2 maximum type size (in bytes)."""
|
||||
|
||||
MIN_HEADER_LENGTH = MIN_HEADER_LENGTH
|
||||
"""
|
||||
Blosc2 minimum header length (in bytes)."""
|
||||
|
||||
VERSION_DATE = VERSION_DATE
|
||||
"""
|
||||
The C-Blosc2 version's date."""
|
||||
|
||||
VERSION_STRING = VERSION_STRING
|
||||
"""
|
||||
The C-Blosc2 version's string."""
|
||||
|
||||
if IS_WASM:
|
||||
from ._wasm_jit import init_wasm_jit_helpers
|
||||
|
||||
_WASM_MINIEXPR_ENABLED = init_wasm_jit_helpers()
|
||||
|
||||
|
||||
# For array-api compatibility
|
||||
iinfo = np.iinfo
|
||||
finfo = np.finfo
|
||||
|
||||
|
||||
def isdtype(a_dtype: np.dtype, kind: str | np.dtype | tuple):
|
||||
"""
|
||||
Returns a boolean indicating whether a provided dtype is of a specified data type "kind".
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dtype: dtype
|
||||
The input dtype.
|
||||
|
||||
kind: str | dtype | Tuple[str, dtype]
|
||||
Data type kind.
|
||||
|
||||
If kind is a dtype, return boolean indicating whether the input dtype is equal to the dtype specified by kind.
|
||||
|
||||
If kind is a string, return boolean indicating whether the input dtype is of a specified data type kind.
|
||||
The following dtype kinds are supporte:
|
||||
|
||||
* 'bool': boolean data types (e.g., bool).
|
||||
|
||||
* 'signed integer': signed integer data types (e.g., int8, int16, int32, int64).
|
||||
|
||||
* 'unsigned integer': unsigned integer data types (e.g., uint8, uint16, uint32, uint64).
|
||||
|
||||
* 'integral': integer data types. Shorthand for ('signed integer', 'unsigned integer').
|
||||
|
||||
* 'real floating': real-valued floating-point data types (e.g., float32, float64).
|
||||
|
||||
* 'complex floating': complex floating-point data types (e.g., complex64, complex128).
|
||||
|
||||
* 'numeric': numeric data types. Shorthand for ('integral', 'real floating', 'complex floating').
|
||||
|
||||
Returns
|
||||
-------
|
||||
out: bool
|
||||
Boolean indicating whether a provided dtype is of a specified data type kind.
|
||||
"""
|
||||
kind = (kind,) if not isinstance(kind, tuple) else kind
|
||||
for _ in kind:
|
||||
if a_dtype == kind:
|
||||
return True
|
||||
|
||||
_complex, _signedint, _uint, _rfloat = False, False, False, False
|
||||
if a_dtype in (complex64, complex128):
|
||||
_complex = True
|
||||
if "complex floating" in kind:
|
||||
return True
|
||||
if a_dtype == bool_ and "bool" in kind:
|
||||
return True
|
||||
if a_dtype in (int8, int16, int32, int64):
|
||||
_signedint = True
|
||||
if "signed integer" in kind:
|
||||
return True
|
||||
if a_dtype in (uint8, uint16, uint32, uint64):
|
||||
_uint = True
|
||||
if "unsigned integer" in kind:
|
||||
return True
|
||||
if a_dtype in (float16, float32, float64):
|
||||
_rfloat = True
|
||||
if "real floating" in kind:
|
||||
return True
|
||||
if "integral" in kind and (_signedint or _uint):
|
||||
return True
|
||||
return "numeric" in kind and (
|
||||
_signedint or _uint or _rfloat or _complex
|
||||
) # checked everything, otherwise False
|
||||
|
||||
|
||||
# dtypes for array-api
|
||||
str_ = np.str_
|
||||
bytes_ = np.bytes_
|
||||
object_ = np.object_
|
||||
|
||||
from numpy import (
|
||||
bool_,
|
||||
complex64,
|
||||
complex128,
|
||||
e,
|
||||
euler_gamma,
|
||||
float16,
|
||||
float32,
|
||||
float64,
|
||||
inf,
|
||||
int8,
|
||||
int16,
|
||||
int32,
|
||||
int64,
|
||||
nan,
|
||||
newaxis,
|
||||
pi,
|
||||
uint8,
|
||||
uint16,
|
||||
uint32,
|
||||
uint64,
|
||||
)
|
||||
|
||||
bool = bool
|
||||
|
||||
DEFAULT_COMPLEX = complex128
|
||||
"""
|
||||
Default complex floating dtype."""
|
||||
|
||||
DEFAULT_FLOAT = float64
|
||||
"""
|
||||
Default real floating dtype."""
|
||||
|
||||
DEFAULT_INT = int64
|
||||
"""
|
||||
Default integer dtype."""
|
||||
|
||||
DEFAULT_INDEX = int64
|
||||
"""
|
||||
Default indexing dtype."""
|
||||
|
||||
|
||||
class Info:
    """Simple attribute container.

    Every keyword argument given to the constructor is stored on the instance
    as an attribute of the same name.
    """

    def __init__(self, **kwargs):
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])
|
||||
|
||||
|
||||
def __array_namespace_info__() -> Info:
    """
    Return the inspection object for this namespace, as described by the Array API specification.

    The returned :class:`Info` carries the callables ``capabilities``,
    ``default_device``, ``default_dtypes``, ``dtypes`` and ``devices`` plus the
    ``name`` and ``version`` attributes.  ``default_dtypes`` and ``dtypes``
    raise ``ValueError`` for any device other than ``"cpu"`` or ``None``.
    """

    def _require_cpu(device):
        # Only the "cpu" device (or no device at all) is accepted.
        if not (device == "cpu" or device is None):
            raise ValueError("Only cpu devices allowed")

    def capabilities():
        return {
            "boolean indexing": True,
            "data-dependent shapes": False,
            "max dimensions": MAX_DIM,
        }

    def default_device():
        return "cpu"

    def default_dtypes(device=None):
        _require_cpu(device)
        return {
            "real floating": DEFAULT_FLOAT,
            "complex floating": DEFAULT_COMPLEX,
            "integral": DEFAULT_INT,
            "indexing": DEFAULT_INDEX,
        }

    def dtypes(device=None, kind=None):
        _require_cpu(device)
        # The dtype table itself is delegated to NumPy's namespace info.
        return np.__array_namespace_info__().dtypes(kind=kind, device=device)

    def devices():
        return ["cpu"]

    return Info(
        capabilities=capabilities,
        default_device=default_device,
        default_dtypes=default_dtypes,
        dtypes=dtypes,
        devices=devices,
        name="blosc2",
        version=__version__,
    )
|
||||
|
||||
|
||||
# Public API for container module
|
||||
from .core import (
|
||||
clib_info,
|
||||
compress,
|
||||
compress2,
|
||||
compressor_list,
|
||||
compute_chunks_blocks,
|
||||
decompress,
|
||||
decompress2,
|
||||
detect_number_of_cores,
|
||||
free_resources,
|
||||
from_cframe,
|
||||
get_blocksize,
|
||||
get_cbuffer_sizes,
|
||||
get_clib,
|
||||
get_compressor,
|
||||
get_cpu_info,
|
||||
load_array,
|
||||
load_tensor,
|
||||
ndarray_from_cframe,
|
||||
pack,
|
||||
pack_array,
|
||||
pack_array2,
|
||||
pack_tensor,
|
||||
print_versions,
|
||||
register_codec,
|
||||
register_filter,
|
||||
remove_urlpath,
|
||||
save_array,
|
||||
save_tensor,
|
||||
schunk_from_cframe,
|
||||
set_blocksize,
|
||||
set_compressor,
|
||||
set_nthreads,
|
||||
set_releasegil,
|
||||
unpack,
|
||||
unpack_array,
|
||||
unpack_array2,
|
||||
unpack_tensor,
|
||||
)
|
||||
|
||||
# Internal Blosc threading
|
||||
# Get CPU info
|
||||
cpu_info = get_cpu_info()
|
||||
nthreads = ncores = cpu_info.get("count", 1)
|
||||
"""Number of threads to be used in compression/decompression.
|
||||
"""
|
||||
# Protection against too many threads
|
||||
nthreads = min(nthreads, 64)
|
||||
|
||||
if IS_WASM:
|
||||
nthreads = 1
|
||||
# Keep C-side runtime in sync with Python-level default in wasm32.
|
||||
set_nthreads(1)
|
||||
else:
|
||||
# Experiments say that, when using a large number of threads, it is better to not use them all
|
||||
if nthreads > 16:
|
||||
nthreads -= nthreads // 8
|
||||
# Only call set_num_threads if within NUMEXPR_MAX_THREADS limit to avoid warning
|
||||
numexpr_max_env = os.environ.get("NUMEXPR_MAX_THREADS")
|
||||
numexpr_max: int | None = None
|
||||
if numexpr_max_env is not None:
|
||||
with contextlib.suppress(ValueError):
|
||||
numexpr_max = int(numexpr_max_env)
|
||||
if numexpr_max is None or nthreads <= numexpr_max:
|
||||
numexpr.set_num_threads(nthreads)
|
||||
|
||||
# This import must be before ndarray and schunk
|
||||
from .storage import ( # noqa: I001
|
||||
CParams,
|
||||
cparams_dflts,
|
||||
DParams,
|
||||
dparams_dflts,
|
||||
Storage,
|
||||
storage_dflts,
|
||||
)
|
||||
|
||||
from .ndarray import (
|
||||
Array,
|
||||
NDArray,
|
||||
NDField,
|
||||
Operand,
|
||||
are_partitions_aligned,
|
||||
are_partitions_behaved,
|
||||
arange,
|
||||
broadcast_to,
|
||||
linspace,
|
||||
eye,
|
||||
asarray,
|
||||
astype,
|
||||
indices,
|
||||
sort,
|
||||
reshape,
|
||||
copy,
|
||||
concat,
|
||||
expand_dims,
|
||||
empty,
|
||||
empty_like,
|
||||
frombuffer,
|
||||
fromiter,
|
||||
get_slice_nchunks,
|
||||
meshgrid,
|
||||
nans,
|
||||
uninit,
|
||||
zeros,
|
||||
zeros_like,
|
||||
ones,
|
||||
ones_like,
|
||||
full,
|
||||
full_like,
|
||||
save,
|
||||
stack,
|
||||
)
|
||||
from .embed_store import EmbedStore, estore_from_cframe
|
||||
from .dict_store import DictStore
|
||||
from .tree_store import TreeStore
|
||||
|
||||
from .c2array import c2context, C2Array, URLPath
|
||||
|
||||
from .dsl_kernel import DSLSyntaxError, DSLKernel, dsl_kernel, validate_dsl
|
||||
from .lazyexpr import (
|
||||
LazyExpr,
|
||||
lazyudf,
|
||||
lazyexpr,
|
||||
LazyArray,
|
||||
LazyUDF,
|
||||
_open_lazyarray,
|
||||
get_expr_operands,
|
||||
validate_expr,
|
||||
evaluate,
|
||||
result_type,
|
||||
can_cast,
|
||||
)
|
||||
from .proxy import Proxy, ProxySource, ProxyNDSource, ProxyNDField, SimpleProxy, jit, as_simpleproxy
|
||||
|
||||
from .schunk import SChunk, open
|
||||
from . import linalg
|
||||
from .linalg import tensordot, vecdot, permute_dims, matrix_transpose, matmul, transpose, diagonal, outer
|
||||
from .utils import linalg_funcs as linalg_funcs_list
|
||||
from . import fft
|
||||
|
||||
# Registry for postfilters
|
||||
postfilter_funcs = {}
|
||||
"""
|
||||
Registry for postfilter functions. For more info see
|
||||
:func:`SChunk.postfilter <blosc2.schunk.SChunk.postfilter>`"""
|
||||
# Registry for prefilters
|
||||
prefilter_funcs = {}
|
||||
"""
|
||||
Registry for prefilter functions. For more info see
|
||||
:func:`SChunk.prefilter <blosc2.schunk.SChunk.prefilter>`"""
|
||||
|
||||
# Registry for user-defined codecs
|
||||
ucodecs_registry = {}
|
||||
"""
|
||||
Registry for user-defined codecs. For more info see
|
||||
:func:`blosc2.register_codec <blosc2.register_codec>`"""
|
||||
# Registry for user-defined filters
|
||||
ufilters_registry = {}
|
||||
"""
|
||||
Registry for user-defined filters. For more info see
|
||||
:func:`blosc2.register_filter <blosc2.register_filter>`"""
|
||||
|
||||
blosclib_version = f"{VERSION_STRING} ({VERSION_DATE})"
|
||||
"""
|
||||
The blosc2 version + date.
|
||||
"""
|
||||
|
||||
# Private global variables
|
||||
_disable_overloaded_equal = False
|
||||
"""
|
||||
Disable the overloaded equal operator.
|
||||
"""
|
||||
|
||||
# Delayed imports for avoiding overwriting of python builtins
|
||||
from .ndarray import (
|
||||
abs,
|
||||
acos,
|
||||
acosh,
|
||||
add,
|
||||
all,
|
||||
any,
|
||||
arccos,
|
||||
arccosh,
|
||||
arcsin,
|
||||
arcsinh,
|
||||
arctan,
|
||||
arctan2,
|
||||
arctanh,
|
||||
argmax,
|
||||
argmin,
|
||||
array_from_ffi_ptr,
|
||||
asin,
|
||||
asinh,
|
||||
atan,
|
||||
atan2,
|
||||
atanh,
|
||||
bitwise_and,
|
||||
bitwise_invert,
|
||||
bitwise_left_shift,
|
||||
bitwise_or,
|
||||
bitwise_right_shift,
|
||||
bitwise_xor,
|
||||
ceil,
|
||||
clip,
|
||||
conj,
|
||||
contains,
|
||||
copysign,
|
||||
cos,
|
||||
cosh,
|
||||
count_nonzero,
|
||||
cumulative_prod,
|
||||
cumulative_sum,
|
||||
divide,
|
||||
endswith,
|
||||
equal,
|
||||
exp,
|
||||
expm1,
|
||||
floor,
|
||||
floor_divide,
|
||||
greater,
|
||||
greater_equal,
|
||||
hypot,
|
||||
imag,
|
||||
isfinite,
|
||||
isinf,
|
||||
isnan,
|
||||
lazywhere,
|
||||
less,
|
||||
less_equal,
|
||||
log,
|
||||
log1p,
|
||||
log2,
|
||||
log10,
|
||||
logaddexp,
|
||||
logical_and,
|
||||
logical_not,
|
||||
logical_or,
|
||||
logical_xor,
|
||||
lower,
|
||||
max,
|
||||
maximum,
|
||||
mean,
|
||||
min,
|
||||
minimum,
|
||||
multiply,
|
||||
negative,
|
||||
nextafter,
|
||||
not_equal,
|
||||
positive,
|
||||
pow,
|
||||
prod,
|
||||
real,
|
||||
reciprocal,
|
||||
remainder,
|
||||
round,
|
||||
sign,
|
||||
signbit,
|
||||
sin,
|
||||
sinh,
|
||||
sqrt,
|
||||
square,
|
||||
squeeze,
|
||||
startswith,
|
||||
std,
|
||||
subtract,
|
||||
sum,
|
||||
take,
|
||||
take_along_axis,
|
||||
tan,
|
||||
tanh,
|
||||
trunc,
|
||||
upper,
|
||||
var,
|
||||
where,
|
||||
)
|
||||
|
||||
__all__ = [ # noqa : RUF022
|
||||
# Constants
|
||||
"EXTENDED_HEADER_LENGTH",
|
||||
"MAX_BUFFERSIZE",
|
||||
"MAX_TYPESIZE",
|
||||
"MIN_HEADER_LENGTH",
|
||||
"VERSION_DATE",
|
||||
"VERSION_STRING",
|
||||
# Default dtypes
|
||||
"DEFAULT_COMPLEX",
|
||||
"DEFAULT_FLOAT",
|
||||
"DEFAULT_INDEX",
|
||||
"DEFAULT_INT",
|
||||
# Mathematical constants
|
||||
"e",
|
||||
"pi",
|
||||
"inf",
|
||||
"nan",
|
||||
"newaxis",
|
||||
# Classes
|
||||
"C2Array",
|
||||
"CParams",
|
||||
# Enums
|
||||
"Codec",
|
||||
"DParams",
|
||||
"DictStore",
|
||||
"EmbedStore",
|
||||
"Filter",
|
||||
"LazyArray",
|
||||
"DSLKernel",
|
||||
"DSLSyntaxError",
|
||||
"LazyExpr",
|
||||
"LazyUDF",
|
||||
"NDArray",
|
||||
"NDField",
|
||||
"Operand",
|
||||
"Proxy",
|
||||
"ProxyNDField",
|
||||
"ProxyNDSource",
|
||||
"ProxySource",
|
||||
"SChunk",
|
||||
"SimpleProxy",
|
||||
"SpecialValue",
|
||||
"SplitMode",
|
||||
"Storage",
|
||||
"TreeStore",
|
||||
"Tuner",
|
||||
"URLPath",
|
||||
# Version
|
||||
"__version__",
|
||||
# Utils
|
||||
"linalg_funcs_list",
|
||||
# Functions
|
||||
"abs",
|
||||
"acos",
|
||||
"acosh",
|
||||
"add",
|
||||
"all",
|
||||
"any",
|
||||
"arange",
|
||||
"arccos",
|
||||
"arccosh",
|
||||
"arcsin",
|
||||
"arcsinh",
|
||||
"arctan",
|
||||
"arctan2",
|
||||
"arctanh",
|
||||
"are_partitions_aligned",
|
||||
"are_partitions_behaved",
|
||||
"argmax",
|
||||
"argmin",
|
||||
"array_from_ffi_ptr",
|
||||
"asarray",
|
||||
"asin",
|
||||
"asinh",
|
||||
"as_simpleproxy",
|
||||
"astype",
|
||||
"atan",
|
||||
"atan2",
|
||||
"atanh",
|
||||
"bitwise_and",
|
||||
"bitwise_invert",
|
||||
"bitwise_left_shift",
|
||||
"bitwise_or",
|
||||
"bitwise_right_shift",
|
||||
"bitwise_xor",
|
||||
"broadcast_to",
|
||||
"can_cast",
|
||||
"ceil",
|
||||
"clib_info",
|
||||
"clip",
|
||||
"compress",
|
||||
"compress2",
|
||||
"compressor_list",
|
||||
"compute_chunks_blocks",
|
||||
"concat",
|
||||
"conj",
|
||||
"contains",
|
||||
"copy",
|
||||
"copysign",
|
||||
"cos",
|
||||
"cosh",
|
||||
"count_nonzero",
|
||||
"cparams_dflts",
|
||||
"cpu_info",
|
||||
"cumulative_prod",
|
||||
"cumulative_sum",
|
||||
"decompress",
|
||||
"decompress2",
|
||||
"detect_number_of_cores",
|
||||
"divide",
|
||||
"dparams_dflts",
|
||||
"endswith",
|
||||
"empty",
|
||||
"empty_like",
|
||||
"equal",
|
||||
"estore_from_cframe",
|
||||
"exp",
|
||||
"expand_dims",
|
||||
"expm1",
|
||||
"eye",
|
||||
"finfo",
|
||||
"floor",
|
||||
"floor_divide",
|
||||
"free_resources",
|
||||
"from_cframe",
|
||||
"frombuffer",
|
||||
"fromiter",
|
||||
"full",
|
||||
"full_like",
|
||||
"get_blocksize",
|
||||
"get_cbuffer_sizes",
|
||||
"get_clib",
|
||||
"get_compressor",
|
||||
"get_cpu_info",
|
||||
"get_expr_operands",
|
||||
"get_slice_nchunks",
|
||||
"greater",
|
||||
"greater_equal",
|
||||
"hypot",
|
||||
"imag",
|
||||
"iinfo",
|
||||
"indices",
|
||||
"isdtype",
|
||||
"isfinite",
|
||||
"isinf",
|
||||
"isnan",
|
||||
"jit",
|
||||
"lazyexpr",
|
||||
"dsl_kernel",
|
||||
"validate_dsl",
|
||||
"lazyudf",
|
||||
"lazywhere",
|
||||
"less",
|
||||
"less_equal",
|
||||
"linspace",
|
||||
"load_array",
|
||||
"load_tensor",
|
||||
"log",
|
||||
"log1p",
|
||||
"log2",
|
||||
"log10",
|
||||
"logaddexp",
|
||||
"logical_and",
|
||||
"logical_not",
|
||||
"logical_or",
|
||||
"logical_xor",
|
||||
"lower",
|
||||
"matmul",
|
||||
"matrix_transpose",
|
||||
"max",
|
||||
"maximum",
|
||||
"mean",
|
||||
"meshgrid",
|
||||
"min",
|
||||
"minimum",
|
||||
"multiply",
|
||||
"nans",
|
||||
"ndarray_from_cframe",
|
||||
"negative",
|
||||
"nextafter",
|
||||
"not_equal",
|
||||
"ones",
|
||||
"ones_like",
|
||||
"open",
|
||||
"pack",
|
||||
"pack_array",
|
||||
"pack_array2",
|
||||
"pack_tensor",
|
||||
"permute_dims",
|
||||
"positive",
|
||||
"postfilter_funcs",
|
||||
"pow",
|
||||
"prefilter_funcs",
|
||||
"print_versions",
|
||||
"prod",
|
||||
"real",
|
||||
"reciprocal",
|
||||
"register_codec",
|
||||
"register_filter",
|
||||
"remainder",
|
||||
"remove_urlpath",
|
||||
"reshape",
|
||||
"result_type",
|
||||
"round",
|
||||
"save",
|
||||
"save_array",
|
||||
"save_tensor",
|
||||
"schunk_from_cframe",
|
||||
"set_blocksize",
|
||||
"set_compressor",
|
||||
"set_nthreads",
|
||||
"set_releasegil",
|
||||
"sign",
|
||||
"signbit",
|
||||
"sin",
|
||||
"sinh",
|
||||
"sort",
|
||||
"sqrt",
|
||||
"square",
|
||||
"squeeze",
|
||||
"stack",
|
||||
"startswith",
|
||||
"std",
|
||||
"storage_dflts",
|
||||
"subtract",
|
||||
"sum",
|
||||
"take",
|
||||
"take_along_axis",
|
||||
"tan",
|
||||
"tanh",
|
||||
"tensordot",
|
||||
"transpose",
|
||||
"trunc",
|
||||
"uninit",
|
||||
"unpack",
|
||||
"unpack_array",
|
||||
"unpack_array2",
|
||||
"unpack_tensor",
|
||||
"upper",
|
||||
"validate_expr",
|
||||
"var",
|
||||
"vecdot",
|
||||
"where",
|
||||
"zeros",
|
||||
"zeros_like",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,627 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
_HELPERS_REGISTERED = False
|
||||
|
||||
_REGISTER_HELPERS_JS = r"""
|
||||
(() => {
|
||||
const g = globalThis;
|
||||
if (g.__blosc2_me_jit_helper_ptrs) {
|
||||
return g.__blosc2_me_jit_helper_ptrs;
|
||||
}
|
||||
|
||||
const candidates = [];
|
||||
const addCandidate = (name, obj) => {
|
||||
if (!obj || (typeof obj !== "object" && typeof obj !== "function")) {
|
||||
return;
|
||||
}
|
||||
candidates.push({ name, obj });
|
||||
};
|
||||
const addDerivedCandidates = (baseName, obj) => {
|
||||
if (!obj || (typeof obj !== "object" && typeof obj !== "function")) {
|
||||
return;
|
||||
}
|
||||
addCandidate(`${baseName}._module`, obj._module);
|
||||
addCandidate(`${baseName}.module`, obj.module);
|
||||
addCandidate(`${baseName}.Module`, obj.Module);
|
||||
addCandidate(`${baseName}.asm`, obj.asm);
|
||||
addCandidate(`${baseName}.wasmExports`, obj.wasmExports);
|
||||
addCandidate(`${baseName}.wasm`, obj.wasm);
|
||||
addCandidate(`${baseName}.__wasm`, obj.__wasm);
|
||||
addCandidate(`${baseName}.pyodide`, obj.pyodide);
|
||||
addCandidate(`${baseName}._api`, obj._api);
|
||||
};
|
||||
|
||||
addCandidate("globalThis", g);
|
||||
addCandidate("globalThis.Module", g.Module);
|
||||
addCandidate("globalThis.__blosc2_pyodide_module", g.__blosc2_pyodide_module);
|
||||
addCandidate("globalThis.__blosc2_pyodide_api", g.__blosc2_pyodide_api);
|
||||
addCandidate("globalThis.pyodide", g.pyodide);
|
||||
addCandidate("globalThis.pyodide._module", g.pyodide && g.pyodide._module);
|
||||
addCandidate("globalThis.pyodide.module", g.pyodide && g.pyodide.module);
|
||||
addCandidate("globalThis.pyodide.Module", g.pyodide && g.pyodide.Module);
|
||||
addCandidate("globalThis.pyodide._api", g.pyodide && g.pyodide._api);
|
||||
addCandidate("globalThis.pyodide._api._module", g.pyodide && g.pyodide._api && g.pyodide._api._module);
|
||||
addCandidate("globalThis.pyodide._api.Module", g.pyodide && g.pyodide._api && g.pyodide._api.Module);
|
||||
addDerivedCandidates("globalThis", g);
|
||||
addDerivedCandidates("globalThis.pyodide", g.pyodide);
|
||||
addDerivedCandidates("globalThis.__blosc2_pyodide_module", g.__blosc2_pyodide_module);
|
||||
addDerivedCandidates("globalThis.__blosc2_pyodide_api", g.__blosc2_pyodide_api);
|
||||
|
||||
const resolve = (name) => {
|
||||
for (const cand of candidates) {
|
||||
let value;
|
||||
try {
|
||||
value = cand.obj[name];
|
||||
} catch (_e) {
|
||||
value = undefined;
|
||||
}
|
||||
if (value !== undefined && value !== null) {
|
||||
if (typeof value === "function") {
|
||||
return value.bind(cand.obj);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
||||
if (g[name] !== undefined && g[name] !== null) {
|
||||
return g[name];
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
const wasmExports = resolve("wasmExports") || resolve("exports");
|
||||
const asmObj = resolve("asm");
|
||||
|
||||
const isWasmMemory = (value) =>
|
||||
typeof WebAssembly !== "undefined" &&
|
||||
typeof WebAssembly.Memory !== "undefined" &&
|
||||
value instanceof WebAssembly.Memory;
|
||||
const isWasmTable = (value) =>
|
||||
typeof WebAssembly !== "undefined" &&
|
||||
typeof WebAssembly.Table !== "undefined" &&
|
||||
value instanceof WebAssembly.Table;
|
||||
const heapU8ForProbe = resolve("HEAPU8");
|
||||
const heapBufferForProbe = heapU8ForProbe && heapU8ForProbe.buffer ? heapU8ForProbe.buffer : null;
|
||||
const heapBufferLenForProbe =
|
||||
heapBufferForProbe && typeof heapBufferForProbe.byteLength === "number"
|
||||
? heapBufferForProbe.byteLength
|
||||
: -1;
|
||||
|
||||
const isMemoryLike = (value) => {
|
||||
if (!value) {
|
||||
return false;
|
||||
}
|
||||
if (isWasmMemory(value)) {
|
||||
return true;
|
||||
}
|
||||
let buf = null;
|
||||
try {
|
||||
buf = value.buffer;
|
||||
} catch (_e) {
|
||||
buf = null;
|
||||
}
|
||||
if (!buf || typeof buf.byteLength !== "number") {
|
||||
return false;
|
||||
}
|
||||
if (typeof value.grow !== "function") {
|
||||
return false;
|
||||
}
|
||||
if (heapBufferForProbe && buf !== heapBufferForProbe) {
|
||||
const bufLen = typeof buf.byteLength === "number" ? buf.byteLength : -1;
|
||||
if (heapBufferLenForProbe > 0 && bufLen > 0 && bufLen < heapBufferLenForProbe) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
const isTableLike = (value) => {
|
||||
if (!value) {
|
||||
return false;
|
||||
}
|
||||
if (isWasmTable(value)) {
|
||||
return true;
|
||||
}
|
||||
return (
|
||||
typeof value.get === "function" &&
|
||||
typeof value.grow === "function" &&
|
||||
typeof value.length === "number"
|
||||
);
|
||||
};
|
||||
|
||||
const findMemoryOrTableByType = (wantMemory) => {
|
||||
const isObj = (v) => v && (typeof v === "object" || typeof v === "function");
|
||||
const seen = new Set();
|
||||
const queue = [];
|
||||
const maxDepth = 6;
|
||||
const maxVisited = 5000;
|
||||
|
||||
for (const cand of candidates) {
|
||||
if (isObj(cand.obj)) {
|
||||
queue.push({ value: cand.obj, depth: 0 });
|
||||
}
|
||||
}
|
||||
|
||||
while (queue.length > 0 && seen.size < maxVisited) {
|
||||
const node = queue.shift();
|
||||
const obj = node.value;
|
||||
const depth = node.depth;
|
||||
if (!isObj(obj) || seen.has(obj)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(obj);
|
||||
|
||||
if (wantMemory && isMemoryLike(obj)) {
|
||||
return obj;
|
||||
}
|
||||
if (!wantMemory && isTableLike(obj)) {
|
||||
return obj;
|
||||
}
|
||||
if (depth >= maxDepth) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let keys = [];
|
||||
try {
|
||||
keys = Object.getOwnPropertyNames(obj);
|
||||
} catch (_e) {
|
||||
keys = [];
|
||||
}
|
||||
let symKeys = [];
|
||||
try {
|
||||
symKeys = Object.getOwnPropertySymbols(obj);
|
||||
} catch (_e) {
|
||||
symKeys = [];
|
||||
}
|
||||
const allKeys = keys.concat(symKeys);
|
||||
|
||||
for (const key of allKeys) {
|
||||
let value;
|
||||
try {
|
||||
value = obj[key];
|
||||
} catch (_e) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wantMemory && isMemoryLike(value)) {
|
||||
return value;
|
||||
}
|
||||
if (!wantMemory && isTableLike(value)) {
|
||||
return value;
|
||||
}
|
||||
if (isObj(value)) {
|
||||
if (wantMemory && isMemoryLike(value.memory)) {
|
||||
return value.memory;
|
||||
}
|
||||
if (!wantMemory && isTableLike(value.__indirect_function_table)) {
|
||||
return value.__indirect_function_table;
|
||||
}
|
||||
queue.push({ value, depth: depth + 1 });
|
||||
}
|
||||
}
|
||||
|
||||
let proto = null;
|
||||
try {
|
||||
proto = Object.getPrototypeOf(obj);
|
||||
} catch (_e) {
|
||||
proto = null;
|
||||
}
|
||||
if (isObj(proto)) {
|
||||
queue.push({ value: proto, depth: depth + 1 });
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
||||
const captureMemoryViaGrowHook = () => {
|
||||
if (
|
||||
typeof WebAssembly === "undefined" ||
|
||||
typeof WebAssembly.Memory === "undefined" ||
|
||||
!WebAssembly.Memory.prototype ||
|
||||
typeof WebAssembly.Memory.prototype.grow !== "function"
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const growMemory = resolve("growMemory");
|
||||
const resizeHeap = resolve("_emscripten_resize_heap");
|
||||
if (typeof growMemory !== "function" && typeof resizeHeap !== "function") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const heapU8 = resolve("HEAPU8");
|
||||
const currentBytes =
|
||||
heapU8 && heapU8.buffer && typeof heapU8.buffer.byteLength === "number"
|
||||
? heapU8.buffer.byteLength
|
||||
: 0;
|
||||
if (currentBytes <= 0) {
|
||||
return null;
|
||||
}
|
||||
const onePage = 64 * 1024;
|
||||
let targetBytes = currentBytes + onePage;
|
||||
const getHeapMax = resolve("getHeapMax");
|
||||
if (typeof getHeapMax === "function") {
|
||||
try {
|
||||
const maxBytes = getHeapMax();
|
||||
if (typeof maxBytes === "number" && maxBytes > 0) {
|
||||
targetBytes = Math.min(targetBytes, maxBytes);
|
||||
}
|
||||
} catch (_e) {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
if (targetBytes <= currentBytes) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let captured = null;
|
||||
const originalGrow = WebAssembly.Memory.prototype.grow;
|
||||
WebAssembly.Memory.prototype.grow = function patchedGrow(pages) {
|
||||
captured = this;
|
||||
return originalGrow.call(this, pages);
|
||||
};
|
||||
|
||||
try {
|
||||
if (typeof growMemory === "function") {
|
||||
growMemory(targetBytes);
|
||||
} else if (typeof resizeHeap === "function") {
|
||||
resizeHeap(targetBytes);
|
||||
}
|
||||
} catch (_e) {
|
||||
/* best effort only */
|
||||
} finally {
|
||||
WebAssembly.Memory.prototype.grow = originalGrow;
|
||||
}
|
||||
|
||||
if (captured && isMemoryLike(captured)) {
|
||||
return captured;
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
const deriveRuntimeFromAdjustedImports = () => {
|
||||
for (const cand of candidates) {
|
||||
const obj = cand.obj;
|
||||
if (!obj || typeof obj.adjustWasmImports !== "function") {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
const importsObj = { env: {} };
|
||||
const adjustedMaybe = obj.adjustWasmImports(importsObj);
|
||||
const adjusted =
|
||||
adjustedMaybe && (typeof adjustedMaybe === "object" || typeof adjustedMaybe === "function")
|
||||
? adjustedMaybe
|
||||
: importsObj;
|
||||
const env =
|
||||
(adjusted && adjusted.env) ||
|
||||
(importsObj && importsObj.env) ||
|
||||
null;
|
||||
if (!env) {
|
||||
continue;
|
||||
}
|
||||
const mem =
|
||||
env.memory ||
|
||||
env.wasmMemory ||
|
||||
(adjusted && (adjusted.memory || adjusted.wasmMemory)) ||
|
||||
null;
|
||||
const tbl =
|
||||
env.__indirect_function_table ||
|
||||
env.wasmTable ||
|
||||
(adjusted && (adjusted.__indirect_function_table || adjusted.wasmTable)) ||
|
||||
null;
|
||||
if (mem || tbl) {
|
||||
return { memory: mem, table: tbl };
|
||||
}
|
||||
} catch (_e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
const adjustedRuntime = deriveRuntimeFromAdjustedImports();
|
||||
|
||||
const wasmMemory =
|
||||
resolve("wasmMemory") ||
|
||||
resolve("memory") ||
|
||||
resolve("wasmMemoryObject") ||
|
||||
resolve("__wasmMemory") ||
|
||||
(asmObj && asmObj.memory) ||
|
||||
(asmObj && asmObj.wasmMemory) ||
|
||||
(wasmExports && wasmExports.memory) ||
|
||||
(adjustedRuntime && adjustedRuntime.memory) ||
|
||||
captureMemoryViaGrowHook() ||
|
||||
findMemoryOrTableByType(true) ||
|
||||
null;
|
||||
const wasmTable =
|
||||
resolve("wasmTable") ||
|
||||
resolve("__indirect_function_table") ||
|
||||
(asmObj && asmObj.__indirect_function_table) ||
|
||||
(asmObj && asmObj.wasmTable) ||
|
||||
(wasmExports && wasmExports.__indirect_function_table) ||
|
||||
(adjustedRuntime && adjustedRuntime.table) ||
|
||||
findMemoryOrTableByType(false) ||
|
||||
null;
|
||||
const runtime = {
|
||||
HEAPF32: resolve("HEAPF32"),
|
||||
HEAPF64: resolve("HEAPF64"),
|
||||
HEAPU8: heapU8ForProbe,
|
||||
wasmMemory,
|
||||
wasmTable,
|
||||
addFunction: resolve("addFunction"),
|
||||
removeFunction: resolve("removeFunction"),
|
||||
stackSave: resolve("stackSave"),
|
||||
stackAlloc: resolve("stackAlloc"),
|
||||
stackRestore: resolve("stackRestore"),
|
||||
lengthBytesUTF8: resolve("lengthBytesUTF8"),
|
||||
stringToUTF8: resolve("stringToUTF8"),
|
||||
err: resolve("err"),
|
||||
};
|
||||
|
||||
const required = [
|
||||
"HEAPF32",
|
||||
"HEAPF64",
|
||||
"HEAPU8",
|
||||
"wasmMemory",
|
||||
"wasmTable",
|
||||
"addFunction",
|
||||
"removeFunction",
|
||||
"stackSave",
|
||||
"stackAlloc",
|
||||
"stackRestore",
|
||||
"lengthBytesUTF8",
|
||||
"stringToUTF8",
|
||||
];
|
||||
const missing = required.filter((name) => !runtime[name]);
|
||||
if (missing.length > 0) {
|
||||
const aliasKeys = [
|
||||
"wasmMemory",
|
||||
"memory",
|
||||
"wasmExports",
|
||||
"asm",
|
||||
"__indirect_function_table",
|
||||
"wasmTable",
|
||||
"adjustWasmImports",
|
||||
];
|
||||
const keyRegex = /(mem|wasm|asm|module|heap)/i;
|
||||
const diag = candidates.map((cand) => {
|
||||
const have = required.filter((name) => {
|
||||
try {
|
||||
return !!cand.obj[name];
|
||||
} catch (_e) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
const aliases = aliasKeys.filter((name) => {
|
||||
try {
|
||||
return cand.obj[name] !== undefined && cand.obj[name] !== null;
|
||||
} catch (_e) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
let ownKeys = [];
|
||||
try {
|
||||
ownKeys = Object.getOwnPropertyNames(cand.obj);
|
||||
} catch (_e) {
|
||||
ownKeys = [];
|
||||
}
|
||||
const interesting = ownKeys.filter((k) => keyRegex.test(k)).slice(0, 20);
|
||||
return `${cand.name}=[${have.join(",")}],aliases=[${aliases.join(",")}],keys=[${interesting.join(",")}]`;
|
||||
}).join(" | ");
|
||||
return {
|
||||
instantiatePtr: 0,
|
||||
freePtr: 0,
|
||||
error: `missing runtime members: ${missing.join(", ")}; candidates: ${diag}`,
|
||||
};
|
||||
}
|
||||
|
||||
if (typeof g._meJitInstantiate !== "function" || typeof g._meJitFreeFn !== "function") {
|
||||
return { instantiatePtr: 0, freePtr: 0, error: "me_jit_glue exports unavailable" };
|
||||
}
|
||||
|
||||
const refreshRuntimeViews = () => {
|
||||
const updater = resolve("updateMemoryViews");
|
||||
if (typeof updater === "function") {
|
||||
try {
|
||||
updater();
|
||||
} catch (_e) {
|
||||
/* best effort only */
|
||||
}
|
||||
runtime.HEAPU8 = resolve("HEAPU8") || runtime.HEAPU8;
|
||||
runtime.HEAPF32 = resolve("HEAPF32") || runtime.HEAPF32;
|
||||
runtime.HEAPF64 = resolve("HEAPF64") || runtime.HEAPF64;
|
||||
}
|
||||
|
||||
const mem = runtime.wasmMemory;
|
||||
const buffer = mem && mem.buffer ? mem.buffer : null;
|
||||
if (!buffer || typeof buffer.byteLength !== "number" || buffer.byteLength === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const heapU8 = runtime.HEAPU8;
|
||||
if (!heapU8 || heapU8.buffer !== buffer || heapU8.byteLength === 0) {
|
||||
runtime.HEAPU8 = new Uint8Array(buffer);
|
||||
}
|
||||
const heapF32 = runtime.HEAPF32;
|
||||
if (!heapF32 || heapF32.buffer !== buffer || heapF32.byteLength === 0) {
|
||||
runtime.HEAPF32 = new Float32Array(buffer);
|
||||
}
|
||||
const heapF64 = runtime.HEAPF64;
|
||||
if (!heapF64 || heapF64.buffer !== buffer || heapF64.byteLength === 0) {
|
||||
runtime.HEAPF64 = new Float64Array(buffer);
|
||||
}
|
||||
|
||||
return runtime.HEAPU8;
|
||||
};
|
||||
|
||||
const instantiateWrapper = (wasmPtr, wasmLen, bridgeLookupFnIdx) => {
|
||||
const start = wasmPtr >>> 0;
|
||||
const len = wasmLen >>> 0;
|
||||
if (start === 0 || len === 0) {
|
||||
return 0;
|
||||
}
|
||||
const heapU8 = refreshRuntimeViews();
|
||||
if (!heapU8) {
|
||||
return 0;
|
||||
}
|
||||
const end = (start + len) >>> 0;
|
||||
if (end > heapU8.byteLength || end < start) {
|
||||
return 0;
|
||||
}
|
||||
const wasmBytes = new Uint8Array(len);
|
||||
wasmBytes.set(heapU8.subarray(start, end));
|
||||
return g._meJitInstantiate(runtime, wasmBytes, bridgeLookupFnIdx | 0) | 0;
|
||||
};
|
||||
const freeWrapper = (fnIdx) => {
|
||||
g._meJitFreeFn(runtime, fnIdx | 0);
|
||||
};
|
||||
|
||||
const instantiatePtr = runtime.addFunction(instantiateWrapper, "iiii");
|
||||
const freePtr = runtime.addFunction(freeWrapper, "vi");
|
||||
g.__blosc2_me_jit_helper_ptrs = {
|
||||
instantiatePtr,
|
||||
freePtr,
|
||||
instantiateWrapper,
|
||||
freeWrapper,
|
||||
runtime,
|
||||
};
|
||||
return g.__blosc2_me_jit_helper_ptrs;
|
||||
})()
|
||||
"""
|
||||
|
||||
|
||||
def _trace_enabled() -> bool:
|
||||
value = os.environ.get("ME_DSL_TRACE", "")
|
||||
return value.lower() in {"1", "true", "on", "yes"}
|
||||
|
||||
|
||||
def _trace(message: str) -> None:
    """Print *message* with the ``[blosc2.wasm-jit]`` prefix when tracing is enabled."""
    if not _trace_enabled():
        return
    print(f"[blosc2.wasm-jit] {message}")
|
||||
|
||||
|
||||
def _js_eval(js_mod, source: str):
|
||||
evaluator = getattr(js_mod, "eval", None)
|
||||
if evaluator is not None:
|
||||
return evaluator(source)
|
||||
return js_mod.globalThis.eval(source)
|
||||
|
||||
|
||||
def _load_glue_once(js_mod) -> bool:
    """Make sure the JIT glue functions exist on the JavaScript global object.

    If ``globalThis._meJitInstantiate`` and ``globalThis._meJitFreeFn`` are
    already functions nothing is loaded.  Otherwise ``me_jit_glue.js`` (a
    sibling of this module) is read and evaluated, and the probe is repeated.

    Returns
    -------
    bool
        True when both glue functions are available after the call; False
        when the file cannot be read, evaluating it raises, or the functions
        are still missing afterwards.
    """
    probe = (
        "typeof globalThis._meJitInstantiate === 'function' && "
        "typeof globalThis._meJitFreeFn === 'function'"
    )

    def exports_present() -> bool:
        # Ask the JS side whether both entry points are defined right now.
        return bool(_js_eval(js_mod, probe))

    if exports_present():
        return True

    glue_path = Path(__file__).with_name("me_jit_glue.js")
    try:
        glue_source = glue_path.read_text(encoding="utf-8")
    except OSError as exc:
        _trace(f"could not read {glue_path.name}: {exc}")
        return False

    try:
        _js_eval(js_mod, glue_source)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"failed to evaluate {glue_path.name}: {exc}")
        return False

    # Evaluating the file is not proof that it defined the expected names.
    return exports_present()
|
||||
|
||||
|
||||
def _inject_pyodide_runtime_handles(js_mod) -> None:
|
||||
try:
|
||||
import pyodide_js
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
module_obj = None
|
||||
for name in ("_module", "module", "Module"):
|
||||
module_obj = getattr(pyodide_js, name, None)
|
||||
if module_obj is not None:
|
||||
break
|
||||
if module_obj is not None:
|
||||
js_mod.globalThis.__blosc2_pyodide_module = module_obj
|
||||
_trace("captured pyodide_js module handle")
|
||||
|
||||
api_obj = getattr(pyodide_js, "_api", None)
|
||||
if api_obj is not None:
|
||||
js_mod.globalThis.__blosc2_pyodide_api = api_obj
|
||||
_trace("captured pyodide_js API handle")
|
||||
|
||||
|
||||
def _create_helper_ptrs(js_mod) -> tuple[int, int] | None:
    """Run the helper-registration script and return its two pointers.

    Returns
    -------
    tuple[int, int] | None
        ``(instantiate_ptr, free_ptr)`` read from the ``instantiatePtr`` and
        ``freePtr`` attributes of the script result, or None when the script
        raises, the attributes cannot be converted to ``int``, or either
        value is 0.  In the last case the result's ``error`` attribute, if
        truthy, is traced.
    """
    try:
        helpers = _js_eval(js_mod, _REGISTER_HELPERS_JS)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"helper setup JS failed: {exc}")
        return None

    try:
        pointers = (int(helpers.instantiatePtr), int(helpers.freePtr))
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"unexpected helper setup result: {exc}")
        return None

    if all(pointers):
        return pointers

    # A zero pointer signals failure; surface the JS-side reason if present.
    js_error = getattr(helpers, "error", None)
    if js_error:
        _trace(str(js_error))
    return None
|
||||
|
||||
|
||||
def init_wasm_jit_helpers() -> bool:
    """Register the JavaScript JIT helpers with the C extension, once.

    The steps are: import ``js``, check that the extension exposes
    ``_register_wasm_jit_helpers``, publish the Pyodide handles, load the
    glue script, create the helper pointers and pass them to the extension.
    A successful run sets the module flag ``_HELPERS_REGISTERED`` so later
    calls return immediately.

    Returns
    -------
    bool
        True when the helpers are (or already were) registered, False when
        any step fails.  Failures are reported through ``_trace`` only.
    """
    global _HELPERS_REGISTERED

    if _HELPERS_REGISTERED:
        return True

    try:
        import js
    except ImportError:
        return False

    from . import blosc2_ext

    if not hasattr(blosc2_ext, "_register_wasm_jit_helpers"):
        _trace("extension does not expose _register_wasm_jit_helpers")
        return False

    _inject_pyodide_runtime_handles(js)

    glue_ready = _load_glue_once(js)
    if not glue_ready:
        _trace("me_jit_glue.js was not loaded")
        return False

    pointers = _create_helper_ptrs(js)
    if pointers is None:
        _trace("could not allocate addFunction helper pointers")
        return False
    instantiate_ptr, free_ptr = pointers

    try:
        blosc2_ext._register_wasm_jit_helpers(instantiate_ptr, free_ptr)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"C helper registration failed: {exc}")
        return False
    else:
        _HELPERS_REGISTERED = True

    _trace(f"registered wasm JIT helper pointers instantiate={instantiate_ptr} free={free_ptr}")
    return True
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,465 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
import blosc2
|
||||
from blosc2.info import InfoReporter
|
||||
|
||||
_subscriber_data = {
|
||||
"urlbase": os.environ.get("BLOSC_C2URLBASE"),
|
||||
"auth_token": "",
|
||||
}
|
||||
"""Caterva2 subscriber data saved by context manager."""
|
||||
|
||||
TIMEOUT = 15
|
||||
"""Default timeout for HTTP requests."""
|
||||
|
||||
|
||||
@contextmanager
|
||||
def c2context(
|
||||
*,
|
||||
urlbase: (str | None) = None,
|
||||
username: (str | None) = None,
|
||||
password: (str | None) = None,
|
||||
auth_token: (str | None) = None,
|
||||
) -> None:
|
||||
"""
|
||||
Context manager that sets parameters in Caterva2 subscriber requests.
|
||||
|
||||
A parameter not specified or set to ``None`` will inherit the value from the
|
||||
previous context manager, defaulting to an environment variable (see
|
||||
below) if supported by that parameter. Parameters set to an empty string
|
||||
will not be used in requests (without a default either).
|
||||
|
||||
If the subscriber requires authorization for requests, you can either
|
||||
provide an `auth_token` (which you should have obtained previously from the
|
||||
subscriber), or both `username` and `password` to obtain the token by
|
||||
logging in to the subscriber. The token will be reused until it is explicitly
|
||||
reset or requested again in a later context manager invocation.
|
||||
|
||||
Please note that this manager is reentrant but not safe for concurrent use.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
urlbase : str | None
|
||||
The base URL to be used when a C2Array instance does not have a subscriber
|
||||
URL base set. If not specified, it defaults to the value of the
|
||||
``BLOSC_C2URLBASE`` environment variable.
|
||||
username : str | None
|
||||
The username for logging in to the subscriber to obtain an authorization token.
|
||||
If not specified, it defaults to the value of the ``BLOSC_C2USERNAME`` environment variable.
|
||||
password : str | None
|
||||
The password for logging in to the subscriber to obtain an authorization token.
|
||||
If not specified, it defaults to the value of the ``BLOSC_C2PASSWORD`` environment variable.
|
||||
auth_token : str | None
|
||||
The authorization token to be used when a C2Array instance does not have an
|
||||
authorization token set.
|
||||
|
||||
Yields
|
||||
------
|
||||
out: None
|
||||
|
||||
"""
|
||||
global _subscriber_data
|
||||
print("_subscriber_data", _subscriber_data)
|
||||
|
||||
# Perform login to get an authorization token.
|
||||
if not auth_token:
|
||||
username = username or os.environ.get("BLOSC_C2USERNAME")
|
||||
password = password or os.environ.get("BLOSC_C2PASSWORD")
|
||||
if username or password:
|
||||
if auth_token:
|
||||
raise ValueError("Either provide a username/password or an authorization token")
|
||||
auth_token = login(username, password, urlbase)
|
||||
|
||||
try:
|
||||
old_sub_data = _subscriber_data
|
||||
new_sub_data = old_sub_data.copy() # inherit old values
|
||||
if urlbase is not None:
|
||||
new_sub_data["urlbase"] = urlbase
|
||||
elif old_sub_data["urlbase"] is None:
|
||||
# The variable may have gotten a value after program start.
|
||||
new_sub_data["urlbase"] = os.environ.get("BLOSC_C2URLBASE")
|
||||
if auth_token is not None:
|
||||
new_sub_data["auth_token"] = auth_token
|
||||
_subscriber_data = new_sub_data
|
||||
yield
|
||||
finally:
|
||||
_subscriber_data = old_sub_data
|
||||
|
||||
|
||||
def _xget(url, params=None, headers=None, auth_token=None, timeout=TIMEOUT):
    """Issue an HTTP GET and return the response object.

    The token (the argument, or else the one stored in the current
    subscriber data) is sent in the ``Cookie`` header of a copy of
    *headers*; the caller's mapping is not modified.  HTTP error statuses
    raise through ``raise_for_status()``.
    """
    token = auth_token or _subscriber_data["auth_token"]
    if token:
        headers = {} if not headers else headers.copy()
        headers["Cookie"] = token
    reply = requests.get(url, params=params, headers=headers, timeout=timeout)
    reply.raise_for_status()
    return reply
|
||||
|
||||
|
||||
def _xpost(url, json=None, auth_token=None, timeout=TIMEOUT):
    """Issue an HTTP POST with a JSON body and return the decoded JSON reply.

    The token (the argument, or else the one stored in the current
    subscriber data) is sent in the ``Cookie`` header; with no token no
    extra headers are sent.  HTTP error statuses raise through
    ``raise_for_status()``.
    """
    token = auth_token or _subscriber_data["auth_token"]
    headers = None
    if token:
        headers = {"Cookie": token}
    reply = requests.post(url, json=json, headers=headers, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
|
||||
|
||||
|
||||
def _sub_url(urlbase, path):
|
||||
urlbase = urlbase or _subscriber_data["urlbase"]
|
||||
if not urlbase:
|
||||
raise RuntimeError("No default Caterva2 subscriber set")
|
||||
return f"{urlbase}{path}" if urlbase.endswith("/") else f"{urlbase}/{path}"
|
||||
|
||||
|
||||
def login(username, password, urlbase):
    """Log in to the subscriber and return its first cookie as ``name=value``.

    The credentials are posted as form data to ``auth/jwt/login`` under
    *urlbase*.  HTTP error statuses raise through ``raise_for_status()``;
    a reply carrying no cookie raises ``IndexError``.
    """
    endpoint = _sub_url(urlbase, "auth/jwt/login")
    form = {"username": username, "password": password}
    reply = requests.post(endpoint, data=form, timeout=TIMEOUT)
    reply.raise_for_status()
    cookie_name, cookie_value = list(reply.cookies.items())[0]
    return "=".join((cookie_name, cookie_value))
|
||||
|
||||
|
||||
def info(path, urlbase, params=None, headers=None, model=None, auth_token=None):
    """Fetch the metadata of the remote dataset at *path*.

    Requests ``api/info/<path>`` from the subscriber and decodes the JSON
    reply.  When *model* is given it is called with the decoded mapping as
    keyword arguments and its result is returned instead of the raw mapping.
    """
    response = _xget(_sub_url(urlbase, f"api/info/{path}"), params, headers, auth_token)
    payload = response.json()
    if model is None:
        return payload
    return model(**payload)
|
||||
|
||||
|
||||
def fetch_data(path, urlbase, params, auth_token=None, as_blosc2=False):
    """Download data from ``api/fetch/<path>`` and deserialize it.

    The reply body is first decoded with ``blosc2.ndarray_from_cframe``;
    if that raises ``RuntimeError`` it is decoded with
    ``blosc2.schunk_from_cframe`` instead.

    Returns
    -------
    The decoded blosc2 container when *as_blosc2* is true; otherwise its
    contents, read with ``[()]`` for zero-dimensional containers and with
    ``[:]`` for everything else.
    """
    url = _sub_url(urlbase, f"api/fetch/{path}")
    payload = _xget(url, params=params, auth_token=auth_token).content
    # Try different deserialization methods
    try:
        container = blosc2.ndarray_from_cframe(payload)
    except RuntimeError:
        container = blosc2.schunk_from_cframe(payload)
    if as_blosc2:
        return container
    # catch 0d case where [:] fails
    if hasattr(container, "ndim") and container.ndim == 0:
        return container[()]
    return container[:]
|
||||
|
||||
|
||||
def slice_to_string(slice_):
    """Render an index expression as the text used in the ``slice_`` request parameter.

    Parameters
    ----------
    slice_ : int, slice, tuple of ints and slices, or None
        The index to render.  ``None``, ``()`` and ``slice(None)`` all mean
        "everything" and yield an empty string.

    Returns
    -------
    str
        The per-dimension parts joined with ``", "``: integers as decimal
        text and slices as ``start:stop``, with an omitted bound left empty.
        Items that are neither ``int`` nor ``slice`` contribute nothing.

    Raises
    ------
    IndexError
        If a slice has a step other than 1 or ``None``.
    """
    if slice_ is None or slice_ == () or slice_ == slice(None):
        return ""
    if not isinstance(slice_, tuple):
        slice_ = (slice_,)
    slice_parts = []
    for index in slice_:
        if isinstance(index, int):
            slice_parts.append(str(index))
        elif isinstance(index, slice):
            if index.step not in (1, None):
                raise IndexError("Only step=1 is supported")
            # Compare against None rather than relying on truthiness: a bound
            # of 0 is meaningful (``a[:0]`` is empty) and must not be rendered
            # as an open bound.
            start = "" if index.start is None else index.start
            stop = "" if index.stop is None else index.stop
            slice_parts.append(f"{start}:{stop}")
    return ", ".join(slice_parts)
|
||||
|
||||
|
||||
class C2Array(blosc2.Operand):
    """Handle to a remote NDArray served over HTTP by a Caterva2 subscriber.

    The array metadata is requested once, when the instance is created, and
    kept in ``self.meta``; the read-only properties are answered from that
    cached mapping.  The data accessors (``__getitem__``, ``slice`` and
    ``get_chunk``) issue a new HTTP request on every call.
    """

    def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None):
        """Create an instance of a remote NDArray.

        Remote NDArrays can be accessed via HTTP from a Caterva2 server
        (e.g., https://cat2.cloud).  More information about Caterva2 at:
        https://ironarray.io/caterva2.

        Parameters
        ----------
        path: str
            The path to the remote NDArray file (root + file path) as
            a posix path.
        urlbase: str
            The base URL of the subscriber to query.  A trailing slash is
            appended when missing.
        auth_token: str
            An optional token to authorize requests via HTTP.  Currently, it
            will be sent as an HTTP cookie.

        Returns
        -------
        out: C2Array

        Raises
        ------
        ValueError
            If `path` starts with a slash.
        FileNotFoundError
            If the metadata request fails with an HTTP error.

        Examples
        --------
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-3d.b2nd"
        >>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
        >>> remote_array.shape
        (3, 4, 5)
        >>> remote_array.chunks
        (2, 3, 4)
        >>> remote_array.blocks
        (2, 2, 2)
        >>> remote_array.dtype
        dtype('float32')
        """
        if path.startswith("/"):
            raise ValueError("The path should start with a root name, not a slash")
        self.path = path

        if urlbase and not urlbase.endswith("/"):
            urlbase += "/"
        self.urlbase = urlbase

        self.auth_token = auth_token

        # Try to 'open' the remote path
        try:
            self.meta = info(self.path, self.urlbase, auth_token=self.auth_token)
        except requests.HTTPError as err:
            # Any HTTP error status is reported as a missing path; the
            # original error is chained for diagnosis.
            raise FileNotFoundError(f"Remote path not found: {path}.\nError was: {err}") from err
        cparams = self.meta["schunk"]["cparams"]
        # Remove "filters, meta" from cparams; this is an artifact from the server
        cparams.pop("filters, meta", None)
        self._cparams = blosc2.CParams(**cparams)

    def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray:
        """
        Get a slice of the array (returning NumPy array).

        Parameters
        ----------
        slice_ : int, slice, tuple of ints and slices, or None
            The slice to fetch.

        Returns
        -------
        out: numpy.ndarray
            A numpy.ndarray containing the data slice.

        Examples
        --------
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-2d.b2nd"
        >>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
        >>> data_slice = remote_array[3:5, 1:4]
        >>> data_slice.shape
        (2, 3)
        >>> data_slice[:]
        array([[61, 62, 63],
               [81, 82, 83]], dtype=uint16)
        """
        # The slice travels to the server as text in the ``slice_`` parameter.
        slice_ = slice_to_string(slice_)
        return fetch_data(
            self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=False
        )

    def slice(self, slice_: int | slice | Sequence[slice]) -> blosc2.NDArray:
        """
        Get a slice of the array (returning blosc2 NDArray array).

        Parameters
        ----------
        slice_ : int, slice, tuple of ints and slices, or None
            The slice to fetch.

        Returns
        -------
        out: blosc2.NDArray
            A blosc2.NDArray containing the data slice.

        Examples
        --------
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-2d.b2nd"
        >>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
        >>> data_slice = remote_array.slice((slice(3,5), slice(1,4)))
        >>> data_slice.shape
        (2, 3)
        >>> type(data_slice)
        blosc2.ndarray.NDArray
        """
        # Same request as ``__getitem__``, but the container is returned
        # without being decompressed into a NumPy array.
        slice_ = slice_to_string(slice_)
        return fetch_data(
            self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=True
        )

    def __len__(self) -> int:
        """Returns the length of the first dimension of the array.
        This is equivalent to ``self.shape[0]``.
        """
        return self.shape[0]

    def get_chunk(self, nchunk: int) -> bytes:
        """
        Get the compressed unidimensional chunk of a :ref:`C2Array`.

        Parameters
        ----------
        nchunk: int
            The index of the unidimensional chunk to retrieve.

        Returns
        -------
        out: bytes
            The requested compressed chunk.

        Examples
        --------
        >>> import numpy as np
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-3d.b2nd"
        >>> a = blosc2.C2Array(path, urlbase)
        >>> # Get the compressed chunk from array 'a' for index 0
        >>> compressed_chunk = a.get_chunk(0)
        >>> f"Size of chunk {0} from a: {len(compressed_chunk)} bytes"
        Size of chunk 0 from a: 160 bytes
        >>> # Decompress the chunk and convert it to a NumPy array
        >>> decompressed_chunk = blosc2.decompress(compressed_chunk)
        >>> np.frombuffer(decompressed_chunk, dtype=a.dtype)
        array([ 0.,  1.,  5.,  6., 20., 21., 25., 26.,  2.,  3.,  7.,  8., 22.,
               23., 27., 28., 10., 11.,  0.,  0., 30., 31.,  0.,  0., 12., 13.,
                0.,  0., 32., 33.,  0.,  0.], dtype=float32)
        """
        url = _sub_url(self.urlbase, f"api/chunk/{self.path}")
        params = {"nchunk": nchunk}
        response = _xget(url, params=params, auth_token=self.auth_token)
        # The raw reply body is returned as-is, with no decompression here.
        return response.content

    @property
    def shape(self) -> tuple[int, ...]:
        """The shape of the remote array"""
        return tuple(self.meta["shape"])

    @property
    def chunks(self) -> tuple[int, ...]:
        """The chunks of the remote array"""
        return tuple(self.meta["chunks"])

    @property
    def blocks(self) -> tuple[int, ...]:
        """The blocks of the remote array"""
        return tuple(self.meta["blocks"])

    @property
    def dtype(self) -> np.dtype:
        """The dtype of the remote array"""
        return np.dtype(self.meta["dtype"])

    @property
    def cparams(self) -> blosc2.CParams:
        """The compression parameters of the remote array"""
        return self._cparams

    @property
    def nbytes(self) -> int:
        """The number of bytes of the remote array"""
        return self.meta["schunk"]["nbytes"]

    @property
    def cbytes(self) -> int:
        """The number of compressed bytes of the remote array"""
        return self.meta["schunk"]["cbytes"]

    @property
    def cratio(self) -> float:
        """The compression ratio of the remote array"""
        return self.meta["schunk"]["cratio"]

    # TODO: Add these to SChunk model in srv_utils and then access them here
    # @property
    # def dparams(self) -> float:
    #     """The dparams of the remote array"""
    #     return
    #
    # @property
    # def meta(self) -> float:
    #     """The meta of the remote array"""
    #     return

    # TODO: This seems to cause problems for proxy sources (see tests/ndarray/test_proxy_c2array.py::test_open)
    # @property
    # def urlpath(self) -> str:
    #     """The URL path of the remote array"""
    #     return self.meta["schunk"]["urlpath"]

    @property
    def vlmeta(self) -> dict:
        """The variable-length metadata of the remote array"""
        return self.meta["schunk"]["vlmeta"]

    @property
    def info(self) -> InfoReporter:
        """
        Print information about this remote array.
        """
        return InfoReporter(self)

    @property
    def info_items(self) -> list:
        """A list of tuples with the information about the remote array.
        Each tuple contains the name of the attribute and its value.
        """
        items = []
        items += [("type", f"{self.__class__.__name__}")]
        items += [("shape", self.shape)]
        items += [("chunks", self.chunks)]
        items += [("blocks", self.blocks)]
        items += [("dtype", self.dtype)]
        items += [("nbytes", self.nbytes)]
        items += [("cbytes", self.cbytes)]
        items += [("cratio", f"{self.cratio:.2f}")]
        items += [("cparams", self.cparams)]
        # items += [("dparams", self.dparams)]
        return items

    # TODO: Access chunksize, size, ext_chunks, etc.
    # @property
    # def size(self) -> int:
    #     """The size (in bytes) for this container."""
    #     return self.cbytes
    # @property
    # def chunksize(self) -> int:
    #     """NOT the same as `SChunk.chunksize <blosc2.schunk.SChunk.chunksize>`
    #     in case :attr:`chunks` is not multiple in
    #     each dimension of :attr:`blocks` (or equivalently, if :attr:`chunks` is
    #     not the same as :attr:`ext_chunks`).
    #     """
    #     return

    @property
    def blocksize(self) -> int:
        """The block size (in bytes) for the remote container."""
        return self.meta["schunk"]["blocksize"]
|
||||
|
||||
|
||||
class URLPath:
|
||||
def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None):
|
||||
"""
|
||||
Create an instance of a remote data file (aka :ref:`C2Array <C2Array>`) urlpath.
|
||||
This is meant to be used in the :func:`blosc2.open` function.
|
||||
|
||||
The parameters are the same as for the :meth:`C2Array.__init__`.
|
||||
|
||||
"""
|
||||
self.path = path
|
||||
self.urlbase = urlbase
|
||||
self.auth_token = auth_token
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,547 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import zipfile
|
||||
from collections.abc import Iterator, Set
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
import blosc2
|
||||
from blosc2.c2array import C2Array
|
||||
from blosc2.embed_store import EmbedStore
|
||||
from blosc2.schunk import SChunk
|
||||
|
||||
|
||||
class DictStore:
|
||||
"""
|
||||
Directory-based storage for compressed data using Blosc2.
|
||||
|
||||
Manages arrays in a directory (.b2d) or zip (.b2z) format.
|
||||
|
||||
Supports the following types:
|
||||
|
||||
- blosc2.NDArray: n-dimensional arrays. When persisted externally they
|
||||
are stored as .b2nd files.
|
||||
- blosc2.SChunk: super-chunks. When persisted externally they are stored
|
||||
as .b2f files.
|
||||
- blosc2.C2Array: columnar containers. These are always kept inside the
|
||||
embedded store (never externalized).
|
||||
- numpy.ndarray: converted to blosc2.NDArray on assignment.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
localpath : str
|
||||
Local path for the directory (".b2d") or file (".b2z"); other extensions
|
||||
are not supported. If a directory is specified, it will be treated as
|
||||
a Blosc2 directory format (B2DIR). If a file is specified, it
|
||||
will be treated as a Blosc2 zip format (B2ZIP).
|
||||
mode : str, optional
|
||||
File mode ('r', 'w', 'a'). Default is 'a'.
|
||||
mmap_mode : str or None, optional
|
||||
Memory mapping mode for read access. For now, only ``"r"`` is supported,
|
||||
and only when ``mode="r"``. Default is None.
|
||||
tmpdir : str or None, optional
|
||||
Temporary directory to use when working with ".b2z" files. If None,
|
||||
a system temporary directory will be managed. Default is None.
|
||||
cparams : dict or None, optional
|
||||
Compression parameters for the internal embed store.
|
||||
If None, the default Blosc2 parameters are used.
|
||||
dparams : dict or None, optional
|
||||
Decompression parameters for the internal embed store.
|
||||
If None, the default Blosc2 parameters are used.
|
||||
storage : blosc2.Storage or None, optional
|
||||
Storage properties for the internal embed store.
|
||||
If None, the default Blosc2 storage properties are used.
|
||||
threshold : int or None, optional
|
||||
Threshold (in bytes of uncompressed data) under which values are kept
|
||||
in the embedded store. If None, in-memory arrays are stored in the
|
||||
embedded store and on-disk arrays are stored as separate files.
|
||||
C2Array objects will always be stored in the embedded store,
|
||||
regardless of their size.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> dstore = DictStore(localpath="my_dstore.b2z", mode="w")
|
||||
>>> dstore["/node1"] = np.array([1, 2, 3]) # goes to embed store
|
||||
>>> dstore["/node2"] = blosc2.ones(2) # goes to embed store
|
||||
>>> arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w")
|
||||
>>> dstore["/dir1/node3"] = arr_external # external file in dir1 (.b2nd)
|
||||
>>> schunk = blosc2.SChunk(chunksize=32)
|
||||
>>> schunk.append_data(b"abcd")
|
||||
4
|
||||
>>> dstore["/dir1/schunk1"] = schunk # externalized as .b2f if above threshold
|
||||
>>> dstore.to_b2z() # persist to the zip file; external files are copied in
|
||||
>>> print(sorted(dstore.keys()))
|
||||
['/dir1/node3', '/dir1/schunk1', '/node1', '/node2']
|
||||
>>> print(dstore["/node1"][:])
|
||||
array([1, 2, 3])
|
||||
|
||||
Notes
|
||||
-----
|
||||
- External persistence uses the following file extensions:
|
||||
.b2nd for NDArray and .b2f for SChunk.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    localpath: os.PathLike[Any] | str | bytes,
    mode: str = "a",
    tmpdir: str | None = None,
    cparams: blosc2.CParams | None = None,
    dparams: blosc2.DParams | None = None,
    storage: blosc2.Storage | None = None,
    threshold: int | None = 2**13,
    *,
    mmap_mode: str | None = None,
    _storage_meta: dict | None = None,
):
    """
    See :class:`DictStore` for full documentation of parameters.

    Raises
    ------
    ValueError
        If `localpath` does not end in ``.b2z`` or ``.b2d``, if `mode` is
        not one of ``'r'``, ``'w'``, ``'a'``, if `mmap_mode` is neither
        None nor ``'r'``, or if ``mmap_mode='r'`` is combined with a mode
        other than ``'r'``.
    """
    # os.fsdecode() accepts str, bytes and os.PathLike alike and always
    # returns str.  Keeping a bytes path as-is would make the str-based
    # suffix checks below fail with TypeError.
    self.localpath = os.fsdecode(localpath)
    if not self.localpath.endswith((".b2z", ".b2d")):
        raise ValueError(f"localpath must have a .b2z or .b2d extension; you passed: {self.localpath}")
    if mode not in ("r", "w", "a"):
        raise ValueError("For DictStore containers, mode must be 'r', 'w', or 'a'")
    if mmap_mode not in (None, "r"):
        raise ValueError("For DictStore containers, mmap_mode must be None or 'r'")
    if mmap_mode == "r" and mode != "r":
        raise ValueError("For DictStore containers, mmap_mode='r' requires mode='r'")

    self.mode = mode
    self.mmap_mode = mmap_mode
    self.threshold = threshold
    self.cparams = cparams or blosc2.CParams()
    self.dparams = dparams or blosc2.DParams()
    self.storage = storage or blosc2.Storage()

    if _storage_meta:
        self.storage.meta = _storage_meta
    else:
        # Mark this storage as a b2dict object
        self.storage.meta = {"b2dict": {"version": 1}}

    self.offsets = {}
    self.map_tree = {}
    self._temp_dir_obj = None

    self._setup_paths_and_dirs(tmpdir)

    if self.mode == "r":
        self._init_read_mode(self.dparams)
    else:
        # The caller's ``storage`` argument (possibly None) is forwarded
        # untouched; the metadata travels separately via ``self.storage.meta``.
        self._init_write_append_mode(self.cparams, self.dparams, storage)
|
||||
|
||||
def _setup_paths_and_dirs(self, tmpdir: str | None):
|
||||
"""Set up working directories and paths."""
|
||||
self.is_zip_store = self.localpath.endswith(".b2z")
|
||||
if self.is_zip_store:
|
||||
if tmpdir is None:
|
||||
self._temp_dir_obj = tempfile.TemporaryDirectory()
|
||||
self.working_dir = self._temp_dir_obj.name
|
||||
else:
|
||||
self.working_dir = tmpdir
|
||||
os.makedirs(tmpdir, exist_ok=True)
|
||||
self.b2z_path = self.localpath
|
||||
else: # .b2d
|
||||
self.working_dir = self.localpath
|
||||
if self.mode in ("w", "a"):
|
||||
os.makedirs(self.working_dir, exist_ok=True)
|
||||
self.b2z_path = self.localpath[:-4] + ".b2z"
|
||||
|
||||
self.estore_path = os.path.join(self.working_dir, "embed.b2e")
|
||||
|
||||
def _init_read_mode(self, dparams: blosc2.DParams | None = None):
    """Open an existing store for reading.

    For a zip store the embed store is opened in place, at the offset of
    ``embed.b2e`` inside the zip file, and ``map_tree`` is filled from the
    ``.b2nd`` / ``.b2f`` member names.  For a directory store ``embed.b2e``
    is opened from the directory and ``map_tree`` is filled by scanning it.

    Raises
    ------
    FileNotFoundError
        If the store does not exist, a ``.b2d`` path is not a directory, or
        a zip store has no ``embed.b2e`` member.
    """
    if not os.path.exists(self.localpath):
        raise FileNotFoundError(f"dir/zip file {self.localpath} does not exist.")

    # Options shared by both ways of opening the embed store.
    open_options = {"mode": "r", "mmap_mode": self.mmap_mode, "dparams": dparams}

    if not self.is_zip_store:
        # .b2d
        if not os.path.isdir(self.localpath):
            raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.")
        schunk = blosc2.blosc2_ext.open(self.estore_path, offset=0, **open_options)
        self._update_map_tree()
    else:
        self.offsets = self._get_zip_offsets()
        if "embed.b2e" not in self.offsets:
            raise FileNotFoundError("Embed file embed.b2e not found in store.")
        estore_offset = self.offsets["embed.b2e"]["offset"]
        schunk = blosc2.blosc2_ext.open(self.b2z_path, offset=estore_offset, **open_options)
        for filepath in self.offsets:
            for suffix in (".b2nd", ".b2f"):
                if filepath.endswith(suffix):
                    # Key is the member name without its extension, rooted at "/".
                    self.map_tree["/" + filepath[: -len(suffix)]] = filepath
                    break

    self._estore = EmbedStore(_from_schunk=schunk)
    self.storage.meta = self._estore.storage.meta
|
||||
|
||||
def _init_write_append_mode(
    self,
    cparams: blosc2.CParams | None,
    dparams: blosc2.DParams | None,
    storage: blosc2.Storage | None,
):
    """Open or create the store for writing or appending.

    When appending to an existing zip store, its contents are first
    extracted into the working directory.  The embed store is then opened
    at ``self.estore_path`` and ``map_tree`` is filled by scanning the
    working directory.

    Raises
    ------
    FileNotFoundError
        If appending to an existing ``.b2d`` path that is not a directory.
    """
    appending_to_existing = self.mode == "a" and os.path.exists(self.localpath)
    if appending_to_existing and self.is_zip_store:
        with zipfile.ZipFile(self.localpath, "r") as archive:
            archive.extractall(self.working_dir)
    elif appending_to_existing and not os.path.isdir(self.working_dir):
        raise FileNotFoundError(f"Directory {self.working_dir} does not exist for reading.")

    self._estore = EmbedStore(
        urlpath=self.estore_path,
        mode=self.mode,
        cparams=cparams,
        dparams=dparams,
        storage=storage,
        meta=self.storage.meta,
    )
    self._update_map_tree()
|
||||
|
||||
def _update_map_tree(self):
|
||||
# Build map_tree from .b2nd and .b2f files in working dir
|
||||
for root, _, files in os.walk(self.working_dir):
|
||||
for file in files:
|
||||
filepath = os.path.join(root, file)
|
||||
if filepath.endswith((".b2nd", ".b2f")):
|
||||
# Convert filename to key: remove extension and ensure starts with /
|
||||
rel_path = os.path.relpath(filepath, self.working_dir)
|
||||
# Normalize path separators to forward slashes for cross-platform consistency
|
||||
rel_path = rel_path.replace(os.sep, "/")
|
||||
if rel_path.endswith(".b2nd"):
|
||||
key = rel_path[:-5]
|
||||
elif rel_path.endswith(".b2f"):
|
||||
key = rel_path[:-4]
|
||||
else:
|
||||
continue
|
||||
if not key.startswith("/"):
|
||||
key = "/" + key
|
||||
self.map_tree[key] = rel_path
|
||||
|
||||
@property
def estore(self) -> EmbedStore:
    """The :class:`EmbedStore` instance that backs this store (read-only)."""
    return self._estore
|
||||
|
||||
def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
    """Store ``value`` under ``key``, either embedded or as an external file.

    NumPy arrays are first converted with ``blosc2.asarray``. ``C2Array``
    values always go to the embedded store. Any other value is written as a
    file under ``working_dir`` when its ``nbytes`` reaches ``self.threshold``,
    or when no threshold is set and the value already has a ``urlpath``;
    otherwise it is embedded.
    """
    if isinstance(value, np.ndarray):
        value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams)

    # C2Array should always go to embed store; let estore handle it directly
    if isinstance(value, C2Array):
        self._estore[key] = value
        return

    limit = self.threshold
    too_big = limit is not None and value.nbytes >= limit
    # Both NDArray and SChunk count as external files when they carry a urlpath
    source_path = isinstance(value, (blosc2.NDArray, SChunk)) and getattr(value, "urlpath", None)

    if not (too_big or (source_path and limit is None)):
        if source_path:
            # Embed a copy by round-tripping through a cframe
            value = blosc2.from_cframe(value.to_cframe())
        self._estore[key] = value
        return

    # External node: the extension depends on the value type
    suffix = ".b2f" if isinstance(value, SChunk) else ".b2nd"
    target = os.path.join(self.working_dir, key.lstrip("/") + suffix)

    folder = os.path.dirname(target)
    if folder and not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)

    if source_path:
        # Copying the existing file should be faster than value.save()
        shutil.copy2(value.urlpath, target)
    elif hasattr(value, "save"):
        value.save(urlpath=target)
    else:
        # An SChunk does not have a save() method
        with open(target, "wb") as fh:
            fh.write(value.to_cframe())

    # Record the path relative to the working directory, with forward slashes
    self.map_tree[key] = os.path.relpath(target, self.working_dir).replace(os.sep, "/")
|
||||
|
||||
def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array:
    """Return the node stored under ``key``.

    Keys listed in ``map_tree`` are opened from the b2z file when their path
    has an entry in ``offsets``, otherwise from the file in ``working_dir``.
    Any other key is looked up in the embedded store.

    Raises
    ------
    KeyError
        If the key is in ``map_tree`` but its file is neither in ``offsets``
        nor on disk (a miss in the embedded store propagates from there).
    """
    if key not in self.map_tree:
        # Not an external node: defer to the EmbedStore
        return self._estore[key]

    filepath = self.map_tree[key]
    if filepath in self.offsets:
        return blosc2.blosc2_ext.open(
            self.b2z_path,
            mode="r",
            offset=self.offsets[filepath]["offset"],
            mmap_mode=self.mmap_mode,
            dparams=self.dparams,
        )

    urlpath = os.path.join(self.working_dir, filepath)
    if not os.path.exists(urlpath):
        raise KeyError(f"File for key '{key}' not found in offsets or temporary directory.")

    read_only = self.mode == "r"
    return blosc2.open(
        urlpath,
        mode="r" if read_only else "a",
        mmap_mode=self.mmap_mode if read_only else None,
        dparams=self.dparams,
    )
|
||||
|
||||
def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | C2Array | Any:
    """Return ``self[key]``, or ``default`` when that lookup raises ``KeyError``."""
    try:
        node = self[key]
    except KeyError:
        return default
    return node
|
||||
|
||||
def __delitem__(self, key: str) -> None:
    """Delete the node stored under ``key``.

    For a key in ``map_tree`` the entry is removed and its file under
    ``working_dir`` is deleted if present. Otherwise the key is removed from
    the embedded store.

    Raises
    ------
    KeyError
        If the key is in neither ``map_tree`` nor the embedded store.
    """
    if key not in self.map_tree:
        if key not in self._estore:
            raise KeyError(f"Key '{key}' not found")
        del self._estore[key]
        return

    # External node: forget the mapping, then drop the backing file
    rel_file = self.map_tree.pop(key)
    on_disk = os.path.join(self.working_dir, rel_file)
    if os.path.exists(on_disk):
        os.remove(on_disk)
|
||||
|
||||
def __contains__(self, key: str) -> bool:
    """Tell whether ``key`` is in ``map_tree`` or in the embedded store."""
    if key in self.map_tree:
        return True
    return key in self._estore
|
||||
|
||||
def __len__(self) -> int:
    """Return the number of distinct keys in the store.

    A key present in both ``map_tree`` and the embedded store is counted
    once, so the result matches the number of keys produced by iterating
    the store (which skips embedded keys that are also in ``map_tree``).
    """
    embedded_only = sum(1 for key in self._estore if key not in self.map_tree)
    return len(self.map_tree) + embedded_only
|
||||
|
||||
def __iter__(self) -> Iterator[str]:
    """Yield every key once: ``map_tree`` keys first, then embedded-only keys."""
    external = self.map_tree
    for key in external.keys():
        yield key
    yield from (key for key in self._estore if key not in external)
|
||||
|
||||
def keys(self) -> Set[str]:
    """Return the union of the ``map_tree`` keys and the embedded-store keys."""
    return {*self.map_tree.keys(), *self._estore.keys()}
|
||||
|
||||
def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
    """Iterate over all values, one per distinct key.

    Each value is obtained through ``self[key]``, so iteration resolves nodes
    exactly as item access does: ``map_tree`` takes precedence over the
    embedded store, and an external node of a zip store is served from
    ``working_dir`` when it has no entry in ``offsets``. Such nodes used to be
    skipped silently, so ``values()`` could return fewer entries than ``keys()``.

    Raises
    ------
    KeyError
        If a key in ``map_tree`` has no backing file (raised by ``self[key]``).
    """
    # Keys are de-duplicated; the iteration order of the set is unspecified
    all_keys = set(self.map_tree.keys()) | set(self._estore.keys())
    for key in all_keys:
        yield self[key]
|
||||
|
||||
def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
    """Iterate over ``(key, value)`` pairs, one per distinct key.

    Each value is obtained through ``self[key]``, so iteration resolves nodes
    exactly as item access does: ``map_tree`` takes precedence over the
    embedded store, and an external node of a zip store is served from
    ``working_dir`` when it has no entry in ``offsets``. Such nodes used to be
    skipped silently, so ``items()`` could return fewer entries than ``keys()``.

    Raises
    ------
    KeyError
        If a key in ``map_tree`` has no backing file (raised by ``self[key]``).
    """
    # Keys are de-duplicated; the iteration order of the set is unspecified
    all_keys = set(self.map_tree.keys()) | set(self._estore.keys())
    for key in all_keys:
        yield key, self[key]
|
||||
|
||||
def to_b2z(self, overwrite=False, filename=None) -> os.PathLike[Any] | str:
|
||||
"""
|
||||
Serialize zip store contents to the b2z file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
overwrite : bool, optional
|
||||
If True, overwrite the existing b2z file if it exists. Default is False.
|
||||
filename : str, optional
|
||||
If provided, use this filename instead of the default b2z file path.
|
||||
|
||||
Returns
|
||||
-------
|
||||
filename : str
|
||||
The absolute path to the created b2z file.
|
||||
"""
|
||||
if self.mode == "r":
|
||||
raise ValueError("Cannot call to_b2z() on a DictStore opened in read mode.")
|
||||
|
||||
b2z_path = self.b2z_path if filename is None else filename
|
||||
if not b2z_path.endswith(".b2z"):
|
||||
raise ValueError("b2z_path must have a .b2z extension")
|
||||
|
||||
if os.path.exists(b2z_path) and not overwrite:
|
||||
raise FileExistsError(f"'{b2z_path}' already exists. Use overwrite=True to overwrite.")
|
||||
|
||||
# Gather all files except estore_path
|
||||
filepaths = []
|
||||
for root, _, files in os.walk(self.working_dir):
|
||||
for file in files:
|
||||
filepath = os.path.join(root, file)
|
||||
if os.path.abspath(filepath) != os.path.abspath(self.estore_path):
|
||||
filepaths.append(filepath)
|
||||
|
||||
# Sort filepaths by file size from largest to smallest
|
||||
filepaths.sort(key=os.path.getsize, reverse=True)
|
||||
|
||||
with zipfile.ZipFile(self.b2z_path, "w", zipfile.ZIP_STORED) as zf:
|
||||
# Write all files (except estore_path) first (sorted by size)
|
||||
for filepath in filepaths:
|
||||
arcname = os.path.relpath(filepath, self.working_dir)
|
||||
zf.write(filepath, arcname)
|
||||
# Write estore last
|
||||
if os.path.exists(self.estore_path):
|
||||
arcname = os.path.relpath(self.estore_path, self.working_dir)
|
||||
zf.write(self.estore_path, arcname)
|
||||
return os.path.abspath(self.b2z_path)
|
||||
|
||||
def _get_zip_offsets(self) -> dict[str, dict[str, int]]:
    """Rebuild ``self.offsets`` from the b2z archive and return it.

    For each archive member the table maps its name to a dict with the byte
    ``offset`` of its data inside the b2z file and its ``length``
    (``ZipInfo.file_size``).
    """
    self.offsets = {}  # start from a clean table
    with open(self.b2z_path, "rb") as raw:
        with zipfile.ZipFile(raw) as archive:
            for member in archive.infolist():
                # header_offset points at the local file header; the data
                # starts after its 30 fixed bytes plus the variable-length
                # file name and extra field.
                raw.seek(member.header_offset)
                fixed_header = raw.read(30)
                name_len = int.from_bytes(fixed_header[26:28], "little")
                extra_len = int.from_bytes(fixed_header[28:30], "little")
                self.offsets[member.filename] = {
                    "offset": member.header_offset + 30 + name_len + extra_len,
                    "length": member.file_size,
                }
    return self.offsets
|
||||
|
||||
def close(self) -> None:
    """Write the b2z file when needed and release the temporary directory.

    A zip store opened in ``"w"`` or ``"a"`` mode is serialized with
    ``to_b2z(overwrite=True)``. If the store holds a temporary-directory
    object, its ``cleanup()`` is called afterwards.
    """
    # TODO: repacking the estore (rewriting self._estore.urlpath from
    # self._estore.to_cframe() when mode != "r") is disabled because, for
    # some reason, it is not working.
    writable_modes = ("w", "a")
    if self.is_zip_store and self.mode in writable_modes:
        self.to_b2z(overwrite=True)

    # Only a temporary directory created by the store itself is removed
    temp_dir = self._temp_dir_obj
    if temp_dir is not None:
        temp_dir.cleanup()
|
||||
|
||||
def __enter__(self):
    """Enter the ``with`` block; the store itself is the managed object."""
    store = self
    return store
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the ``with`` block by closing the store.

    Returns ``False`` so an exception raised inside the block is not
    suppressed.
    """
    suppress_exception = False
    self.close()
    return suppress_exception
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo: build a zip-backed DictStore, then reopen it read-only
    localpath = "example_dstore.b2z"

    with DictStore(localpath, mode="w") as dstore:
        dstore["/node1"] = np.array([1, 2, 3])
        dstore["/node2"] = blosc2.ones(2)

        # /dir1/node3 comes from an array that already lives in its own file
        arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w")
        dstore["/dir1/node3"] = arr_external

        print("DictStore keys:", list(dstore.keys()))
        print("Node1 data:", dstore["/node1"][:])
        print("Node2 data:", dstore["/node2"][:])
        print("Node3 data (external):", dstore["/dir1/node3"][:])

        del dstore["/node1"]
        print("After deletion, keys:", list(dstore.keys()))

    # Read the archive written above back in
    with DictStore(localpath, mode="r") as dstore_opened:
        print("Opened dstore keys:", list(dstore_opened.keys()))
        for key, value in dstore_opened.items():
            if not isinstance(value, blosc2.NDArray):
                continue
            print(
                f"Key: {key}, Shape: {value.shape}, Values: {value[:10] if len(value) > 3 else value[:]}"
            )
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,329 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
import copy
|
||||
from collections.abc import Iterator, KeysView
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
import blosc2
|
||||
from blosc2.c2array import C2Array
|
||||
from blosc2.schunk import SChunk
|
||||
|
||||
PROFILE = False # Set to True to enable PROFILE prints in EmbedStore
|
||||
|
||||
|
||||
class EmbedStore:
    """
    A dictionary-like container for storing NumPy/Blosc2 arrays (NDArray or SChunk) as nodes.

    For NumPy arrays, Blosc2 NDArrays (even if they live in external ``.b2nd`` files),
    and Blosc2 SChunk objects, the data is read and embedded into the store. For remote
    arrays (``C2Array``), only lightweight references (URL base and path) are stored.
    If you need a richer hierarchical container with optional external references, consider using
    `blosc2.TreeStore` or `blosc2.DictStore`.

    Parameters
    ----------
    urlpath : str or None, optional
        Path for persistent storage. Using a '.b2e' extension is recommended.
        If None, the embed store will be in memory only, which can be
        deserialized later using the :func:`blosc2.from_cframe` function.
    mode : str, optional
        File mode ('r', 'w', 'a'). Default is 'a'.
    mmap_mode : str or None, optional
        Memory mapping mode for read access. For now, only ``"r"`` is supported,
        and only when ``mode="r"``. Default is None.
    cparams : dict or None, optional
        Compression parameters for nodes and the embed store itself.
        Default is None, which uses the default Blosc2 parameters.
    dparams : dict or None, optional
        Decompression parameters for nodes and the embed store itself.
        Default is None, which uses the default Blosc2 parameters.
    storage : blosc2.Storage or None, optional
        Storage properties for the embed store. If passed, it will override
        the `urlpath` and `mode` parameters.
    chunksize : int, optional
        Size of chunks for the backing storage. Default is 8 KiB (``2**13`` bytes).
    meta : dict or None, optional
        Metalayers assigned to the storage when a new backing container is
        created. Default is None, which uses ``{"b2embed": {"version": 1}}``.

    Examples
    --------
    >>> estore = EmbedStore(urlpath="example_estore.b2e", mode="w")
    >>> estore["/node1"] = np.array([1, 2, 3])
    >>> estore["/node2"] = blosc2.ones(2)
    >>> estore["/node3"] = blosc2.arange(3, dtype="i4", urlpath="external_node3.b2nd", mode="w")
    >>> urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo")
    >>> estore["/node4"] = blosc2.open(urlpath, mode="r")
    >>> print(list(estore.keys()))
    ['/node1', '/node2', '/node3', '/node4']
    >>> print(estore["/node1"][:])
    [1 2 3]

    """

    def __init__(
        self,
        urlpath: str | None = None,
        mode: str = "a",
        cparams: blosc2.CParams | None = None,
        dparams: blosc2.DParams | None = None,
        storage: blosc2.Storage | None = None,
        chunksize: int | None = 2**13,
        _from_schunk: SChunk | None = None,
        *,
        mmap_mode: str | None = None,
        meta: dict | None = None,
    ):
        """Initialize EmbedStore.

        Three paths are possible: wrap an existing container passed as
        ``_from_schunk``, open an existing file (``mode`` 'r' or 'a' with a
        ``urlpath``), or create a new, empty backing container.

        Raises
        ------
        ValueError
            If ``mmap_mode`` is neither None nor ``"r"``, or if it is ``"r"``
            while ``mode`` is not ``"r"``.
        """

        # For some reason, the SChunk store cannot achieve the same compression ratio as the NDArray store,
        # although it is more efficient in terms of CPU usage.
        # Let's use the SChunk store by default and continue experimenting.
        self._schunk_store = True  # put this to False to use an NDArray instead of a SChunk
        self.urlpath = urlpath
        if mmap_mode not in (None, "r"):
            raise ValueError("For EmbedStore containers, mmap_mode must be None or 'r'")
        if mmap_mode == "r" and mode != "r":
            raise ValueError("For EmbedStore containers, mmap_mode='r' requires mode='r'")
        self.mmap_mode = mmap_mode

        # Path 1: wrap a container that already exists in memory
        if _from_schunk is not None:
            self.cparams = _from_schunk.cparams
            self.dparams = _from_schunk.dparams
            self.mode = mode
            self._store = _from_schunk
            self.storage = blosc2.Storage()
            self.storage.meta = _from_schunk.meta
            self._load_metadata()
            return

        self.mode = mode
        self.cparams = cparams or blosc2.CParams()
        # self.cparams.nthreads = 1  # for debugging purposes, use only one thread
        self.dparams = dparams or blosc2.DParams()
        # self.dparams.nthreads = 1  # for debugging purposes, use only one thread
        if storage is None:
            self.storage = blosc2.Storage(
                contiguous=True,
                urlpath=urlpath,
                mode=mode,
            )
        else:
            self.storage = storage

        # Path 2: open the container stored at urlpath and restore its node map
        if mode in ("r", "a") and urlpath:
            self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0, mmap_mode=mmap_mode)
            self.storage.meta = self._store.meta
            self._load_metadata()
            return

        # Path 3: create a new, empty backing container
        _cparams = copy.deepcopy(self.cparams)
        _cparams.typesize = 1  # ensure typesize is set to 1 for byte storage
        _storage = self.storage
        _storage.meta = meta if meta is not None else {"b2embed": {"version": 1}}
        if self._schunk_store:
            self._store = blosc2.SChunk(
                chunksize=chunksize,
                data=None,
                cparams=_cparams,
                dparams=self.dparams,
                storage=_storage,
            )
        else:
            self._store = blosc2.zeros(
                chunksize,
                dtype=np.uint8,
                cparams=_cparams,
                dparams=self.dparams,
                storage=_storage,
            )
        # Node map (key -> location info) and the next free byte offset
        self._embed_map: dict = {}
        self._current_offset = 0

    def _validate_key(self, key: str) -> None:
        """Validate node key.

        Raises
        ------
        TypeError
            If ``key`` is not a string.
        ValueError
            If ``key`` does not start with '/', ends with '/' (other than the
            root key), contains '//' or one of the forbidden characters, or is
            already present in the store.
        """
        if not isinstance(key, str):
            raise TypeError("Key must be a string.")
        if not key.startswith("/"):
            raise ValueError("Key must start with '/'.")
        if len(key) > 1 and key.endswith("/"):
            raise ValueError("Key cannot end with '/' unless it is the root key '/'.")
        if "//" in key:
            raise ValueError("Key cannot contain consecutive slashes '//'.")
        for char in (":", "\0", "\n", "\r", "\t"):
            if char in key:
                raise ValueError(f"Key cannot contain character: {char!r}")
        if key in self._embed_map:
            raise ValueError(f"Key '{key}' already exists in store.")

    def _ensure_capacity(self, needed_bytes: int) -> None:
        """Ensure backing storage has enough capacity.

        Resizes the backing container to at least the required size, growing
        by a factor of 1.5 when that is larger. Only called when the NDArray
        backing store is in use.
        """
        required_size = self._current_offset + needed_bytes
        if required_size > self._store.shape[0]:
            new_size = max(required_size, int(self._store.shape[0] * 1.5))
            self._store.resize((new_size,))

    def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
        """Add a node to the embed store.

        A ``C2Array`` is recorded by its ``urlbase`` and ``path`` only. Any
        other value is serialized with ``to_cframe()`` and appended to the
        backing container at the current offset.

        Raises
        ------
        ValueError
            If the store is in read-only mode or the key is invalid or
            already present.
        TypeError
            If the key is not a string.
        """
        if self.mode == "r":
            raise ValueError("Cannot set items in read-only mode.")
        self._validate_key(key)
        if isinstance(value, C2Array):
            self._embed_map[key] = {"urlbase": value.urlbase, "path": value.path}
        else:
            if isinstance(value, np.ndarray):
                value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams)
            serialized_data = value.to_cframe()
            data_len = len(serialized_data)
            if not self._schunk_store:
                self._ensure_capacity(data_len)
            offset = self._current_offset
            if self._schunk_store:
                self._store[offset : offset + data_len] = serialized_data
            else:
                self._store[offset : offset + data_len] = np.frombuffer(serialized_data, dtype=np.uint8)
            self._current_offset += data_len
            self._embed_map[key] = {"offset": offset, "length": data_len}
        self._save_metadata()

    def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array:
        """Retrieve a node from the embed store.

        Entries recorded with a ``urlbase`` are opened remotely in read mode;
        all others are deserialized from the bytes stored at their offset.

        Raises
        ------
        KeyError
            If the key is not in the store.
        """
        if key not in self._embed_map:
            raise KeyError(f"Key '{key}' not found in the embed store.")
        node_info = self._embed_map[key]
        urlbase = node_info.get("urlbase", None)
        if urlbase:
            urlpath = blosc2.URLPath(node_info["path"], urlbase=urlbase)
            return blosc2.open(urlpath, mode="r")
        offset = node_info["offset"]
        length = node_info["length"]
        serialized_data = bytes(self._store[offset : offset + length])
        # It is safer to copy data here, as the reference to the SChunk may disappear
        # Use from_cframe so we can deserialize either an NDArray or an SChunk
        return blosc2.from_cframe(serialized_data, copy=True)

    def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | Any:
        """Retrieve a node, or default if not found."""
        return self[key] if key in self._embed_map else default

    def __delitem__(self, key: str) -> None:
        """Remove a node from the embed store.

        Only the map entry is dropped and the metadata saved again; the
        backing container itself is not modified here.

        Raises
        ------
        KeyError
            If the key is not in the store.
        """
        if key not in self._embed_map:
            raise KeyError(f"Key '{key}' not found in the embed store.")
        del self._embed_map[key]
        self._save_metadata()

    def __contains__(self, key: str) -> bool:
        """Check if a key exists."""
        return key in self._embed_map

    def __len__(self) -> int:
        """Return number of nodes."""
        return len(self._embed_map)

    def __iter__(self) -> Iterator[str]:
        """Iterate over keys."""
        return iter(self._embed_map)

    def keys(self) -> KeysView[str]:
        """Return all keys."""
        return self._embed_map.keys()

    def values(self) -> Iterator[blosc2.NDArray | SChunk]:
        """Iterate over all values."""
        for key in self._embed_map:
            yield self[key]

    def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk]]:
        """Iterate over (key, value) pairs."""
        for key in self._embed_map:
            yield key, self[key]

    def _save_metadata(self) -> None:
        """Save embed store map to vlmeta.

        The node map and the current offset are stored together under the
        ``"estore_metadata"`` entry.
        """
        metadata = {"embed_map": self._embed_map, "current_offset": self._current_offset}
        self._store.vlmeta["estore_metadata"] = metadata

    def _load_metadata(self) -> None:
        """Load embed store map from vlmeta.

        When the ``"estore_metadata"`` entry is absent, the store starts with
        an empty map and offset 0.
        """
        if "estore_metadata" in self._store.vlmeta:
            metadata = self._store.vlmeta["estore_metadata"]
            self._embed_map = metadata["embed_map"]
            self._current_offset = metadata["current_offset"]
        else:
            self._embed_map = {}
            self._current_offset = 0

    def to_cframe(self) -> bytes:
        """Serialize embed store to CFrame format."""
        return self._store.to_cframe()

    def __enter__(self):
        """Context manager enter."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        # No need to close anything as SChunk/NDArray handles persistence automatically
        return False
|
||||
|
||||
|
||||
def estore_from_cframe(cframe: bytes, copy: bool = False) -> EmbedStore:
    """
    Build an EmbedStore from a serialized CFrame.

    Parameters
    ----------
    cframe : bytes
        The CFrame buffer to deserialize.
    copy : bool, optional
        Passed to :func:`blosc2.schunk_from_cframe`; if True, the data is
        copied. Default is False.

    Returns
    -------
    estore : EmbedStore
        An EmbedStore wrapping the deserialized SChunk.
    """
    return EmbedStore(_from_schunk=blosc2.schunk_from_cframe(cframe, copy=copy))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo: fill an EmbedStore, delete a node, then read everything back
    persistent = False
    # import pdb; pdb.set_trace()
    estore = (
        EmbedStore(urlpath="example_estore.b2e", mode="w")  # , cparams=blosc2.CParams(clevel=0))
        if persistent
        else EmbedStore()  # , cparams=blosc2.CParams(clevel=0))
    )
    estore["/node1"] = np.array([1, 2, 3])
    estore["/node2"] = blosc2.ones(2)
    urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo")
    arr_remote = blosc2.open(urlpath, mode="r")
    estore["/dir1/node3"] = arr_remote

    print("EmbedStore keys:", list(estore.keys()))
    print("Node1 data:", estore["/node1"][:])
    print("Node2 data:", estore["/node2"][:])
    print("Node3 data (remote):", estore["/dir1/node3"][:3])

    del estore["/node1"]
    print("After deletion, keys:", list(estore.keys()))

    # Read the store back, from disk or from its in-memory cframe
    estore_read = (
        EmbedStore(urlpath="example_estore.b2e", mode="r")
        if persistent
        else blosc2.from_cframe(estore.to_cframe())
    )

    print("Read keys:", list(estore_read.keys()))
    for key, value in estore_read.items():
        print(
            f"shape of {key}: {value.shape}, dtype: {value.dtype}, map: {estore_read._embed_map[key]}, "
            f"values: {value[:10] if len(value) > 3 else value[:]}"
        )
|
||||
@ -0,0 +1,15 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
|
||||
class MissingOperands(ValueError):
    """Error for a lazy expression that has missing operands.

    Both constructor arguments are kept on the instance as ``expr`` and
    ``missing_ops``, and both appear in the error message.
    """

    def __init__(self, expr, missing_ops):
        super().__init__(f'Lazy expression "{expr}" with missing operands: {missing_ops}')
        self.expr = expr
        self.missing_ops = missing_ops
|
||||
@ -0,0 +1,62 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
|
||||
def fft(*args, **kwargs):
    """Placeholder for ``fft``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fft() is not implemented.")
|
||||
|
||||
|
||||
def ifft(*args, **kwargs):
    """Placeholder for ``ifft``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ifft() is not implemented.")
|
||||
|
||||
|
||||
def fftn(*args, **kwargs):
    """Placeholder for ``fftn``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fftn() is not implemented.")
|
||||
|
||||
|
||||
def ifftn(*args, **kwargs):
    """Placeholder for ``ifftn``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ifftn() is not implemented.")
|
||||
|
||||
|
||||
def rfft(*args, **kwargs):
    """Placeholder for ``rfft``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("rfft() is not implemented.")
|
||||
|
||||
|
||||
def irfft(*args, **kwargs):
    """Placeholder for ``irfft``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("irfft() is not implemented.")
|
||||
|
||||
|
||||
def rfftn(*args, **kwargs):
    """Placeholder for ``rfftn``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("rfftn() is not implemented.")
|
||||
|
||||
|
||||
def irfftn(*args, **kwargs):
    """Placeholder for ``irfftn``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("irfftn() is not implemented.")
|
||||
|
||||
|
||||
def hfft(*args, **kwargs):
    """Placeholder for ``hfft``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("hfft() is not implemented.")
|
||||
|
||||
|
||||
def ihfft(*args, **kwargs):
    """Placeholder for ``ihfft``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ihfft() is not implemented.")
|
||||
|
||||
|
||||
def fftfreq(*args, **kwargs):
    """Placeholder for ``fftfreq``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fftfreq() is not implemented.")
|
||||
|
||||
|
||||
def rfftfreq(*args, **kwargs):
    """Placeholder for ``rfftfreq``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("rfftfreq() is not implemented.")
|
||||
|
||||
|
||||
def fftshift(*args, **kwargs):
    """Placeholder for ``fftshift``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fftshift() is not implemented.")
|
||||
|
||||
|
||||
def ifftshift(*args, **kwargs):
    """Placeholder for ``ifftshift``; it is not implemented.

    Any positional or keyword arguments are accepted and ignored, so that a
    call with arguments fails with ``NotImplementedError`` rather than
    ``TypeError``.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ifftshift() is not implemented.")
|
||||
@ -0,0 +1,675 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
/** @file b2nd.h
|
||||
* @brief Blosc2 NDim header file.
|
||||
*
|
||||
* This file contains Blosc2 NDim public API and the structures needed to use it.
|
||||
* @author Blosc Development Team <blosc@blosc.org>
|
||||
*/
|
||||
|
||||
#ifndef BLOSC_B2ND_H
|
||||
#define BLOSC_B2ND_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "blosc2/blosc2-export.h"
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#include "blosc2.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define B2ND_DEPRECATED(msg) __declspec(deprecated(msg))
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define B2ND_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
||||
#else
|
||||
#define B2ND_DEPRECATED(msg)
|
||||
#endif
|
||||
|
||||
/* The version for metalayer format; starts from 0 and it must not exceed 127 */
|
||||
#define B2ND_METALAYER_VERSION 0
|
||||
|
||||
/* The maximum number of dimensions for b2nd arrays */
|
||||
#define B2ND_MAX_DIM 16
|
||||
|
||||
/* The maximum number of metalayers for b2nd arrays */
|
||||
#define B2ND_MAX_METALAYERS (BLOSC2_MAX_METALAYERS - 1)
|
||||
|
||||
/* NumPy dtype format
|
||||
* https://numpy.org/doc/stable/reference/arrays.dtypes.html#arrays-dtypes-constructing
|
||||
*/
|
||||
#define DTYPE_NUMPY_FORMAT 0
|
||||
|
||||
/* The default data type */
|
||||
#define B2ND_DEFAULT_DTYPE "|u1"
|
||||
/* The default data format */
|
||||
#define B2ND_DEFAULT_DTYPE_FORMAT DTYPE_NUMPY_FORMAT
|
||||
|
||||
/**
|
||||
* @brief An *optional* cache for a single chunk.
|
||||
*
|
||||
* When a chunk is needed, it is copied into this cache. In this way, if the same chunk is needed
|
||||
* again afterwards, it is not necessary to recover it because it is already in the cache.
|
||||
*/
|
||||
struct chunk_cache_s {
|
||||
uint8_t *data;
|
||||
//!< The chunk data.
|
||||
int64_t nchunk;
|
||||
//!< The chunk number in cache. If @p nchunk equals to -1, it means that the cache is empty.
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief General parameters needed for the creation of a b2nd array.
|
||||
*/
|
||||
typedef struct b2nd_context_s b2nd_context_t; /* opaque type */
|
||||
|
||||
/**
|
||||
* @brief A multidimensional array of data that can be compressed.
|
||||
*/
|
||||
typedef struct {
  blosc2_schunk *sc;
  //!< Pointer to a Blosc super-chunk
  int64_t shape[B2ND_MAX_DIM];
  //!< Shape of original data.
  int32_t chunkshape[B2ND_MAX_DIM];
  //!< Shape of each chunk.
  int64_t extshape[B2ND_MAX_DIM];
  //!< Shape of padded data.
  int32_t blockshape[B2ND_MAX_DIM];
  //!< Shape of each block.
  int64_t extchunkshape[B2ND_MAX_DIM];
  //!< Shape of padded chunk.
  int64_t nitems;
  //!< Number of items in original data.
  int32_t chunknitems;
  //!< Number of items in each chunk.
  int64_t extnitems;
  //!< Number of items in padded data.
  int32_t blocknitems;
  //!< Number of items in each block.
  int64_t extchunknitems;
  //!< Number of items in a padded chunk.
  int8_t ndim;
  //!< Data dimensions.
  struct chunk_cache_s chunk_cache;
  //!< A partition cache (see struct chunk_cache_s).
  int64_t item_array_strides[B2ND_MAX_DIM];
  //!< Item - shape strides. NOTE(review): presumably item strides over the array shape; confirm in b2nd.c.
  int64_t item_chunk_strides[B2ND_MAX_DIM];
  //!< Item - shape strides. NOTE(review): presumably item strides over the chunk shape; confirm in b2nd.c.
  int64_t item_extchunk_strides[B2ND_MAX_DIM];
  //!< Item - shape strides. NOTE(review): presumably item strides over the padded chunk shape; confirm in b2nd.c.
  int64_t item_block_strides[B2ND_MAX_DIM];
  //!< Item - shape strides. NOTE(review): presumably item strides over the block shape; confirm in b2nd.c.
  int64_t block_chunk_strides[B2ND_MAX_DIM];
  //!< Item - shape strides. NOTE(review): presumably strides of blocks within a chunk; confirm in b2nd.c.
  int64_t chunk_array_strides[B2ND_MAX_DIM];
  //!< Item - shape strides. NOTE(review): presumably strides of chunks within the array; confirm in b2nd.c.
  char *dtype;
  //!< Data type. Different formats can be supported (see dtype_format).
  int8_t dtype_format;
  //!< The format of the data type. Default is DTYPE_NUMPY_FORMAT.
} b2nd_array_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Create b2nd params.
|
||||
*
|
||||
* @param b2_storage The Blosc2 storage params.
|
||||
* @param ndim The dimensions.
|
||||
* @param shape The shape.
|
||||
* @param chunkshape The chunk shape.
|
||||
* @param blockshape The block shape.
|
||||
* @param dtype The data type expressed as a string version.
|
||||
* @param dtype_format The data type format; DTYPE_NUMPY_FORMAT should be chosen for NumPy compatibility.
|
||||
* @param metalayers The memory pointer to the list of the metalayers desired.
|
||||
* @param nmetalayers The number of metalayers.
|
||||
*
|
||||
* @return A pointer to the new b2nd params. NULL is returned if this fails.
|
||||
*
|
||||
* @note The pointer returned must be freed when not used anymore with #b2nd_free_ctx.
|
||||
*
|
||||
*/
|
||||
BLOSC_EXPORT b2nd_context_t *
|
||||
b2nd_create_ctx(const blosc2_storage *b2_storage, int8_t ndim, const int64_t *shape, const int32_t *chunkshape,
|
||||
const int32_t *blockshape, const char *dtype, int8_t dtype_format, const blosc2_metalayer *metalayers,
|
||||
int32_t nmetalayers);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free the resources associated with b2nd_context_t.
|
||||
*
|
||||
* @param ctx The b2nd context to free.
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note This is safe in the sense that it will not free the schunk pointer in internal cparams.
|
||||
*
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_free_ctx(b2nd_context_t *ctx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Create an uninitialized array.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_uninit(b2nd_context_t *ctx, b2nd_array_t **array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Create an empty array.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_empty(b2nd_context_t *ctx, b2nd_array_t **array);
|
||||
|
||||
|
||||
/**
|
||||
* Create an array, with zero being used as the default value for
|
||||
* uninitialized portions of the array.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_zeros(b2nd_context_t *ctx, b2nd_array_t **array);
|
||||
|
||||
|
||||
/**
|
||||
* Create an array, with NaN being used as the default value for
|
||||
* uninitialized portions of the array. Should only be used with type sizes
|
||||
* of either 4 or 8. Other sizes generate an error.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_nans(b2nd_context_t *ctx, b2nd_array_t **array);
|
||||
|
||||
|
||||
/**
|
||||
* Create an array, with @p fill_value being used as the default value for
|
||||
* uninitialized portions of the array.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
* @param fill_value Default value for uninitialized portions of the array.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_full(b2nd_context_t *ctx, b2nd_array_t **array, const void *fill_value);
|
||||
|
||||
/**
|
||||
* @brief Free an array.
|
||||
*
|
||||
* @param array The array.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_free(b2nd_array_t *array);
|
||||
|
||||
/**
|
||||
* @brief Create a b2nd array from a super-chunk. It can only be used if the array
|
||||
* is backed by a blosc super-chunk.
|
||||
*
|
||||
* @param schunk The blosc super-chunk where the b2nd array is stored.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array);
|
||||
|
||||
/**
|
||||
* Create a serialized super-chunk from a b2nd array.
|
||||
*
|
||||
* @param array The b2nd array to be serialized.
|
||||
* @param cframe The pointer of the buffer where the in-memory array will be copied.
|
||||
* @param cframe_len The length of the in-memory array buffer.
|
||||
* @param needs_free Whether the buffer should be freed or not.
|
||||
*
|
||||
* @return An error code
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_to_cframe(const b2nd_array_t *array, uint8_t **cframe,
|
||||
int64_t *cframe_len, bool *needs_free);
|
||||
|
||||
/**
|
||||
* @brief Create a b2nd array from a serialized super-chunk.
|
||||
*
|
||||
* @param cframe The buffer of the in-memory array.
|
||||
* @param cframe_len The size (in bytes) of the in-memory array.
|
||||
* @param copy Whether b2nd should make a copy of the cframe data or not. The copy will be made to an internal sparse frame.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_from_cframe(uint8_t *cframe, int64_t cframe_len, bool copy, b2nd_array_t **array);
|
||||
|
||||
/**
|
||||
* @brief Open a b2nd array from a file.
|
||||
*
|
||||
* @param urlpath The path of the b2nd array on disk.
|
||||
* @param array The memory pointer where the array info will be stored.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_open(const char *urlpath, b2nd_array_t **array);
|
||||
|
||||
/**
|
||||
* @brief Open a b2nd array from a file using an offset.
|
||||
*
|
||||
* @param urlpath The path of the b2nd array on disk.
|
||||
* @param array The memory pointer where the array info will be stored.
|
||||
* @param offset The offset in the file where the b2nd array frame starts.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_open_offset(const char *urlpath, b2nd_array_t **array, int64_t offset);
|
||||
|
||||
/**
|
||||
* @brief Save b2nd array into a specific urlpath.
|
||||
*
|
||||
* @param array The array to be saved.
|
||||
* @param urlpath The urlpath where the array will be stored.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_save(const b2nd_array_t *array, char *urlpath);
|
||||
|
||||
/**
|
||||
* @brief Append a b2nd array into a file.
|
||||
*
|
||||
* @param array The array to write.
|
||||
* @param urlpath The path for persistent storage.
|
||||
*
|
||||
* @return If successful, return the offset where @p array has been appended in @p urlpath.
|
||||
* Else, a negative value.
|
||||
*/
|
||||
BLOSC_EXPORT int64_t b2nd_save_append(const b2nd_array_t *array, const char *urlpath);
|
||||
|
||||
/**
|
||||
* @brief Create a b2nd array from a C buffer.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
* @param buffer The buffer where source data is stored.
|
||||
* @param buffersize The size (in bytes) of the buffer.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_from_cbuffer(b2nd_context_t *ctx, b2nd_array_t **array, const void *buffer, int64_t buffersize);
|
||||
|
||||
/**
|
||||
* @brief Extract the data from a b2nd array into a C buffer.
|
||||
*
|
||||
* @param array The b2nd array.
|
||||
* @param buffer The buffer where the data will be stored.
|
||||
* @param buffersize Size (in bytes) of the buffer.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_to_cbuffer(const b2nd_array_t *array, void *buffer, int64_t buffersize);
|
||||
|
||||
/**
|
||||
* @brief Get a slice from an array and store it into a new array.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
* @param src The array from which the slice will be extracted
|
||||
* @param start The coordinates where the slice will begin.
|
||||
* @param stop The coordinates where the slice will end.
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note The ndim and shape from ctx will be overwritten by the src and stop-start respectively.
|
||||
*
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, const b2nd_array_t *src,
|
||||
const int64_t *start, const int64_t *stop);
|
||||
|
||||
/**
|
||||
* @brief Squeeze a b2nd array
|
||||
*
|
||||
* This function removes selected single-dimensional entries from the shape of a
|
||||
* b2nd array.
|
||||
*
|
||||
* @param array The b2nd array.
|
||||
* @param view The memory pointer where the new view will be created.
|
||||
* @param index Indexes of the single-dimensional entries to remove.
|
||||
*
|
||||
* @return An error code
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_squeeze_index(b2nd_array_t *array, b2nd_array_t **view, const bool *index);
|
||||
|
||||
/**
|
||||
* @brief Squeeze a b2nd array
|
||||
*
|
||||
* This function removes single-dimensional entries from the shape of a b2nd array.
|
||||
*
|
||||
* @param array The b2nd array.
|
||||
* @param view The memory pointer where the new view will be created.
|
||||
*
|
||||
* @return An error code
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_squeeze(b2nd_array_t *array, b2nd_array_t **view);
|
||||
|
||||
/**
|
||||
* @brief Add a newaxis to a b2nd array at location @p axis.
|
||||
*
|
||||
* @param array The b2nd array to be expanded.
|
||||
* @param axis The axes where the new dimensions will be added.
|
||||
* @param view The memory pointer where the new view will be created.
|
||||
* @param final_dims The final number of dimensions. Should be same as the number of elements in @p axis.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t **view, const bool *axis,
|
||||
const uint8_t final_dims);
|
||||
|
||||
/**
|
||||
* @brief Get a slice from an array and store it into a C buffer.
|
||||
*
|
||||
* @param array The array from which the slice will be extracted.
|
||||
* @param start The coordinates where the slice will begin.
|
||||
* @param stop The coordinates where the slice will end.
|
||||
* @param buffershape The shape of the buffer.
|
||||
* @param buffer The buffer where the data will be stored.
|
||||
* @param buffersize The size (in bytes) of the buffer.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_get_slice_cbuffer(const b2nd_array_t *array, const int64_t *start, const int64_t *stop,
|
||||
void *buffer, const int64_t *buffershape, int64_t buffersize);
|
||||
|
||||
/**
|
||||
* @brief Set a slice in a b2nd array using a C buffer.
|
||||
*
|
||||
* @param buffer The buffer where the slice data is.
|
||||
* @param buffershape The shape of the buffer.
|
||||
* @param buffersize The size (in bytes) of the buffer.
|
||||
* @param start The coordinates where the slice will begin.
|
||||
* @param stop The coordinates where the slice will end.
|
||||
* @param array The b2nd array where the slice will be set
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_set_slice_cbuffer(const void *buffer, const int64_t *buffershape, int64_t buffersize,
|
||||
const int64_t *start, const int64_t *stop, b2nd_array_t *array);
|
||||
|
||||
/**
|
||||
* @brief Make a copy of the array data. The copy is done into a new b2nd array.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param src The array from which data is copied.
|
||||
* @param array The memory pointer where the array will be created.
|
||||
*
|
||||
* @return An error code
|
||||
*
|
||||
* @note The ndim and shape in ctx will be overwritten by the src ctx.
|
||||
*
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_copy(b2nd_context_t *ctx, const b2nd_array_t *src, b2nd_array_t **array);
|
||||
|
||||
/**
|
||||
* @brief Concatenate arrays. The result is stored in a new b2nd array, or an enlarged one.
|
||||
*
|
||||
* @param ctx The b2nd context for the new array.
|
||||
* @param src1 The first array from which data is copied.
|
||||
* @param src2 The second array from which data is copied.
|
||||
* @param axis The axis along which the arrays will be concatenated.
|
||||
* @param copy Whether the data should be copied or not. If false, the @p src1 array
|
||||
* will be expanded as needed to keep the result.
|
||||
* @param array The memory pointer where the array will be created. It will have the same
|
||||
* metalayers of @p src1, except for the b2nd metalayer, which will be updated with the
|
||||
* new shape.
|
||||
*
|
||||
* @note The two arrays must have the same shape in all dimensions except the concatenation axis.
|
||||
* Also, the typesize of the two arrays must be the same.
|
||||
*
|
||||
* @return An error code
|
||||
*
|
||||
* @note The ndim and shape in ctx will be overwritten by the src1 ctx.
|
||||
*
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_concatenate(b2nd_context_t *ctx, const b2nd_array_t *src1, const b2nd_array_t *src2,
|
||||
int8_t axis, bool copy, b2nd_array_t **array);
|
||||
|
||||
/**
|
||||
* @brief Print metalayer parameters.
|
||||
*
|
||||
* @param array The array where the metalayer is stored.
|
||||
*
|
||||
* @return An error code
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_print_meta(const b2nd_array_t *array);
|
||||
|
||||
/**
|
||||
* @brief Resize the shape of an array
|
||||
*
|
||||
* @param array The array to be resized.
|
||||
* @param new_shape The new shape of the array.
|
||||
* @param start The position in which the array will be extended or shrunk.
|
||||
*
|
||||
* @return An error code
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_resize(b2nd_array_t *array, const int64_t *new_shape, const int64_t *start);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Insert given buffer in an array extending the given axis.
|
||||
*
|
||||
* @param array The array to insert the data in.
|
||||
* @param buffer The buffer data to be inserted.
|
||||
* @param buffersize The size (in bytes) of the buffer.
|
||||
* @param axis The axis that will be extended.
|
||||
* @param insert_start The position inside the axis to start inserting the data.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_insert(b2nd_array_t *array, const void *buffer, int64_t buffersize,
|
||||
int8_t axis, int64_t insert_start);
|
||||
|
||||
/**
|
||||
* Append a buffer at the end of a b2nd array.
|
||||
*
|
||||
* @param array The array to append the data in.
|
||||
* @param buffer The buffer data to be appended.
|
||||
* @param buffersize Size (in bytes) of the buffer.
|
||||
* @param axis The axis that will be extended to append the data.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_append(b2nd_array_t *array, const void *buffer, int64_t buffersize,
|
||||
int8_t axis);
|
||||
|
||||
/**
|
||||
* @brief Delete delete_len items from an array, shrinking the given axis.
|
||||
*
|
||||
* @param array The array to shrink.
|
||||
* @param axis The axis to shrink.
|
||||
* @param delete_start The start position from the axis to start deleting chunks.
|
||||
* @param delete_len The number of items to delete from array->shape[axis].
|
||||
* The newshape[axis] will be the old array->shape[axis] - delete_len
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note See also b2nd_resize
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_delete(b2nd_array_t *array, int8_t axis,
|
||||
int64_t delete_start, int64_t delete_len);
|
||||
|
||||
|
||||
// Indexing section
|
||||
|
||||
/**
|
||||
* @brief Get an element selection along each dimension of an array independently.
|
||||
*
|
||||
* @param array The array to get the data from.
|
||||
* @param selection The elements along each dimension.
|
||||
* @param selection_size The size of the selection along each dimension.
|
||||
* @param buffer The buffer for getting the data.
|
||||
* @param buffershape The shape of the buffer.
|
||||
* @param buffersize The buffer size (in bytes).
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note See also b2nd_set_orthogonal_selection.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_get_orthogonal_selection(const b2nd_array_t *array, int64_t **selection,
|
||||
int64_t *selection_size, void *buffer,
|
||||
int64_t *buffershape, int64_t buffersize);
|
||||
|
||||
/**
|
||||
* @brief Set an element selection along each dimension of an array independently.
|
||||
*
|
||||
* @param array The array to set the data to.
|
||||
* @param selection The elements along each dimension.
|
||||
* @param selection_size The size of the selection along each dimension.
|
||||
* @param buffer The buffer with the data for setting.
|
||||
* @param buffershape The shape of the buffer.
|
||||
* @param buffersize The buffer size (in bytes).
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note See also b2nd_get_orthogonal_selection.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_set_orthogonal_selection(b2nd_array_t *array, int64_t **selection,
|
||||
int64_t *selection_size, const void *buffer,
|
||||
int64_t *buffershape, int64_t buffersize);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Create the metainfo for the b2nd metalayer.
|
||||
*
|
||||
* @param ndim The number of dimensions in the array.
|
||||
* @param shape The shape of the array.
|
||||
* @param chunkshape The shape of the chunks in the array.
|
||||
* @param blockshape The shape of the blocks in the array.
|
||||
* @param dtype A string representation of the data type of the array.
|
||||
* @param dtype_format The format of the dtype representation. 0 means NumPy.
|
||||
* @param smeta The msgpack buffer (output).
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_serialize_meta(int8_t ndim, const int64_t *shape, const int32_t *chunkshape,
|
||||
const int32_t *blockshape, const char *dtype,
|
||||
int8_t dtype_format, uint8_t **smeta);
|
||||
|
||||
/**
|
||||
* @brief Read the metainfo in the b2nd metalayer.
|
||||
*
|
||||
* @param smeta The msgpack buffer (input).
|
||||
* @param smeta_len The length of the smeta buffer (input).
|
||||
* @param ndim The number of dimensions in the array (output).
|
||||
* @param shape The shape of the array (output).
|
||||
* @param chunkshape The shape of the chunks in the array (output).
|
||||
* @param blockshape The shape of the blocks in the array (output).
|
||||
* @param dtype A string representation of the data type of the array (output).
|
||||
* @param dtype_format The format of the dtype representation (output). 0 means NumPy (the default).
|
||||
*
|
||||
* @note This function is inlined and available even when not linking with libblosc2.
|
||||
*
|
||||
* @return An error code.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_deserialize_meta(const uint8_t *smeta, int32_t smeta_len, int8_t *ndim, int64_t *shape,
|
||||
int32_t *chunkshape, int32_t *blockshape, char **dtype, int8_t *dtype_format);
|
||||
|
||||
// Utilities for C buffers representing multidimensional arrays
|
||||
|
||||
/**
|
||||
* @brief Copy a slice of a source array into another array. The arrays have
|
||||
* the same number of dimensions (though their shapes may differ), the same
|
||||
* item size, and they are stored as C buffers with contiguous data (any
|
||||
* padding is considered part of the array).
|
||||
*
|
||||
* @param ndim The number of dimensions in both arrays.
|
||||
* @param itemsize The size of the individual data item in both arrays.
|
||||
* @param src The buffer for getting the data from the source array.
|
||||
* @param src_pad_shape The shape of the source array, including padding.
|
||||
* @param src_start The source coordinates where the slice will begin.
|
||||
* @param src_stop The source coordinates where the slice will end.
|
||||
* @param dst The buffer for setting the data into the destination array.
|
||||
* @param dst_pad_shape The shape of the destination array, including padding.
|
||||
* @param dst_start The destination coordinates where the slice will be placed.
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note This is kept for backward compatibility with existing code out there. New code should use
|
||||
* b2nd_copy_buffer2 instead.
|
||||
*
|
||||
* @note Please make sure that slice boundaries fit within the source and
|
||||
* destination arrays before using this function, as it does not perform these
|
||||
* checks itself.
|
||||
*/
|
||||
B2ND_DEPRECATED("Use b2nd_copy_buffer2 instead.")
|
||||
BLOSC_EXPORT int b2nd_copy_buffer(int8_t ndim,
|
||||
uint8_t itemsize,
|
||||
const void *src, const int64_t *src_pad_shape,
|
||||
const int64_t *src_start, const int64_t *src_stop,
|
||||
void *dst, const int64_t *dst_pad_shape,
|
||||
const int64_t *dst_start);
|
||||
|
||||
/**
|
||||
* @brief Copy a slice of a source array into another array. The arrays have
|
||||
* the same number of dimensions (though their shapes may differ), the same
|
||||
* item size, and they are stored as C buffers with contiguous data (any
|
||||
* padding is considered part of the array).
|
||||
*
|
||||
* @param ndim The number of dimensions in both arrays.
|
||||
* @param itemsize The size of the individual data item in both arrays.
|
||||
* @param src The buffer for getting the data from the source array.
|
||||
* @param src_pad_shape The shape of the source array, including padding.
|
||||
* @param src_start The source coordinates where the slice will begin.
|
||||
* @param src_stop The source coordinates where the slice will end.
|
||||
* @param dst The buffer for setting the data into the destination array.
|
||||
* @param dst_pad_shape The shape of the destination array, including padding.
|
||||
* @param dst_start The destination coordinates where the slice will be placed.
|
||||
*
|
||||
* @return An error code.
|
||||
*
|
||||
* @note This is a version of (now deprecated) b2nd_copy_buffer() that uses
|
||||
* signed 32-bit integers for copying data. This is useful when data is stored
|
||||
* in a buffer that uses itemsizes that are larger than 255 bytes.
|
||||
*
|
||||
* @note Please make sure that slice boundaries fit within the source and
|
||||
* destination arrays before using this function, as it does not perform these
|
||||
* checks itself.
|
||||
*/
|
||||
BLOSC_EXPORT int b2nd_copy_buffer2(int8_t ndim,
|
||||
int32_t itemsize,
|
||||
const void *src, const int64_t *src_pad_shape,
|
||||
const int64_t *src_start, const int64_t *src_stop,
|
||||
void *dst, const int64_t *dst_pad_shape,
|
||||
const int64_t *dst_start);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLOSC_B2ND_H */
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,80 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOSC_BLOSC2_BLOSC2_COMMON_H
|
||||
#define BLOSC_BLOSC2_BLOSC2_COMMON_H
|
||||
|
||||
#include "blosc2-export.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
// For shutting up stupid compiler warning about some 'unused' variables in GCC
|
||||
#ifdef __GNUC__
|
||||
#define BLOSC_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
#define BLOSC_UNUSED_VAR __attribute__ ((unused))
|
||||
#else
|
||||
#define BLOSC_UNUSED_VAR
|
||||
#endif // __GNUC__
|
||||
|
||||
// For shutting up compiler warning about unused parameters
|
||||
#define BLOSC_UNUSED_PARAM(x) ((void)(x))
|
||||
|
||||
/* Use inlined functions for supported systems */
|
||||
#if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */
|
||||
#define inline __inline /* Visual C is not C99, but supports some kind of inline */
|
||||
#endif
|
||||
|
||||
|
||||
/* Define the __SSE2__ symbol if compiling with Visual C++ and
|
||||
targeting the minimum architecture level supporting SSE2.
|
||||
Other compilers define this as expected and emit warnings
|
||||
when it is re-defined. */
|
||||
#if !defined(__SSE2__) && defined(_MSC_VER) && \
|
||||
(defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))
|
||||
#define __SSE2__
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Detect if the architecture is fine with unaligned access.
* BLOSC_STRICT_ALIGN is first defined and then undefined again for every
* target matched by the chain below. If BLOSC_STRICT_ALIGN is already defined
* when this point is reached, the whole detection is skipped and the macro is
* left as it is.
|
||||
*/
|
||||
#if !defined(BLOSC_STRICT_ALIGN)
|
||||
#define BLOSC_STRICT_ALIGN
|
||||
#if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#elif defined(__386) /* NOTE(review): unreachable, __386 is already tested by the first #if of this chain */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#elif defined(_X86_) /* MinGW */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#elif defined(__I86__) /* Digital Mars */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
/* Modern ARM systems (like ARM64) should support unaligned access
|
||||
quite efficiently. */
|
||||
#elif defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM64_ARCH_8__)
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#elif defined(_ARCH_PPC) || defined(__PPC__)
|
||||
/* Modern PowerPC systems (like POWER8) should support unaligned access
|
||||
quite efficiently. */
|
||||
#undef BLOSC_STRICT_ALIGN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#if defined(__AVX2__) || defined(__AVX512F__) || defined (__AVX512BW__)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#endif /* BLOSC_BLOSC2_BLOSC2_COMMON_H */
|
||||
@ -0,0 +1,48 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOSC_BLOSC2_BLOSC2_EXPORT_H
|
||||
#define BLOSC_BLOSC2_BLOSC2_EXPORT_H
|
||||
|
||||
/* Macros for specifying exported symbols.
|
||||
BLOSC_EXPORT is used to decorate symbols that should be
|
||||
exported by the blosc shared library.
|
||||
BLOSC_NO_EXPORT is used to decorate symbols that should NOT
|
||||
be exported by the blosc shared library.
|
||||
*/
|
||||
#if defined(BLOSC_SHARED_LIBRARY)
|
||||
#if defined(_MSC_VER)
|
||||
#define BLOSC_EXPORT __declspec(dllexport)
|
||||
#elif (defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__)
|
||||
#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
|
||||
#define BLOSC_EXPORT __attribute__((dllexport))
|
||||
#else
|
||||
#define BLOSC_EXPORT __attribute__((visibility("default")))
|
||||
#endif /* defined(_WIN32) || defined(__CYGWIN__) */
|
||||
#else
|
||||
#error Cannot determine how to define BLOSC_EXPORT for this compiler.
|
||||
#endif
|
||||
#else
|
||||
#define BLOSC_EXPORT
|
||||
#endif /* defined(BLOSC_SHARED_LIBRARY) */
|
||||
|
||||
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__)
|
||||
#define BLOSC_NO_EXPORT __attribute__((visibility("hidden")))
|
||||
#else
|
||||
#define BLOSC_NO_EXPORT
|
||||
#endif /* (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) */
|
||||
|
||||
/* When testing, export everything to make it easier to implement tests. */
|
||||
#if defined(BLOSC_TESTING)
|
||||
#undef BLOSC_NO_EXPORT
|
||||
#define BLOSC_NO_EXPORT BLOSC_EXPORT
|
||||
#endif /* defined(BLOSC_TESTING) */
|
||||
|
||||
#endif /* BLOSC_BLOSC2_BLOSC2_EXPORT_H */
|
||||
@ -0,0 +1,117 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOSC_BLOSC2_BLOSC2_STDIO_H
|
||||
#define BLOSC_BLOSC2_BLOSC2_STDIO_H
|
||||
|
||||
#include "blosc2-export.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Thin wrapper around a C stdio FILE handle.
   NOTE(review): presumably the object passed as `stream` to the blosc2_stdio_*
   functions declared below; confirm against the implementation. */
typedef struct {
|
||||
FILE *file;
//!< The wrapped stdio file handle.
|
||||
} blosc2_stdio_file;
|
||||
|
||||
BLOSC_EXPORT void *blosc2_stdio_open(const char *urlpath, const char *mode, void* params);
|
||||
BLOSC_EXPORT int blosc2_stdio_close(void *stream);
|
||||
BLOSC_EXPORT int64_t blosc2_stdio_size(void *stream);
|
||||
BLOSC_EXPORT int64_t blosc2_stdio_write(const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
|
||||
BLOSC_EXPORT int64_t blosc2_stdio_read(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
|
||||
BLOSC_EXPORT int blosc2_stdio_truncate(void *stream, int64_t size);
|
||||
BLOSC_EXPORT int blosc2_stdio_destroy(void* params);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Parameters for memory-mapped I/O. You can use the blosc2_schunk_open*_udio functions to memory-map existing
|
||||
* schunk files from disk. To create a new schunk which is backed by a memory-mapped file on disk, set the io member
|
||||
* of the #blosc2_storage struct (see test_mmap for examples). Please note that memory-mapped I/O is only available for
|
||||
* cframes and not sframes.
|
||||
*/
|
||||
typedef struct {
|
||||
/* NOTE: BLOSC2_STDIO_MMAP_DEFAULTS initializes this struct positionally, so the order of the members below
   must stay in sync with that initializer. */
/* Arguments of the mapping */
|
||||
const char* mode;
|
||||
//!< The opening mode of the memory-mapped file (r, r+, w+ or c), similar to NumPy's np.memmap
|
||||
//!< (https://numpy.org/doc/stable/reference/generated/numpy.memmap.html). Set to r if the file should only be read,
|
||||
//!< r+ if you want to extend an existing file with new data, w+ to create a new file and c to use an existing file
|
||||
//!< as a basis but keep all modifications in-memory. On Windows, the size of the mapping cannot change in the c mode.
|
||||
int64_t initial_mapping_size;
|
||||
//!< The initial size of the memory mapping used as a large enough write buffer for the r+, w+ and c modes (for
|
||||
//!< Windows, only the r+ and w+ modes). On Windows, this will also be the size of the file while the file is opened.
|
||||
//!< It will be truncated to the target size when the file is closed (e.g., when the schunk is destroyed).
|
||||
bool needs_free;
|
||||
//!< Indicates whether this object should be freed in the blosc2_destroy_cb callback (set to true if the
|
||||
//!< blosc2_stdio_mmap struct was created on the heap).
|
||||
|
||||
/* Internal attributes of the mapping */
|
||||
char* addr;
|
||||
//!< The starting address of the mapping.
|
||||
char* urlpath;
|
||||
//!< The path to the file which is associated with this object.
|
||||
int64_t file_size;
|
||||
//!< The size of the file.
|
||||
int64_t mapping_size;
|
||||
//!< The size of the mapping (mapping_size >= file_size).
|
||||
bool is_memory_only;
|
||||
//!< Whether the mapping is only in-memory and changes are not reflected to the file on disk (c mode).
|
||||
FILE* file;
|
||||
//!< The underlying file handle.
|
||||
int fd;
|
||||
//!< The underlying file descriptor.
|
||||
int64_t access_flags;
|
||||
//!< The access attributes for the memory pages.
|
||||
int64_t map_flags;
|
||||
//!< The attributes of the mapping.
|
||||
#if defined(_WIN32)
|
||||
HANDLE mmap_handle;
|
||||
//!< The Windows handle to the memory mapping. This member only exists when _WIN32 is defined.
|
||||
#endif
|
||||
} blosc2_stdio_mmap;
|
||||
|
||||
/**
|
||||
* @brief Default struct for memory-mapped I/O for user initialization.
|
||||
*/
|
||||
static const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS = {
|
||||
/* Positional initializer; the values map onto the blosc2_stdio_mmap members in declaration order:
   mode = "r", initial_mapping_size = 1 << 30 (2^30; presumably bytes, i.e. 1 GiB -- confirm),
   needs_free = false, addr = NULL, urlpath = NULL, file_size = -1, mapping_size = -1,
   is_memory_only = false, file = NULL, fd = -1, access_flags = -1, map_flags = -1. */
"r", (1 << 30), false, NULL, NULL, -1, -1, false, NULL, -1, -1, -1
|
||||
#if defined(_WIN32)
|
||||
/* Initial value for mmap_handle, the extra member that only exists when _WIN32 is defined. */
, INVALID_HANDLE_VALUE
|
||||
#endif
|
||||
};
|
||||
|
||||
BLOSC_EXPORT void *blosc2_stdio_mmap_open(const char *urlpath, const char *mode, void* params);
|
||||
BLOSC_EXPORT int blosc2_stdio_mmap_close(void *stream);
|
||||
BLOSC_EXPORT int64_t blosc2_stdio_mmap_size(void *stream);
|
||||
BLOSC_EXPORT int64_t blosc2_stdio_mmap_write(
|
||||
const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
|
||||
BLOSC_EXPORT int64_t blosc2_stdio_mmap_read(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
|
||||
BLOSC_EXPORT int blosc2_stdio_mmap_truncate(void *stream, int64_t size);
|
||||
BLOSC_EXPORT int blosc2_stdio_mmap_destroy(void* params);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLOSC_BLOSC2_BLOSC2_STDIO_H */
|
||||
@ -0,0 +1,58 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOSC_BLOSC2_CODECS_REGISTRY_H
|
||||
#define BLOSC_BLOSC2_CODECS_REGISTRY_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
BLOSC_CODEC_NDLZ = 32,
|
||||
//!< Simple Lempel-Ziv compressor for NDim data. Experimental, mainly for teaching purposes.
|
||||
BLOSC_CODEC_ZFP_FIXED_ACCURACY = 33,
|
||||
//!< ZFP compressor for fixed accuracy mode. The desired accuracy is set in `compcode_meta`.
|
||||
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md
|
||||
BLOSC_CODEC_ZFP_FIXED_PRECISION = 34,
|
||||
//!< ZFP compressor for fixed precision. The desired precision is set in `compcode_meta`.
|
||||
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md
|
||||
BLOSC_CODEC_ZFP_FIXED_RATE = 35,
|
||||
//!< ZFP compressor for fixed rate mode. The desired rate is set in `compcode_meta`.
|
||||
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md
|
||||
BLOSC_CODEC_OPENHTJ2K = 36,
|
||||
//!< OpenHTJ2K compressor for JPEG 2000 HT.
|
||||
//!< See https://github.com/Blosc/blosc2_openhtj2k
|
||||
BLOSC_CODEC_GROK = 37,
|
||||
//!< Grok compressor for JPEG 2000.
|
||||
//!< See https://github.com/Blosc/blosc2_grok
|
||||
BLOSC_CODEC_OPENZL = 38,
|
||||
//!< OpenZL metacompressor.
|
||||
//!< See https://github.com/Blosc/blosc2_openzl
|
||||
};
|
||||
|
||||
void register_codecs(void);
|
||||
|
||||
// For dynamically loaded codecs
|
||||
typedef struct {
|
||||
char *encoder;
|
||||
char *decoder;
|
||||
} codec_info;
|
||||
|
||||
// May be needed if a .free function is ever added for codecs
|
||||
// typedef struct {
|
||||
// char *free;
|
||||
// } codecparams_info;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLOSC_BLOSC2_CODECS_REGISTRY_H */
|
||||
@ -0,0 +1,49 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOSC_BLOSC2_FILTERS_REGISTRY_H
|
||||
#define BLOSC_BLOSC2_FILTERS_REGISTRY_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
BLOSC_FILTER_NDCELL = 32,
|
||||
//!< Simple filter for grouping NDim cell data together.
|
||||
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndcell/README.md
|
||||
BLOSC_FILTER_NDMEAN = 33,
|
||||
//!< Simple filter for replacing content of a NDim cell with its mean value.
|
||||
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndmean/README.md
|
||||
BLOSC_FILTER_BYTEDELTA_BUGGY = 34,
|
||||
// buggy version. See #524
|
||||
BLOSC_FILTER_BYTEDELTA = 35,
|
||||
//!< Byte-wise delta. Assumes M streams of bytes of length N, where M is the typesize (specified by `filters_meta`).
|
||||
//!< Should be used in combination with @ref BLOSC_SHUFFLE or @ref BLOSC_BITSHUFFLE.
|
||||
//!< See https://www.blosc.org/posts/bytedelta-enhance-compression-toolset/
|
||||
BLOSC_FILTER_INT_TRUNC = 36,
|
||||
//!< Truncate int precision; positive values in `filters_meta` slot will keep bits;
|
||||
//!< negative values will remove (set to zero) bits.
|
||||
//!< This is similar to @ref BLOSC_TRUNC_PREC, but for integers instead of floating point data.
|
||||
};
|
||||
|
||||
void register_filters(void);
|
||||
|
||||
// For dynamically loaded filters
|
||||
typedef struct {
|
||||
char *forward;
|
||||
char *backward;
|
||||
} filter_info;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLOSC_BLOSC2_FILTERS_REGISTRY_H */
|
||||
@ -0,0 +1,37 @@
|
||||
/*********************************************************************
|
||||
Blosc - Blocked Shuffling and Compression Library
|
||||
|
||||
Copyright (C) 2021 The Blosc Developers <blosc@blosc.org>
|
||||
https://blosc.org
|
||||
License: BSD 3-Clause (see LICENSE.txt)
|
||||
|
||||
See LICENSE.txt for details about copyright and rights to use.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOSC_BLOSC2_TUNERS_REGISTRY_H
|
||||
#define BLOSC_BLOSC2_TUNERS_REGISTRY_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
BLOSC_BTUNE = 32,
|
||||
};
|
||||
|
||||
void register_tuners(void);
|
||||
|
||||
// For dynamically loaded tuners
|
||||
typedef struct {
|
||||
char *init;
|
||||
char *next_blocksize;
|
||||
char *next_cparams;
|
||||
char *update;
|
||||
char *free;
|
||||
} tuner_info;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLOSC_BLOSC2_TUNERS_REGISTRY_H */
|
||||
@ -0,0 +1,64 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
import io
|
||||
import pprint
|
||||
from textwrap import TextWrapper
|
||||
|
||||
|
||||
def info_text_report_(items: list) -> str:
    """Return the ``str()`` rendering of *items* terminated by a newline.

    The result is exactly the text that ``print(items)`` would write.
    """
    return str(items) + "\n"
|
||||
|
||||
|
||||
def info_text_report(items: list) -> str:
    """Render ``(key, value)`` pairs as an aligned plain-text report.

    Each key is left-justified to the width of the longest key and followed
    by ``" : "``.  Dict values are formatted with :func:`pprint.pformat`;
    every other value is converted with ``str()`` and wrapped to 96 columns,
    with continuation lines indented so that they line up under the first.

    Parameters
    ----------
    items : list
        Sequence of ``(key, value)`` pairs.  Keys must be strings.

    Returns
    -------
    str
        One newline-terminated entry per pair; ``""`` when *items* is empty.
    """
    # default=0 keeps an empty report from raising ValueError inside max().
    max_key_len = max((len(k) for k, _ in items), default=0)
    entries = []
    for k, v in items:
        label = k.ljust(max_key_len) + " : "
        if isinstance(v, dict):
            # pprint is used instead of rich because rich output does not
            # render well in notebooks.
            text = label + pprint.pformat(v)
        else:
            wrapper = TextWrapper(
                width=96,
                initial_indent=label,
                subsequent_indent=" " * max_key_len + " : ",
            )
            # fill() returns "" for empty/whitespace-only text, which would
            # drop the key entirely; fall back to the bare label in that case.
            text = wrapper.fill(str(v)) or label
        entries.append(text + "\n")
    return "".join(entries)
|
||||
|
||||
|
||||
def info_html_report(items: list) -> str:
    """Render ``(key, value)`` pairs as an HTML table, one row per pair.

    Keys and values are converted with ``str()`` and HTML-escaped, so text
    containing ``<``, ``>``, ``&`` or quotes (for example a dtype string such
    as ``<U5``) is displayed literally instead of being parsed as markup.

    Parameters
    ----------
    items : list
        Sequence of ``(key, value)`` pairs.

    Returns
    -------
    str
        A ``<table class="NDArray-info">`` element; the body is empty when
        *items* is empty.
    """
    from html import escape  # function-scope import keeps the file's import block untouched

    rows = "".join(
        f'<tr><th style="text-align: left">{escape(str(k))}</th>'
        f'<td style="text-align: left">{escape(str(v))}</td></tr>'
        for k, v in items
    )
    return f'<table class="NDArray-info"><tbody>{rows}</tbody></table>'
|
||||
|
||||
|
||||
class InfoReporter:
    """Wrap an object and render its ``info_items`` on demand.

    ``info_items`` is read from the wrapped object each time a representation
    is requested, so the report reflects the object's state at that moment.
    """

    def __init__(self, obj):
        # The wrapped object; it must expose an ``info_items`` attribute.
        self.obj = obj

    def __repr__(self):
        """Return the plain-text report for the wrapped object."""
        return info_text_report(self.obj.info_items)

    def _repr_html_(self):
        """Return the HTML-table report (rich-display hook, e.g. for notebooks)."""
        return info_html_report(self.obj.info_items)
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -0,0 +1,133 @@
|
||||
# only add PUBLIC dependencies as well
|
||||
# https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html#creating-a-package-configuration-file
|
||||
include(CMakeFindDependencyMacro)
|
||||
|
||||
# Search in <PackageName>_ROOT:
|
||||
# https://cmake.org/cmake/help/v3.12/policy/CMP0074.html
|
||||
if(POLICY CMP0074)
|
||||
cmake_policy(SET CMP0074 NEW)
|
||||
endif()
|
||||
|
||||
# locate the installed FindABC.cmake modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules")
|
||||
|
||||
# this section stores which configuration options were set
|
||||
set(HAVE_THREADS ON)
|
||||
set(HAVE_IPP )
|
||||
set(HAVE_LZ4_CONFIG )
|
||||
set(HAVE_ZLIB_NG TRUE)
|
||||
set(HAVE_ZLIB_NG_CONFIG )
|
||||
set(HAVE_ZSTD_CONFIG )
|
||||
set(DEACTIVATE_IPP ON)
|
||||
set(DEACTIVATE_ZLIB OFF)
|
||||
set(DEACTIVATE_ZSTD OFF)
|
||||
set(PREFER_EXTERNAL_LZ4 OFF)
|
||||
set(PREFER_EXTERNAL_ZLIB OFF)
|
||||
set(PREFER_EXTERNAL_ZSTD OFF)
|
||||
|
||||
# find dependencies and their targets, which are used in our Blosc2Targets.cmake
|
||||
# additionally, the Blosc2_..._FOUND variables are used to support
|
||||
# find_package(Blosc2 ... COMPONENTS ... ...)
|
||||
# this enables downstream projects to express the need for specific features.
|
||||
set(CMAKE_THREAD_PREFER_PTHREAD TRUE) # pre 3.1
|
||||
set(THREADS_PREFER_PTHREAD_FLAG TRUE) # CMake 3.1+
|
||||
if(HAVE_THREADS)
|
||||
find_dependency(Threads)
|
||||
set(Blosc2_THREADS_FOUND TRUE)
|
||||
else()
|
||||
set(Blosc2_THREADS_FOUND FALSE)
|
||||
endif()
|
||||
|
||||
# IPP is optional: report the component as found only when this build enabled
# it and the dependency has been located again for the consumer.
# (The two values were previously swapped, so an IPP-less build advertised IPP.)
if(NOT DEACTIVATE_IPP AND HAVE_IPP)
    find_dependency(IPP)
    set(Blosc2_IPP_FOUND TRUE)
else()
    set(Blosc2_IPP_FOUND FALSE)
endif()
|
||||
|
||||
if(PREFER_EXTERNAL_LZ4 AND HAVE_LZ4_CONFIG)
|
||||
find_dependency(lz4 CONFIG)
|
||||
endif()
|
||||
set(Blosc2_LZ4_FOUND TRUE)
|
||||
|
||||
# ZLIB support is available whenever it was not deactivated at build time.
# An external zlib / zlib-ng must additionally be re-located for consumers;
# the bundled sources need nothing further (same treatment as LZ4).
if(DEACTIVATE_ZLIB)
    set(Blosc2_ZLIB_FOUND FALSE)
else()
    if(PREFER_EXTERNAL_ZLIB)
        if(HAVE_ZLIB_NG)
            if(HAVE_ZLIB_NG_CONFIG)
                find_dependency(zlib-ng CONFIG)
            endif()
        else()
            find_dependency(ZLIB)
        endif()
    endif()
    # Set for the internal-sources case too, so that
    # find_package(Blosc2 REQUIRED COMPONENTS ZLIB) succeeds.
    set(Blosc2_ZLIB_FOUND TRUE)
endif()
|
||||
|
||||
# ZSTD support is available whenever it was not deactivated at build time.
# Only an external zstd with a CMake config package has to be re-located.
if(DEACTIVATE_ZSTD)
    set(Blosc2_ZSTD_FOUND FALSE)
else()
    if(PREFER_EXTERNAL_ZSTD AND HAVE_ZSTD_CONFIG)
        find_dependency(zstd CONFIG)
    endif()
    # Set for the internal-sources case too, so that
    # find_package(Blosc2 REQUIRED COMPONENTS ZSTD) succeeds.
    set(Blosc2_ZSTD_FOUND TRUE)
endif()
|
||||
|
||||
# define central Blosc2::blosc2_shared/static targets
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/Blosc2Targets.cmake")
|
||||
|
||||
# check if components are fulfilled and set Blosc2_<COMPONENT>_FOUND vars
|
||||
# Blosc2_FIND_COMPONENTS is a list set by find_package(... COMPONENTS ... ...)
|
||||
# likewise Blosc2_FIND_REQUIRED_... per component specified
|
||||
foreach(comp ${Blosc2_FIND_COMPONENTS})
|
||||
if(NOT Blosc2_${comp}_FOUND)
|
||||
if(Blosc2_FIND_REQUIRED_${comp})
|
||||
set(Blosc2_FOUND FALSE)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Defines imported targets for Blosc2 inside a Python wheel
|
||||
|
||||
# ------------------------------
|
||||
# Shared library target
|
||||
# ------------------------------
|
||||
if(NOT TARGET Blosc2::blosc2_shared)
|
||||
add_library(Blosc2::blosc2_shared SHARED IMPORTED GLOBAL)
|
||||
|
||||
if(WIN32)
|
||||
# MSVC: import library (.lib) + runtime DLL (.dll)
|
||||
set_target_properties(Blosc2::blosc2_shared PROPERTIES
|
||||
IMPORTED_IMPLIB "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.lib"
|
||||
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.dll"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
|
||||
)
|
||||
else()
|
||||
# Linux/macOS
|
||||
set_target_properties(Blosc2::blosc2_shared PROPERTIES
|
||||
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.so"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ------------------------------
|
||||
# Static library target
|
||||
# ------------------------------
|
||||
if(NOT TARGET Blosc2::blosc2_static)
|
||||
add_library(Blosc2::blosc2_static STATIC IMPORTED GLOBAL)
|
||||
|
||||
if(MSVC)
|
||||
# Windows static library uses .lib
|
||||
set_target_properties(Blosc2::blosc2_static PROPERTIES
|
||||
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.lib"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
|
||||
)
|
||||
else()
|
||||
# Linux/macOS static library uses .a
|
||||
set_target_properties(Blosc2::blosc2_static PROPERTIES
|
||||
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.a"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -0,0 +1,65 @@
|
||||
# This is a basic version file for the Config-mode of find_package().
|
||||
# It is used by write_basic_package_version_file() as input file for configure_file()
|
||||
# to create a version-file which can be installed along a config.cmake file.
|
||||
#
|
||||
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
|
||||
# the requested version string are exactly the same and it sets
|
||||
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
|
||||
# but only if the requested major version is the same as the current one.
|
||||
# The variable CVF_VERSION must be set before calling configure_file().
|
||||
|
||||
|
||||
set(PACKAGE_VERSION "2.23.1")
|
||||
|
||||
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
else()
|
||||
|
||||
if("2.23.1" MATCHES "^([0-9]+)\\.")
|
||||
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
|
||||
if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0)
|
||||
string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}")
|
||||
endif()
|
||||
else()
|
||||
set(CVF_VERSION_MAJOR "2.23.1")
|
||||
endif()
|
||||
|
||||
if(PACKAGE_FIND_VERSION_RANGE)
|
||||
# both endpoints of the range must have the expected major version
|
||||
math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1")
|
||||
if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
|
||||
OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR)
|
||||
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT)))
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
|
||||
AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX)
|
||||
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX)))
|
||||
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||
else()
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
endif()
|
||||
else()
|
||||
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
|
||||
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||
else()
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
endif()
|
||||
|
||||
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
|
||||
set(PACKAGE_VERSION_EXACT TRUE)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
|
||||
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
|
||||
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8")
|
||||
math(EXPR installedBits "8 * 8")
|
||||
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
|
||||
set(PACKAGE_VERSION_UNSUITABLE TRUE)
|
||||
endif()
|
||||
@ -0,0 +1,29 @@
|
||||
#----------------------------------------------------------------
|
||||
# Generated CMake target import file for configuration "Release".
|
||||
#----------------------------------------------------------------
|
||||
|
||||
# Commands may need to know the format version.
|
||||
set(CMAKE_IMPORT_FILE_VERSION 1)
|
||||
|
||||
# Import target "Blosc2::blosc2_shared" for configuration "Release"
|
||||
set_property(TARGET Blosc2::blosc2_shared APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(Blosc2::blosc2_shared PROPERTIES
|
||||
IMPORTED_IMPLIB_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/blosc2.lib"
|
||||
IMPORTED_LOCATION_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.dll"
|
||||
)
|
||||
|
||||
list(APPEND _cmake_import_check_targets Blosc2::blosc2_shared )
|
||||
list(APPEND _cmake_import_check_files_for_Blosc2::blosc2_shared "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/blosc2.lib" "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.dll" )
|
||||
|
||||
# Import target "Blosc2::blosc2_static" for configuration "Release"
|
||||
set_property(TARGET Blosc2::blosc2_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(Blosc2::blosc2_static PROPERTIES
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
|
||||
IMPORTED_LOCATION_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.lib"
|
||||
)
|
||||
|
||||
list(APPEND _cmake_import_check_targets Blosc2::blosc2_static )
|
||||
list(APPEND _cmake_import_check_files_for_Blosc2::blosc2_static "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.lib" )
|
||||
|
||||
# Commands beyond this point should not need to know the version.
|
||||
set(CMAKE_IMPORT_FILE_VERSION)
|
||||
@ -0,0 +1,116 @@
|
||||
# Generated by CMake
|
||||
|
||||
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8)
|
||||
message(FATAL_ERROR "CMake >= 3.0.0 required")
|
||||
endif()
|
||||
if(CMAKE_VERSION VERSION_LESS "3.0.0")
|
||||
message(FATAL_ERROR "CMake >= 3.0.0 required")
|
||||
endif()
|
||||
cmake_policy(PUSH)
|
||||
cmake_policy(VERSION 3.0.0...3.29)
|
||||
#----------------------------------------------------------------
|
||||
# Generated CMake target import file.
|
||||
#----------------------------------------------------------------
|
||||
|
||||
# Commands may need to know the format version.
|
||||
set(CMAKE_IMPORT_FILE_VERSION 1)
|
||||
|
||||
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
|
||||
set(_cmake_targets_defined "")
|
||||
set(_cmake_targets_not_defined "")
|
||||
set(_cmake_expected_targets "")
|
||||
foreach(_cmake_expected_target IN ITEMS Blosc2::blosc2_shared Blosc2::blosc2_static Blosc2::blosc2)
|
||||
list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
|
||||
if(TARGET "${_cmake_expected_target}")
|
||||
list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
|
||||
else()
|
||||
list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
|
||||
endif()
|
||||
endforeach()
|
||||
unset(_cmake_expected_target)
|
||||
if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
|
||||
unset(_cmake_targets_defined)
|
||||
unset(_cmake_targets_not_defined)
|
||||
unset(_cmake_expected_targets)
|
||||
unset(CMAKE_IMPORT_FILE_VERSION)
|
||||
cmake_policy(POP)
|
||||
return()
|
||||
endif()
|
||||
if(NOT _cmake_targets_defined STREQUAL "")
|
||||
string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
|
||||
string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
|
||||
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
|
||||
endif()
|
||||
unset(_cmake_targets_defined)
|
||||
unset(_cmake_targets_not_defined)
|
||||
unset(_cmake_expected_targets)
|
||||
|
||||
|
||||
# The installation prefix configured by this project.
|
||||
set(_IMPORT_PREFIX "C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib")
|
||||
|
||||
# Create imported target Blosc2::blosc2_shared
|
||||
add_library(Blosc2::blosc2_shared SHARED IMPORTED)
|
||||
|
||||
set_target_properties(Blosc2::blosc2_shared PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include"
|
||||
)
|
||||
|
||||
# Create imported target Blosc2::blosc2_static
|
||||
add_library(Blosc2::blosc2_static STATIC IMPORTED)
|
||||
|
||||
set_target_properties(Blosc2::blosc2_static PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include"
|
||||
INTERFACE_LINK_LIBRARIES "\$<LINK_ONLY:Threads::Threads>"
|
||||
)
|
||||
|
||||
# Create imported target Blosc2::blosc2
|
||||
add_library(Blosc2::blosc2 INTERFACE IMPORTED)
|
||||
|
||||
set_target_properties(Blosc2::blosc2 PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include"
|
||||
INTERFACE_LINK_LIBRARIES "Blosc2::blosc2_static"
|
||||
)
|
||||
|
||||
# Load information for each installed configuration.
|
||||
file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/Blosc2Targets-*.cmake")
|
||||
foreach(_cmake_config_file IN LISTS _cmake_config_files)
|
||||
include("${_cmake_config_file}")
|
||||
endforeach()
|
||||
unset(_cmake_config_file)
|
||||
unset(_cmake_config_files)
|
||||
|
||||
# Cleanup temporary variables.
|
||||
set(_IMPORT_PREFIX)
|
||||
|
||||
# Loop over all imported files and verify that they actually exist
|
||||
foreach(_cmake_target IN LISTS _cmake_import_check_targets)
|
||||
if(CMAKE_VERSION VERSION_LESS "3.28"
|
||||
OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target}
|
||||
OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}")
|
||||
foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
|
||||
if(NOT EXISTS "${_cmake_file}")
|
||||
message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
|
||||
\"${_cmake_file}\"
|
||||
but this file does not exist. Possible reasons include:
|
||||
* The file was deleted, renamed, or moved to another location.
|
||||
* An install or uninstall procedure did not complete successfully.
|
||||
* The installation package was faulty and contained
|
||||
\"${CMAKE_CURRENT_LIST_FILE}\"
|
||||
but not all the files it references.
|
||||
")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
unset(_cmake_file)
|
||||
unset("_cmake_import_check_files_for_${_cmake_target}")
|
||||
endforeach()
|
||||
unset(_cmake_target)
|
||||
unset(_cmake_import_check_targets)
|
||||
|
||||
# This file does not depend on other imported targets which have
|
||||
# been exported from the same project but in a separate export set.
|
||||
|
||||
# Commands beyond this point should not need to know the version.
|
||||
set(CMAKE_IMPORT_FILE_VERSION)
|
||||
cmake_policy(POP)
|
||||
@ -0,0 +1,74 @@
|
||||
# Find the Intel IPP (Integrated Performance Primitives)
|
||||
#
|
||||
# IPP_FOUND - System has IPP
|
||||
# IPP_INCLUDE_DIRS - IPP include files directories
|
||||
# IPP_LIBRARIES - The IPP libraries
|
||||
#
|
||||
# The environment variable IPPROOT is used to find the installation location.
|
||||
# If the environment variable is not set we'll look for it in the default installation locations.
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# find_package(IPP)
|
||||
# if(IPP_FOUND)
|
||||
# target_link_libraries(TARGET ${IPP_LIBRARIES})
|
||||
# endif()
|
||||
|
||||
find_path(IPP_ROOT_DIR
|
||||
include/ipp.h
|
||||
PATHS
|
||||
$ENV{IPPROOT}
|
||||
/opt/intel/compilers_and_libraries/linux/ipp
|
||||
/opt/intel/compilers_and_libraries/mac/ipp
|
||||
"C:/IntelSWTools/compilers_and_libraries/windows/ipp/"
|
||||
"C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/ipp"
|
||||
$ENV{HOME}/intel/ipp
|
||||
$ENV{HOME}/miniconda3
|
||||
$ENV{USERPROFILE}/miniconda3/Library
|
||||
"C:/Miniconda37-x64/Library" # Making AppVeyor happy
|
||||
)
|
||||
|
||||
find_path(IPP_INCLUDE_DIR
|
||||
ipp.h
|
||||
PATHS
|
||||
${IPP_ROOT_DIR}/include
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
set(IPP_SEARCH_LIB ippcoremt.lib)
|
||||
set(IPP_LIBS ippcoremt.lib ippsmt.lib ippdcmt.lib)
|
||||
elseif(APPLE)
|
||||
set(IPP_SEARCH_LIB libippcore.a)
|
||||
set(IPP_LIBS libipps.a libippdc.a libippcore.a)
|
||||
else() # Linux
|
||||
set(IPP_SEARCH_LIB libippcore.so)
|
||||
set(IPP_LIBS ipps ippdc ippcore)
|
||||
endif()
|
||||
|
||||
|
||||
find_path(IPP_LIB_SEARCHPATH
|
||||
${IPP_SEARCH_LIB}
|
||||
PATHS
|
||||
${IPP_ROOT_DIR}/lib/intel64
|
||||
${IPP_ROOT_DIR}/lib
|
||||
)
|
||||
|
||||
foreach(LIB ${IPP_LIBS})
|
||||
find_library(${LIB}_PATH ${LIB} PATHS ${IPP_LIB_SEARCHPATH})
|
||||
if(${LIB}_PATH)
|
||||
set(IPP_LIBRARIES ${IPP_LIBRARIES} ${${LIB}_PATH})
|
||||
set(IPP_FOUND TRUE)
|
||||
else()
|
||||
# message(STATUS "Could not find ${LIB}: disabling IPP")
|
||||
set(IPP_NOTFOUND TRUE)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(IPP_FOUND AND NOT IPP_NOTFOUND)
|
||||
set(IPP_INCLUDE_DIRS ${IPP_INCLUDE_DIR})
|
||||
include_directories(${IPP_INCLUDE_DIRS})
|
||||
message(STATUS "Found IPP libraries in: ${IPP_LIBRARIES}")
|
||||
else()
|
||||
message(STATUS "No IPP libraries found.")
|
||||
set(IPP_FOUND FALSE)
|
||||
endif()
|
||||
@ -0,0 +1,10 @@
|
||||
# Locate an external LZ4 installation: the header directory and the library.
find_path(LZ4_INCLUDE_DIR lz4.h)
find_library(LZ4_LIBRARY NAMES lz4 liblz4)

# Both pieces are required; otherwise only report that nothing was found.
if(NOT LZ4_INCLUDE_DIR OR NOT LZ4_LIBRARY)
    message(STATUS "No LZ4 library found. Using internal sources.")
else()
    set(LZ4_FOUND TRUE)
    message(STATUS "Found LZ4 library: ${LZ4_LIBRARY}")
endif()
|
||||
@ -0,0 +1,58 @@
|
||||
# Check if SSE/AVX instructions are available on the machine where
|
||||
# the project is compiled.
|
||||
|
||||
# Probe the build host for SSE2/AVX2 and cache the result in SSE2_FOUND and
# AVX2_FOUND.
#
# execute_process() is used instead of exec_program(): the latter has been
# deprecated since CMake 3.0 and is rejected when policy CMP0153 is NEW.
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
    execute_process(COMMAND cat /proc/cpuinfo OUTPUT_VARIABLE CPUINFO)

    # If the flag name occurs anywhere in the output, the REPLACE collapses
    # the whole text to just that name; otherwise the text is left unchanged.
    string(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
    if(SSE2_TRUE)
        set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    else()
        set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
    endif()

    string(REGEX REPLACE "^.*(avx2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "avx2" "${SSE_THERE}" AVX2_TRUE)
    if(AVX2_TRUE)
        set(AVX2_FOUND true CACHE BOOL "AVX2 available on host")
    else()
        set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
    endif()

elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    # Consecutive COMMANDs form a pipeline: sysctl -a | grep <key>.
    execute_process(
        COMMAND /usr/sbin/sysctl -a
        COMMAND grep machdep.cpu.features
        OUTPUT_VARIABLE CPUINFO)
    string(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
    if(SSE2_TRUE)
        set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    else()
        set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
    endif()

    execute_process(
        COMMAND /usr/sbin/sysctl -a
        COMMAND grep machdep.cpu.leaf7_features
        OUTPUT_VARIABLE CPUINFO)
    string(REGEX REPLACE "^.*(AVX2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "AVX2" "${SSE_THERE}" AVX2_TRUE)
    if(AVX2_TRUE)
        set(AVX2_FOUND true CACHE BOOL "AVX2 available on host")
    else()
        set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
    endif()

elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
    # TODO. For now supposing SSE2 is safe enough
    set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
else()
    set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
endif()
|
||||
|
||||
if(NOT SSE2_FOUND)
|
||||
message(STATUS "Could not find hardware support for SSE2 on this machine.")
|
||||
endif()
|
||||
if(NOT AVX2_FOUND)
|
||||
message(STATUS "Could not find hardware support for AVX2 on this machine.")
|
||||
endif()
|
||||
|
||||
mark_as_advanced(SSE2_FOUND AVX2_FOUND)
|
||||
@ -0,0 +1,54 @@
|
||||
# Locate zlib-ng: the directory containing zlib-ng.h and the library itself.
find_path(ZLIB_NG_INCLUDE_DIR NAMES zlib-ng.h)

# Derive a library directory from the header location so that the header and
# the library are taken from the same prefix.  The guard tests this module's
# own result variable (it previously tested ZLIB_INCLUDE_DIRS, which this
# module never sets).
if(ZLIB_NG_INCLUDE_DIR)
    set(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_INCLUDE_DIR})

    if("${ZLIB_NG_LIBRARY_DIRS}" MATCHES "/include$")
        # Strip off the trailing "/include" in the path.
        get_filename_component(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_LIBRARY_DIRS} PATH)
    endif()

    if(EXISTS "${ZLIB_NG_LIBRARY_DIRS}/lib")
        set(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_LIBRARY_DIRS}/lib)
    endif()
endif()

# HINTS is empty (and therefore ignored) when the header was not found.
find_library(ZLIB_NG_LIBRARY
    NAMES z-ng libz-ng zlib-ng libz-ng.a
    HINTS ${ZLIB_NG_LIBRARY_DIRS})

set(ZLIB_NG_LIBRARIES ${ZLIB_NG_LIBRARY})

# Sets ZLIB_NG_FOUND from the two required variables.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ZLIB_NG DEFAULT_MSG ZLIB_NG_LIBRARY ZLIB_NG_INCLUDE_DIR)

if(ZLIB_NG_FOUND)
    message(STATUS "Found zlib-ng: ${ZLIB_NG_LIBRARIES}, ${ZLIB_NG_INCLUDE_DIR}")
endif()
|
||||
|
||||
#[[
|
||||
Copyright https://github.com/zlib-ng/minizip-ng, 2021
|
||||
|
||||
Condition of use and distribution are the same as zlib:
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgement in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
]]#
|
||||
@ -0,0 +1,8 @@
|
||||
# Locate an external Zstandard installation: header directory and library.
find_path(ZSTD_INCLUDE_DIR zstd.h)
find_library(ZSTD_LIBRARY NAMES zstd)

# ZSTD_FOUND is only set when both pieces were located; nothing is reported otherwise.
if(ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
    message(STATUS "Found ZSTD library: ${ZSTD_LIBRARY}")
    set(ZSTD_FOUND TRUE)
endif()
|
||||
@ -0,0 +1,26 @@
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR aarch64)
|
||||
set(CMAKE_SYSTEM_VERSION 1)
|
||||
|
||||
message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}")
|
||||
|
||||
set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
|
||||
set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
|
||||
|
||||
set(CMAKE_CROSSCOMPILING TRUE)
|
||||
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-aarch64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)
|
||||
|
||||
# Search policy while cross-compiling: programs are looked up on the host only;
# libraries, headers and packages only under the target root paths.
# The PACKAGE mode matches the other ARM toolchain files of this project.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
|
||||
|
||||
find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc)
|
||||
if(NOT C_COMPILER_FULL_PATH)
|
||||
message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found")
|
||||
endif()
|
||||
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
|
||||
|
||||
find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++)
|
||||
if(CXX_COMPILER_FULL_PATH)
|
||||
set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
|
||||
endif()
|
||||
@ -0,0 +1,27 @@
|
||||
# Cross-compilation toolchain description: Linux on 32-bit ARM (hard-float ABI).
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_SYSTEM_VERSION 1)

message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}")

# Target triple; also used below to derive the compiler names and sysroot path.
set(CMAKE_C_COMPILER_TARGET "arm-linux-gnueabihf")
set(CMAKE_CXX_COMPILER_TARGET "arm-linux-gnueabihf")

# Run target binaries through user-mode QEMU, pointing it at the target sysroot.
set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L "/usr/${CMAKE_C_COMPILER_TARGET}/")

# Programs come from the host; libraries, headers and packages only from the
# target root.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# The C cross-compiler is mandatory.
find_program(C_COMPILER_FULL_PATH NAMES "${CMAKE_C_COMPILER_TARGET}-gcc")
if(NOT C_COMPILER_FULL_PATH)
    message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found")
endif()
set(CMAKE_C_COMPILER "${C_COMPILER_FULL_PATH}")

# The C++ cross-compiler is optional; it is configured only when found.
find_program(CXX_COMPILER_FULL_PATH
    NAMES "g++-${CMAKE_CXX_COMPILER_TARGET}" "${CMAKE_CXX_COMPILER_TARGET}-g++")
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER "${CXX_COMPILER_FULL_PATH}")
endif()
|
||||
@ -0,0 +1,31 @@
|
||||
# Cross-compilation toolchain description: Linux on 32-bit ARM (soft-float ABI
# by default; the target triple may be overridden by the caller).
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_SYSTEM_VERSION 1)

message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}")

# Keep a caller-provided target triple; otherwise fall back to arm-linux-gnueabi.
if(NOT DEFINED CMAKE_C_COMPILER_TARGET)
    set(CMAKE_C_COMPILER_TARGET "arm-linux-gnueabi")
endif()
if(NOT DEFINED CMAKE_CXX_COMPILER_TARGET)
    set(CMAKE_CXX_COMPILER_TARGET "arm-linux-gnueabi")
endif()

# Run target binaries through user-mode QEMU, pointing it at the target sysroot.
set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L "/usr/${CMAKE_C_COMPILER_TARGET}/")

# Programs come from the host; libraries, headers and packages only from the
# target root.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# The C cross-compiler is mandatory.
find_program(C_COMPILER_FULL_PATH NAMES "${CMAKE_C_COMPILER_TARGET}-gcc")
if(NOT C_COMPILER_FULL_PATH)
    message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found")
endif()
set(CMAKE_C_COMPILER "${C_COMPILER_FULL_PATH}")

# The C++ cross-compiler is optional; it is configured only when found.
find_program(CXX_COMPILER_FULL_PATH
    NAMES "g++-${CMAKE_CXX_COMPILER_TARGET}" "${CMAKE_CXX_COMPILER_TARGET}-g++")
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER "${CXX_COMPILER_FULL_PATH}")
endif()
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,11 @@
|
||||
libdir=C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib
|
||||
includedir=C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include
|
||||
|
||||
Name: blosc2
|
||||
Description: A blocking, shuffling and lossless compression library
|
||||
URL: https://blosc.org/
|
||||
Version: 2.23.1
|
||||
|
||||
Requires:
|
||||
Libs: -L${libdir} -lblosc2
|
||||
Cflags: -I${includedir}
|
||||
Binary file not shown.
@ -0,0 +1,822 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import builtins
|
||||
import math
|
||||
import warnings
|
||||
from itertools import product
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
import blosc2
|
||||
|
||||
from .utils import get_intersecting_chunks, nptranspose, npvecdot, slice_to_chunktuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
|
||||
def _broadcast_batch_selection(batch_slices: tuple, batch_shape: tuple) -> tuple:
    """Map the batch slices of a result chunk onto one operand's batch dimensions.

    The operand's batch dimensions are right-aligned with the result's. A batch
    dimension of length 1 is being broadcast, so it is always read in full
    (``slice(0, 1, 1)``) regardless of which result chunk is being computed.
    """
    if not batch_shape:
        return ()
    trailing = batch_slices[len(batch_slices) - len(batch_shape) :]
    return tuple(
        slice(0, 1, 1) if dim == 1 else sl for sl, dim in zip(trailing, batch_shape, strict=True)
    )


def matmul(x1: blosc2.Array, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
    """
    Computes the matrix product between two Blosc2 NDArrays.

    Parameters
    ----------
    x1: :ref:`NDArray` | np.ndarray
        The first input array.
    x2: :ref:`NDArray` | np.ndarray
        The second input array.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: :ref:`NDArray`
        The matrix product of the inputs. This is a scalar only when both x1,
        x2 are 1-d vectors.

    Raises
    ------
    ValueError
        If the last dimension of ``x1`` is not the same size as
        the second-to-last dimension of ``x2``.

        If a scalar value (or a zero-dimensional array) is passed in.

    References
    ----------
    `numpy.matmul <https://numpy.org/doc/stable/reference/generated/numpy.matmul.html>`_

    Examples
    --------
    For 2-D arrays it is the matrix product:

    >>> import numpy as np
    >>> import blosc2
    >>> a = np.array([[1, 2],
    ...               [3, 4]])
    >>> nd_a = blosc2.asarray(a)
    >>> b = np.array([[2, 3],
    ...               [2, 1]])
    >>> nd_b = blosc2.asarray(b)
    >>> blosc2.matmul(nd_a, nd_b)
    array([[ 6,  5],
           [14, 13]])

    For 2-D mixed with 1-D, the result is the usual.

    >>> a = np.array([[1, 3],
    ...               [0, 1]])
    >>> nd_a = blosc2.asarray(a)
    >>> v = np.array([1, 2])
    >>> nd_v = blosc2.asarray(v)
    >>> blosc2.matmul(nd_a, nd_v)
    array([7, 2])
    >>> blosc2.matmul(nd_v, nd_a)
    array([1, 5])

    """
    # Validate arguments are not scalars
    if np.isscalar(x1) or np.isscalar(x2):
        raise ValueError("Arguments can't be scalars.")

    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)

    # Zero-dimensional arrays are not caught by np.isscalar; reject them here
    # instead of failing later with an IndexError on an empty shape.
    if x1.ndim == 0 or x2.ndim == 0:
        raise ValueError("Arguments can't be scalars.")

    # Validate matrix multiplication compatibility: the last axis of x1 is
    # contracted with the second-to-last axis of x2 (or its only axis if 1-D).
    if x1.shape[-1] != x2.shape[builtins.max(-2, -len(x2.shape))]:
        raise ValueError("Shapes are not aligned for matrix multiplication.")

    # Promote 1D arrays to 2D if necessary
    x1_is_vector = False
    x2_is_vector = False
    if x1.ndim == 1:
        x1 = blosc2.expand_dims(x1, axis=0)  # (N,) -> (1, N)
        x1_is_vector = True
    if x2.ndim == 1:
        x2 = blosc2.expand_dims(x2, axis=1)  # (M,) -> (M, 1)
        x2_is_vector = True

    n, k = x1.shape[-2:]
    m = x2.shape[-1]
    result_shape = np.broadcast_shapes(x1.shape[:-2], x2.shape[:-2]) + (n, m)
    # For matmul, we don't want to reduce the chunksize, as experiments show that
    # the larger, the better (as long as some limits are not exceeded).
    kwargs["_chunksize_reduc_factor"] = 1
    result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs)

    if 0 not in result.shape + x1.shape + x2.shape:  # if any array is empty, return array of 0s
        p, q = result.chunks[-2:]
        # NOTE(review): the block length along the contracted axis is taken from
        # x2's last-axis chunk size; this only affects blocking, not the result.
        r = x2.chunks[-1]
        x1_batch_shape = tuple(x1.shape[:-2])
        x2_batch_shape = tuple(x2.shape[:-2])

        intersecting_chunks = get_intersecting_chunks((), result.shape[:-2], result.chunks[:-2])
        for chunk in intersecting_chunks:
            chunk = chunk.raw
            # Batch part of the selection for each operand, honouring broadcasting
            # of length-1 batch dimensions.
            x1_batch = _broadcast_batch_selection(chunk, x1_batch_shape)
            x2_batch = _broadcast_batch_selection(chunk, x2_batch_shape)
            for row in range(0, n, p):
                row_end = builtins.min(row + p, n)
                for col in range(0, m, q):
                    col_end = builtins.min(col + q, m)
                    for aux in range(0, k, r):
                        aux_end = builtins.min(aux + r, k)
                        bx1 = x1[x1_batch + (slice(row, row_end), slice(aux, aux_end))]
                        bx2 = x2[x2_batch + (slice(aux, aux_end), slice(col, col_end))]
                        # Accumulate the partial product of this block pair
                        result[chunk + (slice(row, row_end), slice(col, col_end))] += np.matmul(bx1, bx2)

    if x1_is_vector:
        result = result.squeeze(axis=-2)
    if x2_is_vector:
        result = result.squeeze(axis=-1)

    return result
|
||||
|
||||
|
||||
def tensordot(
    x1: blosc2.NDArray,
    x2: blosc2.NDArray,
    axes: int | tuple[Sequence[int], Sequence[int]] = 2,
    **kwargs: Any,
) -> blosc2.NDArray:
    """
    Returns a tensor contraction of x1 and x2 over specific axes. The tensordot function corresponds to the
    generalized matrix product. Note: Neither argument is complex-conjugated or transposed. If conjugation
    and/or transposition is desired, these operations should be explicitly performed prior to computing
    the generalized matrix product.

    Parameters
    ----------
    x1: blosc2.NDArray
        First input array. Should have a numeric data type.

    x2: blosc2.NDArray
        Second input array. Should have a numeric data type. Corresponding contracted axes of x1 and x2
        must be equal.

    axes: int | tuple[Sequence[int], Sequence[int]]
        Number of axes (dimensions) to contract or explicit sequences of axis (dimension) indices for
        x1 and x2, respectively.

        * If axes is an int equal to N, then contraction is performed over the last N axes of x1 and the
          first N axes of x2 in order. The size of each corresponding axis (dimension) must match.
          Must be nonnegative.

            * If N equals 0, the result is the tensor (outer) product.
            * If N equals 1, the result is the tensor dot product.
            * If N equals 2, the result is the tensor double contraction (default).

        * If axes is a tuple of two sequences (x1_axes, x2_axes), the first sequence applies to x1 and the
          second sequence to x2. Both sequences must have the same length. Each axis (dimension)
          x1_axes[i] for x1 must have the same size as the respective axis (dimension) x2_axes[i] for x2.
          Each index referred to in a sequence must be unique. If x1 has rank (i.e, number of dimensions)
          N, a valid x1 axis must reside on the half-open interval [-N, N). If x2 has rank M, a valid x2
          axis must reside on the half-open interval [-M, M).

    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: blosc2.NDArray
        An array containing the tensor contraction whose shape consists of the non-contracted axes
        (dimensions) of the first array x1, followed by the non-contracted axes (dimensions) of the
        second array x2.

    Raises
    ------
    ValueError
        If ``axes`` has an unsupported type or value, if the axis sequences differ in length or contain
        repeated axes, or if the contracted dimensions of x1 and x2 differ in size.
    """
    fast_path = kwargs.pop("fast_path", None)  # for testing purposes
    # TODO: add fast path for when don't need to change chunkshapes

    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)

    if isinstance(axes, tuple):
        a_axes, b_axes = axes
        a_axes = list(a_axes)
        b_axes = list(b_axes)
        if len(a_axes) != len(b_axes):
            raise ValueError("Lengths of reduction axes for x1 and x2 must be equal!")
        # Normalise negative axes first, so that the sort orders computed below
        # refer to actual axis positions.
        a_axes = [x1.ndim + i if i < 0 else i for i in a_axes]
        b_axes = [x2.ndim + j if j < 0 else j for j in b_axes]
        if len(set(a_axes)) != len(a_axes) or len(set(b_axes)) != len(b_axes):
            raise ValueError("Reduction axes must be unique for each operand.")
        # need to track order of b_axes; later we cycle through a_axes sorted for op_chunk
        # a_sorted[inv_sort][b_sort] matches b_sorted since b_axes matches a_axes
        inv_sort = np.argsort(np.argsort(a_axes))
        b_sort = np.argsort(b_axes)
        order = inv_sort[b_sort]
        a_keep, b_keep = [True] * x1.ndim, [True] * x2.ndim
        for i, j in zip(a_axes, b_axes, strict=True):
            a_keep[i] = False
            b_keep[j] = False
    elif isinstance(axes, (int, np.integer)):
        axes = int(axes)
        if axes < 0:
            raise ValueError("Integer axes argument must be nonnegative!")
        if axes > builtins.min(x1.ndim, x2.ndim):
            raise ValueError("Integer axes argument exceeds the number of dimensions of the inputs.")
        order = np.arange(axes, dtype=int)  # no reordering required
        a_axes = list(range(x1.ndim - axes, x1.ndim))
        b_axes = list(range(0, axes))
        a_keep = [i + axes < x1.ndim for i in range(x1.ndim)]
        b_keep = [i >= axes for i in range(x2.ndim)]
    else:
        raise ValueError("Axes argument must be two element tuple of sequences or an integer.")

    x1shape = np.array(x1.shape)
    x2shape = np.array(x2.shape)
    # Chunk sizes and lengths of the contracted x1 axes, in ascending axis order
    a_chunks_red = tuple(c for i, c in enumerate(x1.chunks) if not a_keep[i])
    a_shape_red = tuple(c for i, c in enumerate(x1.shape) if not a_keep[i])

    if np.any(x1shape[a_axes] != x2shape[b_axes]):
        raise ValueError("x1 and x2 must have same shapes along reduction dimensions")

    result_shape = tuple(x1shape[a_keep]) + tuple(x2shape[b_keep])
    result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs)

    # Chunk indices along every contracted axis. Lengths and chunk sizes are both
    # taken in ascending axis order so that they are paired correctly even when
    # the user passes the contraction axes unsorted.
    op_chunks = [
        slice_to_chunktuple(slice(0, s, 1), c) for s, c in zip(a_shape_red, a_chunks_red, strict=True)
    ]
    res_chunks = [
        slice_to_chunktuple(s, c)
        for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True)
    ]

    # Estimated bytes needed to compute one result chunk from whole reduction axes
    chunk_memory = np.prod(result.chunks) * (
        np.prod(x1shape[a_axes]) * x1.dtype.itemsize + np.prod(x2shape[b_axes]) * x2.dtype.itemsize
    )
    if chunk_memory < blosc2.MAX_FAST_PATH_SIZE:
        fast_path = True if fast_path is None else fast_path
    fast_path = False if fast_path is None else fast_path  # fast_path set via kwargs for testing

    # adapted from numpy.tensordot
    a_keep_axes = [i for i, k in enumerate(a_keep) if k]
    b_keep_axes = [i for i, k in enumerate(b_keep) if k]
    newaxes_a = a_keep_axes + a_axes
    newaxes_b = b_axes + b_keep_axes

    for rchunk in product(*res_chunks):
        res_chunk = tuple(
            slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1)
            for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True)
        )
        # The result axes are x1's kept axes followed by x2's kept axes
        rchunk_iter = iter(res_chunk)
        a_selection = tuple(next(rchunk_iter) if a else slice(None, None, 1) for a in a_keep)
        b_selection = tuple(next(rchunk_iter) if b else slice(None, None, 1) for b in b_keep)
        res_chunk_shape = tuple(s.stop - s.start for s in res_chunk)
        for ochunk in product(*op_chunks):
            if not fast_path:  # operands too big, have to go chunk-by-chunk
                op_chunk = tuple(
                    slice(rc * rcs, builtins.min((rc + 1) * rcs, x1s), 1)
                    for rc, rcs, x1s in zip(ochunk, a_chunks_red, a_shape_red, strict=True)
                )  # use x1 chunk shape to iterate over reduction axes
                ochunk_iter = iter(op_chunk)
                a_selection = tuple(
                    next(ochunk_iter) if not a else as_ for as_, a in zip(a_selection, a_keep, strict=True)
                )
                # have to permute to match order of a_axes
                order_iter = iter(order)
                b_selection = tuple(
                    op_chunk[next(order_iter)] if not b else bs_
                    for bs_, b in zip(b_selection, b_keep, strict=True)
                )
            bx1 = x1[a_selection]
            bx2 = x2[b_selection]
            # adapted from numpy tensordot
            newshape_a = (
                math.prod([bx1.shape[i] for i in a_keep_axes]),
                math.prod([bx1.shape[a] for a in a_axes]),
            )
            newshape_b = (
                math.prod([bx2.shape[b] for b in b_axes]),
                math.prod([bx2.shape[i] for i in b_keep_axes]),
            )
            at = nptranspose(bx1, newaxes_a).reshape(newshape_a)
            bt = nptranspose(bx2, newaxes_b).reshape(newshape_b)
            res = np.dot(at, bt)
            result[res_chunk] += res.reshape(res_chunk_shape)
            if fast_path:  # already done everything
                break
    return result
|
||||
|
||||
|
||||
def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) -> blosc2.NDArray:
    """
    Computes the (vector) dot product of two arrays. Complex conjugates x1.

    Parameters
    ----------
    x1: blosc2.NDArray
        First input array. Must have floating-point data type.

    x2: blosc2.NDArray
        Second input array. Must be compatible with x1 for all non-contracted axes (via broadcasting).
        The size of the axis over which to compute the dot product must be the same size as the
        respective axis in x1. Must have a floating-point data type.

    axis: int
        The axis (dimension) of x1 and x2 containing the vectors for which to compute the dot product.
        Should be an integer on the interval [-N, -1], where N is min(x1.ndim, x2.ndim). Default: -1.

    kwargs: Any, optional
        Forwarded to the constructor of the result array. The ``fast_path`` key, if present, is
        consumed here and forces or disables the single-pass computation (used for testing).

    Returns
    -------
    out: blosc2.NDArray
        If x1 and x2 are both one-dimensional arrays, a zero-dimensional containing the dot product;
        otherwise, a non-zero-dimensional array containing the dot products and having rank N-1,
        where N is the rank (number of dimensions) of the shape determined according to broadcasting
        along the non-contracted axes.

    Raises
    ------
    ValueError
        If ``axis`` lies outside [-N, -1] or the contracted axes differ in length.
    """
    requested_fast_path = kwargs.pop("fast_path", None)  # for testing purposes
    # Pure NumPy inputs are delegated directly (needed by the array-api tests,
    # which use internal getitem to check results)
    if isinstance(x1, np.ndarray) and isinstance(x2, np.ndarray):
        return npvecdot(x1, x2, axis=axis)

    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)

    min_ndim = builtins.min(x1.ndim, x2.ndim)
    if not (-min_ndim <= axis <= -1):
        raise ValueError("axis must be on interval [-N,-1].")

    # Position of the contracted axis in each operand, and masks of kept axes
    red_ax1 = axis + x1.ndim
    red_ax2 = axis + x2.ndim
    keep1 = [pos != red_ax1 for pos in range(x1.ndim)]
    keep2 = [pos != red_ax2 for pos in range(x2.ndim)]

    shape1 = np.array(x1.shape)
    shape2 = np.array(x2.shape)
    red_step = x1.chunks[red_ax1]
    red_len = x1.shape[red_ax1]

    if np.any(shape1[red_ax1] != shape2[red_ax2]):
        raise ValueError("x1 and x2 must have same shapes along reduction dimensions")

    out_shape = np.broadcast_shapes(shape1[keep1], shape2[keep2])
    result = blosc2.zeros(out_shape, dtype=blosc2.result_type(x1, x2), **kwargs)

    chunk_indices = [
        slice_to_chunktuple(slice(0, extent, 1), chunk_len)
        for extent, chunk_len in zip(result.shape, result.chunks, strict=True)
    ]

    # Estimated bytes needed to compute one result chunk from the whole reduction axis
    operand_bytes = shape1[red_ax1] * x1.dtype.itemsize + shape2[red_ax2] * x2.dtype.itemsize
    chunk_memory = np.prod(result.chunks) * operand_bytes
    fits_in_memory = bool(chunk_memory < blosc2.MAX_FAST_PATH_SIZE)
    # An explicit fast_path kwarg (testing) overrides the size heuristic
    fast_path = fits_in_memory if requested_fast_path is None else requested_fast_path

    everything = slice(None, None, 1)
    broadcast_one = slice(0, 1, 1)
    for chunk_idx in product(*chunk_indices):
        out_sel = tuple(
            slice(idx * step, builtins.min((idx + 1) * step, extent), 1)
            for idx, step, extent in zip(chunk_idx, result.chunks, result.shape, strict=True)
        )
        # handle broadcasting - if x1, x2 different ndim, could have to prepend 1s.
        # The generators are consumed lazily, one item per kept axis.
        kept1 = (
            broadcast_one if extent == 1 else sl
            for sl, extent in zip(out_sel[-x1.ndim + 1 :], shape1[keep1], strict=True)
        )
        sel1 = tuple(next(kept1) if keep else everything for keep in keep1)
        kept2 = (
            broadcast_one if extent == 1 else sl
            for sl, extent in zip(out_sel[-x2.ndim + 1 :], shape2[keep2], strict=True)
        )
        sel2 = tuple(next(kept2) if keep else everything for keep in keep2)

        for start in range(0, red_len, red_step):
            if not fast_path:  # operands too big, go chunk-by-chunk
                window = (slice(start, builtins.min(start + red_step, red_len), 1),)
                sel1 = sel1[:red_ax1] + window + sel1[red_ax1 + 1 :]
                sel2 = sel2[:red_ax2] + window + sel2[red_ax2 + 1 :]
            # npvecdot handles conjugation of the x1 block
            result[out_sel] += npvecdot(x1[sel1], x2[sel2], axis=axis)
            if fast_path:  # already done everything
                break
    return result
|
||||
|
||||
|
||||
def _chunk_bounds(entry, chunk_len: int, dim: int) -> tuple[int, int]:
    """Return the (start, stop) element range of one chunk along one axis.

    ``entry`` is either an integer chunk coordinate or a slice that already
    describes the element range; the stop is clipped to ``dim`` in both cases.
    """
    if isinstance(entry, slice):
        start = 0 if entry.start is None else entry.start
        stop = dim if entry.stop is None else builtins.min(entry.stop, dim)
        return start, stop
    return entry * chunk_len, builtins.min(chunk_len * (entry + 1), dim)


def permute_dims(
    arr: blosc2.Array, axes: tuple[int] | list[int] | None = None, **kwargs: Any
) -> blosc2.NDArray:
    """
    Permutes the axes (dimensions) of an array.

    Parameters
    ----------
    arr: :ref:`blosc2.NDArray` | np.ndarray
        The input array.
    axes: tuple[int], list[int], optional
        The desired permutation of axes. If None, the axes are reversed by default.
        If specified, axes must be a tuple or list representing a permutation of
        ``[0, 1, ..., N-1]``, where ``N`` is the number of dimensions of the input array.
        Negative indices are also supported. The *i*-th axis of the result will correspond
        to the axis numbered ``axes[i]`` of the input.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        A Blosc2 :ref:`blosc2.NDArray` with axes transposed. Scalars and arrays with
        fewer than two dimensions are returned unchanged.

    Raises
    ------
    ValueError
        If ``axes`` is not a valid permutation of the dimensions of ``arr``.

    References
    ----------
    `numpy.transpose <https://numpy.org/doc/2.2/reference/generated/numpy.transpose.html>`_

    `permute_dims <https://data-apis.org/array-api/latest/API_specification/generated/array_api.permute_dims.html#permute-dims>`_

    Examples
    --------
    For 2-D arrays it is the matrix transposition as usual:

    >>> import blosc2
    >>> a = blosc2.arange(1, 10).reshape((3, 3))
    >>> a[:]
    array([[1, 2, 3],
           [4, 5, 6],
           [7, 8, 9]])
    >>> at = blosc2.permute_dims(a)
    >>> at[:]
    array([[1, 4, 7],
           [2, 5, 8],
           [3, 6, 9]])

    For 3-D arrays:

    >>> import blosc2
    >>> a = blosc2.arange(1, 25).reshape((2, 3, 4))
    >>> a[:]
    array([[[ 1,  2,  3,  4],
            [ 5,  6,  7,  8],
            [ 9, 10, 11, 12]],
           [[13, 14, 15, 16],
            [17, 18, 19, 20],
            [21, 22, 23, 24]]])

    >>> at = blosc2.permute_dims(a, axes=(1, 0, 2))
    >>> at[:]
    array([[[ 1,  2,  3,  4],
            [13, 14, 15, 16]],
           [[ 5,  6,  7,  8],
            [17, 18, 19, 20]],
           [[ 9, 10, 11, 12],
            [21, 22, 23, 24]]])
    """
    if np.isscalar(arr) or arr.ndim < 2:
        return arr

    # Makes a SimpleProxy if input is not blosc2 array
    arr = blosc2.as_simpleproxy(arr)

    ndim = arr.ndim

    if axes is None:
        axes = tuple(range(ndim))[::-1]
    else:
        axes = tuple(axis if axis >= 0 else ndim + axis for axis in axes)
        if sorted(axes) != list(range(ndim)):
            raise ValueError(f"axes {axes} is not a valid permutation of {ndim} dimensions")

    new_shape = tuple(arr.shape[axis] for axis in axes)
    # Unless the caller chose chunks explicitly, permute the source chunk shape too
    if "chunks" not in kwargs or kwargs["chunks"] is None:
        kwargs["chunks"] = tuple(arr.chunks[axis] for axis in axes)

    result = blosc2.empty(shape=new_shape, dtype=arr.dtype, **kwargs)

    chunks = arr.chunks
    shape = arr.shape
    # handle SimpleProxy which doesn't have iterchunks_info
    if hasattr(arr, "iterchunks_info"):
        my_it = arr.iterchunks_info()
        coords_attr = "coords"
    else:
        my_it = get_intersecting_chunks((), shape, chunks)
        coords_attr = "raw"
    for info in my_it:
        coords = getattr(info, coords_attr)
        # Entries may be integer chunk coordinates or slices (matmul in this
        # module consumes `.raw` as slices); _chunk_bounds accepts both.
        start_stop = [
            _chunk_bounds(coord, chunk, dim) for coord, chunk, dim in zip(coords, chunks, shape, strict=False)
        ]

        src_slice = tuple(slice(start, stop) for start, stop in start_stop)
        dst_slice = tuple(slice(start_stop[ax][0], start_stop[ax][1]) for ax in axes)

        # Transpose the source block and write it to the permuted location
        transposed = nptranspose(arr[src_slice], axes=axes)
        result[dst_slice] = np.ascontiguousarray(transposed)

    return result
|
||||
|
||||
|
||||
def transpose(x, **kwargs: Any) -> blosc2.NDArray:
    """
    Returns a Blosc2 blosc2.NDArray with axes transposed (deprecated).

    Only 2D arrays are transposed. Scalars and arrays with fewer than two
    dimensions are returned unchanged; more than two dimensions raise an error.

    Parameters
    ----------
    x: :ref:`blosc2.NDArray`
        The input array.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        The Blosc2 blosc2.NDArray with axes transposed.

    Raises
    ------
    ValueError
        If ``x`` has more than two dimensions.

    References
    ----------
    `numpy.transpose <https://numpy.org/doc/2.2/reference/generated/numpy.transpose.html>`_
    """
    deprecation_message = (
        "transpose is deprecated and will be removed in a future version. "
        "Use matrix_transpose or permute_dims instead."
    )
    # stacklevel=2 attributes the warning to the caller of transpose
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    # Nothing to transpose for scalars and 0-d / 1-d inputs
    nothing_to_do = np.isscalar(x) or x.ndim < 2
    if nothing_to_do:
        return x

    # Makes a SimpleProxy if input is not blosc2 array
    proxy = blosc2.as_simpleproxy(x)
    if proxy.ndim <= 2:
        return permute_dims(proxy, **kwargs)
    raise ValueError("Transposing arrays with dimension greater than 2 is not supported yet.")
|
||||
|
||||
|
||||
def matrix_transpose(arr: blosc2.Array, **kwargs: Any) -> blosc2.NDArray:
    """
    Transposes a matrix (or a stack of matrices).

    Parameters
    ----------
    arr: :ref:`blosc2.NDArray` | np.ndarray
        The input blosc2.NDArray having shape ``(..., M, N)`` and whose innermost two dimensions form
        ``MxN`` matrices.
    kwargs: Any, optional
        Keyword arguments forwarded to :func:`permute_dims`.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        A new :ref:`blosc2.NDArray` containing the transpose for each matrix and having shape
        ``(..., N, M)``.
    """
    # Makes a SimpleProxy if input is not blosc2 array
    arr = blosc2.as_simpleproxy(arr)

    # Up to two dimensions: the default axis reversal of permute_dims is enough
    if np.isscalar(arr) or arr.ndim <= 2:
        return permute_dims(arr, None, **kwargs)

    # Stack of matrices: keep the leading axes and swap only the last two
    ndim = arr.ndim
    swapped_axes = [*range(ndim - 2), ndim - 1, ndim - 2]
    return permute_dims(arr, swapped_axes, **kwargs)
|
||||
|
||||
|
||||
def diagonal(x: blosc2.NDArray, offset: int = 0) -> blosc2.NDArray:
    """
    Returns the specified diagonals of a matrix (or a stack of matrices) x.

    Parameters
    ----------
    x: blosc2.NDArray
        Input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices.

    offset: int
        Offset specifying the off-diagonal relative to the main diagonal.

        * offset = 0: the main diagonal.
        * offset > 0: off-diagonal above the main diagonal.
        * offset < 0: off-diagonal below the main diagonal.

        Default: 0.

    Returns
    -------
    out: blosc2.NDArray
        An array containing the diagonals and whose shape is determined by
        removing the last two dimensions and appending a dimension equal to the size of the
        resulting diagonals.

    Raises
    ------
    ValueError
        If ``x`` has fewer than two dimensions.

    References
    ----------
    `diagonal <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.diagonal.html#diagonal>`_
    """
    # Makes a SimpleProxy if input is not blosc2 array
    x = blosc2.as_simpleproxy(x)
    if x.ndim < 2:
        raise ValueError("diagonal requires an input with at least two dimensions.")
    n_rows, n_cols = x.shape[-2:]

    # First element of the requested diagonal: a negative offset moves it down
    # the first column, a positive offset moves it along the first row.
    first_row = builtins.max(-offset, 0)
    first_col = builtins.max(offset, 0)
    # The diagonal ends when it leaves the matrix; it is empty if it starts outside.
    length = builtins.max(builtins.min(n_rows - first_row, n_cols - first_col), 0)
    rows = np.arange(first_row, first_row + length)
    cols = np.arange(first_col, first_col + length)

    # Keep all leading (stack) axes and pick the paired (row, col) positions
    key = (slice(None, None, 1),) * (x.ndim - 2) + (rows, cols)
    # TODO: change to use slice to give optimised compressing
    return blosc2.asarray(x[key])
|
||||
|
||||
|
||||
def outer(x1: blosc2.NDArray, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
    """
    Returns the outer product of two vectors x1 and x2.

    Parameters
    ----------
    x1: blosc2.NDArray
        First one-dimensional input array of size N. Must have a numeric data type.

    x2: blosc2.NDArray
        Second one-dimensional input array of size M. Must have a numeric data type.

    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: blosc2.NDArray
        A two-dimensional array containing the outer product and whose shape is (N, M).

    Raises
    ------
    ValueError
        If either input is not one-dimensional.
    """
    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)
    if (x1.ndim != 1) or (x2.ndim != 1):
        raise ValueError("outer only valid for 1D inputs.")
    # Contracting over no axes at all yields the outer product
    return tensordot(x1, x2, ((), ()), **kwargs)
|
||||
|
||||
|
||||
def cholesky(x: blosc2.NDArray, upper: bool = False) -> blosc2.NDArray:
    """
    Cholesky decomposition. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `cholesky <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cholesky.html#cholesky>`_
    """
    raise NotImplementedError("cholesky is not implemented yet.")
|
||||
|
||||
|
||||
def cross(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1) -> blosc2.NDArray:
    """
    Cross product of 3-element vectors. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `cross <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cross.html#cross>`_
    """
    raise NotImplementedError("cross is not implemented yet.")
|
||||
|
||||
|
||||
def det(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Determinant of a square matrix (or stack of matrices). Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `det <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.det.html#det>`_
    """
    raise NotImplementedError("det is not implemented yet.")
|
||||
|
||||
|
||||
def eigh(x: blosc2.NDArray) -> tuple[blosc2.NDArray, blosc2.NDArray]:
    """
    Eigenvalue decomposition of a symmetric/Hermitian matrix. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `eigh <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigh.html#eigh>`_
    """
    raise NotImplementedError("eigh is not implemented yet.")
|
||||
|
||||
|
||||
def eigvalsh(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Eigenvalues of a symmetric/Hermitian matrix. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `eigvalsh <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigvalsh.html#eigvalsh>`_
    """
    raise NotImplementedError("eigvalsh is not implemented yet.")
|
||||
|
||||
|
||||
def inv(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Multiplicative inverse of a square matrix (or stack of matrices). Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `inv <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.inv.html#inv>`_
    """
    raise NotImplementedError("inv is not implemented yet.")
|
||||
|
||||
|
||||
def matrix_norm(
    x: blosc2.NDArray, keepdims: bool = False, ord: int | float | str | None = "fro"
) -> blosc2.NDArray:
    """
    Matrix norm of a matrix (or stack of matrices). Not implemented, but could be doable.

    According to the referenced specification, ``ord`` may take the values:

    * 'fro' - Frobenius norm
    * 'nuc' - nuclear norm
    * 1 - max(sum(abs(x), axis=-2))
    * 2 - largest singular value
    * inf - max(sum(abs(x), axis=-1))
    * -1 - min(sum(abs(x), axis=-2))
    * -2 - smallest singular value
    * -inf - min(sum(abs(x), axis=-1))

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `matrix_norm <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_norm.html#matrix_norm>`_
    """
    raise NotImplementedError("matrix_norm is not implemented yet.")
|
||||
|
||||
|
||||
def matrix_power(x: blosc2.NDArray, n: int) -> blosc2.NDArray:
    """
    Raise a square matrix (or stack of matrices) to an integer power. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    `matrix_power <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_power.html#matrix_power>`_
    """
    raise NotImplementedError("matrix_power is not implemented yet.")
|
||||
|
||||
|
||||
def matrix_rank(
    x: blosc2.blosc2.NDArray, rtol: float | blosc2.blosc2.NDArray | None = None
) -> blosc2.blosc2.NDArray:
    """
    Placeholder for the array API ``linalg.matrix_rank`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_rank.html#matrix_rank
    """
    raise NotImplementedError("matrix_rank is not implemented")
|
||||
|
||||
|
||||
def pinv(
    x: blosc2.blosc2.NDArray, rtol: float | blosc2.blosc2.NDArray | None = None
) -> blosc2.blosc2.NDArray:
    """
    Placeholder for the array API ``linalg.pinv`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.pinv.html#pinv
    """
    raise NotImplementedError("pinv is not implemented")
|
||||
|
||||
|
||||
def qr(
    x: blosc2.blosc2.NDArray, mode: str = "reduced"
) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    """
    Placeholder for the array API ``linalg.qr`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.qr.html#qr
    """
    raise NotImplementedError("qr is not implemented")
|
||||
|
||||
|
||||
def slogdet(x: blosc2.blosc2.NDArray) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    """
    Placeholder for the array API ``linalg.slogdet`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.slogdet.html#slogdet
    """
    raise NotImplementedError("slogdet is not implemented")
|
||||
|
||||
|
||||
def solve(x1: blosc2.blosc2.NDArray, x2: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray:
    """
    Placeholder for the array API ``linalg.solve`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.solve.html#solve
    """
    raise NotImplementedError("solve is not implemented")
|
||||
|
||||
|
||||
def svd(
    x: blosc2.blosc2.NDArray, full_matrices: bool = True
) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    """
    Placeholder for the array API ``linalg.svd`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svd.html#svd
    """
    raise NotImplementedError("svd is not implemented")
|
||||
|
||||
|
||||
def svdvals(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray:
    """
    Placeholder for the array API ``linalg.svdvals`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svdvals.html#svdvals
    """
    raise NotImplementedError("svdvals is not implemented")
|
||||
|
||||
|
||||
def trace(x: blosc2.blosc2.NDArray, offset: int = 0, dtype: np.dtype | None = None) -> blosc2.blosc2.NDArray:
    """
    Placeholder for the array API ``linalg.trace`` function. Not implemented.

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.trace.html#trace
    """
    raise NotImplementedError("trace is not implemented")
|
||||
|
||||
|
||||
def vector_norm(
    x: blosc2.blosc2.NDArray,
    axis: int | tuple[int, ...] | None = None,
    keepdims: bool = False,
    ord: int | float = 2,
) -> blosc2.blosc2.NDArray:
    """
    Placeholder for the array API ``linalg.vector_norm`` function.

    Not implemented, but could be doable. ``ord`` may take the values:

    * p: int - p-norm
    * inf - max(abs(x))
    * -inf - min(abs(x))

    Raises
    ------
    NotImplementedError
        Always.

    References
    ----------
    https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.vector_norm.html#vector_norm
    """
    raise NotImplementedError("vector_norm is not implemented")
|
||||
@ -0,0 +1,462 @@
|
||||
/* Runtime-agnostic wasm32 JIT JS glue for miniexpr.
|
||||
* Callers provide runtime facilities via the `runtime` object.
|
||||
*/
|
||||
(function(root) {
|
||||
'use strict';
|
||||
|
||||
function _meJitInstantiate(runtime, wasmBytes, bridgeLookupFnIdx) {
|
||||
if (!runtime || !wasmBytes) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
var HEAPF64 = runtime.HEAPF64;
|
||||
var HEAPF32 = runtime.HEAPF32;
|
||||
var wasmMemory = runtime.wasmMemory;
|
||||
var wasmTable = runtime.wasmTable;
|
||||
var stackSave = runtime.stackSave;
|
||||
var stackAlloc = runtime.stackAlloc;
|
||||
var stackRestore = runtime.stackRestore;
|
||||
var lengthBytesUTF8 = runtime.lengthBytesUTF8;
|
||||
var stringToUTF8 = runtime.stringToUTF8;
|
||||
var addFunction = runtime.addFunction;
|
||||
var err = runtime.err || function(message) {
|
||||
if (typeof console !== 'undefined' && typeof console.error === 'function') {
|
||||
console.error(message);
|
||||
}
|
||||
};
|
||||
|
||||
if (!HEAPF64 || !HEAPF32 || !wasmMemory || !wasmTable ||
|
||||
typeof stackSave !== 'function' || typeof stackAlloc !== 'function' ||
|
||||
typeof stackRestore !== 'function' || typeof lengthBytesUTF8 !== 'function' ||
|
||||
typeof stringToUTF8 !== 'function' || typeof addFunction !== 'function') {
|
||||
err('[me-wasm-jit] invalid runtime object');
|
||||
return 0;
|
||||
}
|
||||
|
||||
var src = wasmBytes;
|
||||
var enc = new TextEncoder();
|
||||
var dec = new TextDecoder();
|
||||
/* --- LEB128 helpers ------------------------------------------------- */
|
||||
/* Decode one unsigned LEB128 integer from `buf` starting at `pos`.
 * Returns [value, nextPos]. The accumulator is built with 32-bit signed
 * bitwise ops, so the result is coerced back to unsigned with `>>> 0`;
 * otherwise any value with bit 31 set would be reported as negative. */
function readULEB(buf, pos) {
    var value = 0, shift = 0, byte;
    do {
        byte = buf[pos++];
        value |= (byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    return [value >>> 0, pos];
}
|
||||
/* Encode `v` (treated as an unsigned 32-bit integer) as LEB128 and
 * return the bytes as a plain array, least-significant group first. */
function encULEB(v) {
    var bytes = [];
    for (;;) {
        var low = v & 0x7f;
        v >>>= 7;
        if (v === 0) {
            /* last group: continuation bit stays clear */
            bytes.push(low);
            return bytes;
        }
        bytes.push(low | 0x80);
    }
}
|
||||
/* Encode a string as a wasm name: LEB128 byte length followed by the
 * UTF-8 bytes produced by the shared TextEncoder `enc`. */
function encStr(s) {
    var utf8 = Array.from(enc.encode(s));
    return encULEB(utf8.length).concat(utf8);
}
|
||||
/* Read a length-prefixed name from `buf` at `pos` and decode it with the
 * shared TextDecoder `dec`. Returns [string, nextPos]. */
function readName(buf, pos) {
    var lenField = readULEB(buf, pos);
    var begin = lenField[1];
    var end = begin + lenField[0];
    return [dec.decode(buf.subarray(begin, end)), end];
}
|
||||
/* Skip over a limits record (flags, first bound, and a second bound when
 * flag bit 0 is set) and return the position just past it. */
function skipLimits(buf, pos) {
    var flagsField = readULEB(buf, pos);
    var hasSecondBound = (flagsField[0] & 0x01) !== 0;
    var afterFirst = readULEB(buf, flagsField[1])[1];
    if (!hasSecondBound) {
        return afterFirst;
    }
    return readULEB(buf, afterFirst)[1];
}
|
||||
/* Build the bytes of one import entry for "env"."memory":
 * module name, field name, kind 0x02 (memory), limits flag 0x00 (no max),
 * then the LEB128-encoded value 256 as the single limits bound. */
function encMemoryImport() {
    return encStr("env").concat(
        encStr("memory"),
        [0x02, 0x00],
        encULEB(256)
    );
}
|
||||
/* Build a complete import section (id 0x02) whose only entry is the
 * "env"."memory" import: section id, payload length, then payload. */
function buildImportSecWithMemory() {
    var payload = encULEB(1).concat(encMemoryImport());
    return [0x02].concat(encULEB(payload.length), payload);
}
|
||||
/* Re-emit an existing import section, copying every entry verbatim and
 * appending an "env"."memory" import when the section does not already
 * contain one. `secData` is the section payload (without id/length).
 * Returns the full section bytes (id 0x02, length, payload).
 * Throws on an import kind other than 0x00-0x03. */
function patchImportSec(secData) {
    var field = readULEB(secData, 0);
    var importCount = field[0];
    var cursor = field[1];
    var kept = [];
    var sawEnvMemory = false;

    for (var n = 0; n < importCount; n++) {
        var entryStart = cursor;
        field = readName(secData, cursor);
        var modName = field[0];
        field = readName(secData, field[1]);
        var fldName = field[0];
        cursor = field[1];

        var kind = secData[cursor++];
        switch (kind) {
        case 0x00:
            /* function import: one LEB128 index */
            cursor = readULEB(secData, cursor)[1];
            break;
        case 0x01:
            /* table import: elem type byte, then limits */
            cursor = skipLimits(secData, cursor + 1);
            break;
        case 0x02:
            /* memory import: limits only */
            cursor = skipLimits(secData, cursor);
            if (modName === "env" && fldName === "memory") {
                sawEnvMemory = true;
            }
            break;
        case 0x03:
            /* global import: valtype + mutability */
            cursor += 2;
            break;
        default:
            throw new Error("unsupported wasm import kind " + kind);
        }
        kept.push(Array.from(secData.subarray(entryStart, cursor)));
    }

    if (!sawEnvMemory) {
        kept.push(encMemoryImport());
    }

    var payload = kept.reduce(function(acc, entry) {
        return acc.concat(entry);
    }, encULEB(kept.length));
    return [0x02].concat(encULEB(payload.length), payload);
}
|
||||
/* Build the `env` import object handed to the JIT-compiled module.
 *
 * Every entry is a libm-style scalar function, a cast helper, `memset`,
 * or a `me_jit_*` helper. For `memset` and the `me_jit_*` names the host
 * bridge (looked up by name through `bridgeLookupFnIdx`) is preferred and
 * the JS implementation is only a fallback.
 *
 * Reads from the enclosing scope: bridgeLookupFnIdx, wasmTable,
 * wasmMemory, HEAPF64, HEAPF32, stackSave, stackAlloc, stackRestore,
 * lengthBytesUTF8, stringToUTF8. */
function buildEnvImports() {
    var bridgeLookup = bridgeLookupFnIdx ? wasmTable.get(bridgeLookupFnIdx) : null;
    var bridgeCache = Object.create(null);

    /* Resolve `name` through the host bridge; results (including misses,
     * stored as null) are cached per name. Returns a function or null. */
    function lookupBridge(name) {
        if (!bridgeLookup) {
            return null;
        }
        if (Object.prototype.hasOwnProperty.call(bridgeCache, name)) {
            return bridgeCache[name];
        }
        var sp = stackSave();
        try {
            var nbytes = lengthBytesUTF8(name) + 1;
            var namePtr = stackAlloc(nbytes);
            stringToUTF8(name, namePtr, nbytes);
            var fnIdx = bridgeLookup(namePtr) | 0;
            bridgeCache[name] = fnIdx ? wasmTable.get(fnIdx) : null;
        } finally {
            /* always release the temporary name buffer */
            stackRestore(sp);
        }
        return bridgeCache[name];
    }
    /* Host bridge function for `name` if there is one, else `fallback`. */
    function bindBridge(name, fallback) {
        var fn = lookupBridge(name);
        return fn ? fn : fallback;
    }

    /* --- scalar fallbacks ------------------------------------------- */
    function fdim(x, y) { return x > y ? (x - y) : 0.0; }
    function copysign(x, y) {
        if (y === 0) {
            /* distinguish -0 from +0 */
            return (1 / y === -Infinity) ? -Math.abs(x) : Math.abs(x);
        }
        return y < 0 ? -Math.abs(x) : Math.abs(x);
    }
    function ldexp(x, e) { return x * Math.pow(2.0, e); }
    /* Round to nearest integer, ties to even. */
    function rint(x) {
        if (!isFinite(x)) {
            return x;
        }
        var n = Math.round(x);
        if (Math.abs(x - n) === 0.5) {
            n = 2 * Math.round(x / 2);
        }
        return n;
    }
    /* C round(): ties go away from zero. Math.round sends ties toward
     * +Infinity, which is wrong for negative halves (-2.5 -> -2). */
    function roundHalfAway(x) {
        return x < 0 ? -Math.round(-x) : Math.round(x);
    }
    /* C remainder(): x - n*y with n = x/y rounded to nearest, ties to
     * even. A finite x with an infinite y yields x unchanged. */
    function remainder(x, y) {
        if (isNaN(x) || isNaN(y) || !isFinite(x) || y === 0.0) {
            return NaN;
        }
        if (!isFinite(y)) {
            return x;
        }
        return x - y * rint(x / y);
    }
    /* Polynomial approximation of erf (limited precision). */
    function erfApprox(x) {
        var sign = x < 0 ? -1.0 : 1.0;
        x = Math.abs(x);
        var a1 = 0.254829592;
        var a2 = -0.284496736;
        var a3 = 1.421413741;
        var a4 = -1.453152027;
        var a5 = 1.061405429;
        var p = 0.3275911;
        var t = 1.0 / (1.0 + p * x);
        var y = 1.0 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t) * Math.exp(-x * x);
        return sign * y;
    }
    function erfcApprox(x) { return 1.0 - erfApprox(x); }
    /* Series approximation of the gamma function; arguments below 0.5
     * are handled through the reflection formula. */
    function tgammaApprox(z) {
        var p = [
            676.5203681218851, -1259.1392167224028, 771.32342877765313,
            -176.61502916214059, 12.507343278686905, -0.13857109526572012,
            9.9843695780195716e-6, 1.5056327351493116e-7
        ];
        if (z < 0.5) {
            return Math.PI / (Math.sin(Math.PI * z) * tgammaApprox(1.0 - z));
        }
        z -= 1.0;
        var x = 0.99999999999980993;
        for (var i = 0; i < p.length; i++) {
            x += p[i] / (z + i + 1.0);
        }
        var t = z + p.length - 0.5;
        return Math.sqrt(2.0 * Math.PI) * Math.pow(t, z + 0.5) * Math.exp(-t) * x;
    }
    function lgammaApprox(x) {
        var g = tgammaApprox(x);
        return Math.log(Math.abs(g));
    }
    /* Step x by one representable double toward y by adjusting its bits. */
    function nextafterApprox(x, y) {
        if (isNaN(x) || isNaN(y)) {
            return NaN;
        }
        if (x === y) {
            return y;
        }
        if (x === 0.0) {
            return y > 0.0 ? Number.MIN_VALUE : -Number.MIN_VALUE;
        }
        var buf = new ArrayBuffer(8);
        var dv = new DataView(buf);
        dv.setFloat64(0, x, true);
        var bits = dv.getBigUint64(0, true);
        /* moving away from zero increases the bit pattern */
        if ((y > x) === (x > 0.0)) {
            bits += 1n;
        }
        else {
            bits -= 1n;
        }
        dv.setBigUint64(0, bits, true);
        return dv.getFloat64(0, true);
    }
    function meJitExp10(x) { return Math.pow(10.0, x); }
    function meJitSinpi(x) { return Math.sin(Math.PI * x); }
    function meJitCospi(x) { return Math.cos(Math.PI * x); }
    var mathExp2 = Math.exp2 ? Math.exp2 : function(x) { return Math.pow(2.0, x); };
    function meJitLogaddexp(a, b) {
        var hi = a > b ? a : b;
        var lo = a > b ? b : a;
        return hi + Math.log1p(Math.exp(lo - hi));
    }
    function meJitWhere(c, x, y) { return c !== 0.0 ? x : y; }

    /* --- element-wise fallbacks over the heap views ------------------ */
    /* Pointers are byte offsets; they are converted to element indices
     * of the matching typed-array view (>> 3 for f64, >> 2 for f32). */
    function vecUnaryF64(inPtr, outPtr, n, fn) {
        var ii = inPtr >> 3;
        var oo = outPtr >> 3;
        for (var i = 0; i < n; i++) {
            HEAPF64[oo + i] = fn(HEAPF64[ii + i]);
        }
    }
    function vecBinaryF64(aPtr, bPtr, outPtr, n, fn) {
        var aa = aPtr >> 3;
        var bb = bPtr >> 3;
        var oo = outPtr >> 3;
        for (var i = 0; i < n; i++) {
            HEAPF64[oo + i] = fn(HEAPF64[aa + i], HEAPF64[bb + i]);
        }
    }
    function vecUnaryF32(inPtr, outPtr, n, fn) {
        var ii = inPtr >> 2;
        var oo = outPtr >> 2;
        for (var i = 0; i < n; i++) {
            HEAPF32[oo + i] = fn(HEAPF32[ii + i]);
        }
    }
    function vecBinaryF32(aPtr, bPtr, outPtr, n, fn) {
        var aa = aPtr >> 2;
        var bb = bPtr >> 2;
        var oo = outPtr >> 2;
        for (var i = 0; i < n; i++) {
            HEAPF32[oo + i] = fn(HEAPF32[aa + i], HEAPF32[bb + i]);
        }
    }
    /* Closure factories so each table entry captures its own function. */
    function unaryFallback(vecFn, fn) {
        return function(inPtr, outPtr, n) { vecFn(inPtr, outPtr, n, fn); };
    }
    function binaryFallback(vecFn, fn) {
        return function(aPtr, bPtr, outPtr, n) { vecFn(aPtr, bPtr, outPtr, n, fn); };
    }

    var env = {
        memory: wasmMemory,
        acos: Math.acos, acosh: Math.acosh, asin: Math.asin, asinh: Math.asinh,
        atan: Math.atan, atan2: Math.atan2, atanh: Math.atanh, cbrt: Math.cbrt,
        ceil: Math.ceil, copysign: copysign, cos: Math.cos, cosh: Math.cosh,
        erf: erfApprox, erfc: erfcApprox, exp: Math.exp, exp2: mathExp2,
        expm1: Math.expm1, fabs: Math.abs, fdim: fdim, floor: Math.floor,
        fma: function(a, b, c) { return a * b + c; }, fmax: Math.max, fmin: Math.min,
        fmod: function(a, b) { return a % b; }, hypot: Math.hypot, ldexp: ldexp,
        lgamma: lgammaApprox, log: Math.log, log10: Math.log10, log1p: Math.log1p,
        log2: Math.log2, nextafter: nextafterApprox, pow: Math.pow, remainder: remainder,
        rint: rint, round: roundHalfAway, sin: Math.sin, sinh: Math.sinh, sqrt: Math.sqrt,
        tan: Math.tan, tanh: Math.tanh, tgamma: tgammaApprox, trunc: Math.trunc,
        me_jit_exp10: meJitExp10, me_jit_sinpi: meJitSinpi, me_jit_cospi: meJitCospi,
        me_jit_logaddexp: meJitLogaddexp, me_jit_where: meJitWhere
    };
    env.me_wasm32_cast_int = function(x) {
        return x < 0 ? Math.ceil(x) : Math.floor(x);
    };
    env.me_wasm32_cast_float = function(x) {
        return x;
    };
    env.me_wasm32_cast_bool = function(x) {
        return x !== 0 ? 1 : 0;
    };
    env.memset = bindBridge("memset", function(ptr, value, n) {
        if (n > 0) {
            /* Take a fresh byte view of the shared memory instead of
             * relying on an ambient HEAPU8, which this scope never
             * receives from `runtime`. */
            var start = ptr >>> 0;
            new Uint8Array(wasmMemory.buffer).fill(value & 255, start, start + n);
        }
        return ptr | 0;
    });

    /* Prefer host wasm bridge symbols; keep JS fallbacks for robustness. */
    ["me_jit_exp10", "me_jit_sinpi", "me_jit_cospi", "me_jit_logaddexp", "me_jit_where"].forEach(function(name) {
        env[name] = bindBridge(name, env[name]);
    });

    /* me_jit_vec_<op>_<f64|f32>: one entry per (op, element type). */
    var unaryOps = [
        ["sin", Math.sin], ["cos", Math.cos], ["exp", Math.exp], ["log", Math.log],
        ["exp10", meJitExp10], ["sinpi", meJitSinpi], ["cospi", meJitCospi],
        ["expm1", Math.expm1], ["log10", Math.log10], ["sinh", Math.sinh],
        ["cosh", Math.cosh], ["tanh", Math.tanh], ["asinh", Math.asinh],
        ["acosh", Math.acosh], ["atanh", Math.atanh], ["abs", Math.abs],
        ["sqrt", Math.sqrt], ["log1p", Math.log1p], ["exp2", mathExp2],
        ["log2", Math.log2]
    ];
    var binaryOps = [
        ["atan2", Math.atan2], ["hypot", Math.hypot], ["pow", Math.pow],
        ["fmax", Math.max], ["fmin", Math.min]
    ];
    var elemTypes = [
        ["f64", vecUnaryF64, vecBinaryF64],
        ["f32", vecUnaryF32, vecBinaryF32]
    ];
    elemTypes.forEach(function(et) {
        unaryOps.forEach(function(op) {
            var name = "me_jit_vec_" + op[0] + "_" + et[0];
            env[name] = bindBridge(name, unaryFallback(et[1], op[1]));
        });
        binaryOps.forEach(function(op) {
            var name = "me_jit_vec_" + op[0] + "_" + et[0];
            env[name] = bindBridge(name, binaryFallback(et[2], op[1]));
        });
    });
    return env;
}
|
||||
/* --- parse sections ------------------------------------------------- */
|
||||
var pos = 8, sections = [];
|
||||
while (pos < src.length) {
|
||||
var id = src[pos++];
|
||||
var tmp = readULEB(src, pos), len = tmp[0]; pos = tmp[1];
|
||||
sections.push({ id: id, data: src.subarray(pos, pos + len) });
|
||||
pos += len;
|
||||
}
|
||||
/* --- reassemble with patched memory -------------------------------- */
|
||||
var out = [0x00,0x61,0x73,0x6d, 0x01,0x00,0x00,0x00];
|
||||
var impDone = false;
|
||||
for (var i = 0; i < sections.length; i++) {
|
||||
var s = sections[i];
|
||||
if (s.id === 5) continue; /* drop memory section */
|
||||
if (s.id === 2) {
|
||||
out = out.concat(patchImportSec(s.data));
|
||||
impDone = true;
|
||||
continue;
|
||||
}
|
||||
if (!impDone && s.id > 2) {
|
||||
out = out.concat(buildImportSecWithMemory());
|
||||
impDone = true;
|
||||
}
|
||||
if (s.id === 7) { /* strip memory export from export section */
|
||||
var ep = 0, et = readULEB(s.data, ep), ne = et[0]; ep = et[1];
|
||||
var exps = [];
|
||||
for (var e = 0; e < ne; e++) {
|
||||
var nt = readULEB(s.data, ep), nl = nt[0]; ep = nt[1];
|
||||
var nm = dec.decode(s.data.subarray(ep, ep + nl)); ep += nl;
|
||||
var kd = s.data[ep++];
|
||||
var xt = readULEB(s.data, ep), xi = xt[0]; ep = xt[1];
|
||||
if (nm === "memory" && kd === 0x02) continue;
|
||||
exps.push({ n: nm, k: kd, i: xi });
|
||||
}
|
||||
var eb = encULEB(exps.length);
|
||||
for (var e = 0; e < exps.length; e++) {
|
||||
eb = eb.concat(encStr(exps[e].n));
|
||||
eb.push(exps[e].k);
|
||||
eb = eb.concat(encULEB(exps[e].i));
|
||||
}
|
||||
out.push(0x07);
|
||||
out = out.concat(encULEB(eb.length));
|
||||
out = out.concat(eb);
|
||||
continue;
|
||||
}
|
||||
out.push(s.id);
|
||||
out = out.concat(encULEB(s.data.length));
|
||||
out = out.concat(Array.from(s.data));
|
||||
}
|
||||
if (!impDone) {
|
||||
out = out.concat(buildImportSecWithMemory());
|
||||
}
|
||||
/* --- instantiate with shared memory -------------------------------- */
|
||||
var patched = new Uint8Array(out);
|
||||
try {
|
||||
var mod = new WebAssembly.Module(patched);
|
||||
var inst = new WebAssembly.Instance(mod, { env: buildEnvImports() });
|
||||
} catch (e) {
|
||||
err("[me-wasm-jit] " + e.message);
|
||||
return 0;
|
||||
}
|
||||
var fn = inst.exports["me_dsl_jit_kernel"];
|
||||
if (!fn) { err("[me-wasm-jit] missing export"); return 0; }
|
||||
return addFunction(fn, "iiii");
|
||||
}
|
||||
|
||||
/* Release a function-table slot previously returned by
 * _meJitInstantiate. Does nothing when `runtime` has no
 * `removeFunction` or when `idx` is falsy (0 means "no function"). */
function _meJitFreeFn(runtime, idx) {
    var canRemove = !!runtime && typeof runtime.removeFunction === 'function';
    if (canRemove && idx) {
        runtime.removeFunction(idx);
    }
}
|
||||
|
||||
root._meJitInstantiate = _meJitInstantiate;
|
||||
root._meJitFreeFn = _meJitFreeFn;
|
||||
|
||||
if (typeof module !== 'undefined' && module.exports) {
|
||||
module.exports = {
|
||||
_meJitInstantiate: _meJitInstantiate,
|
||||
_meJitFreeFn: _meJitFreeFn
|
||||
};
|
||||
}
|
||||
})(typeof globalThis !== 'undefined' ? globalThis : (typeof self !== 'undefined' ? self : this));
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,856 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Sequence
|
||||
|
||||
try:
|
||||
from numpy.typing import DTypeLike
|
||||
except (ImportError, AttributeError):
|
||||
# fallback to internal module (use with caution)
|
||||
from numpy._typing import DTypeLike
|
||||
|
||||
import numpy as np
|
||||
|
||||
import blosc2
|
||||
|
||||
|
||||
class ProxyNDSource(ABC):
    """
    Abstract interface that N-dimensional sources of a :ref:`Proxy` follow.

    Subclasses must provide ``shape``, ``chunks``, ``blocks``, ``dtype`` and
    ``get_chunk``; ``cparams`` and ``aget_chunk`` have default implementations.
    """

    @property
    @abstractmethod
    def shape(self) -> tuple:
        """
        Shape of the source.
        """

    @property
    @abstractmethod
    def chunks(self) -> tuple:
        """
        Chunk shape of the source.
        """

    @property
    @abstractmethod
    def blocks(self) -> tuple:
        """
        Block shape of the source.
        """

    @property
    @abstractmethod
    def dtype(self) -> np.dtype:
        """
        Data type of the source.
        """

    @property
    def cparams(self) -> blosc2.CParams:
        """
        Compression parameters of the source.

        The default only sets ``typesize`` from ``dtype.itemsize``; override
        this property if the source uses a different compression configuration.
        """
        itemsize = self.dtype.itemsize
        return blosc2.CParams(typesize=itemsize)

    @abstractmethod
    def get_chunk(self, nchunk: int) -> bytes:
        """
        Return one compressed chunk of the source.

        Parameters
        ----------
        nchunk: int
            The unidimensional index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.
        """

    async def aget_chunk(self, nchunk: int) -> bytes:
        """
        Asynchronous counterpart of :meth:`get_chunk`.

        Parameters
        ----------
        nchunk: int
            The index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.

        Raises
        ------
        NotImplementedError
            By default; sources with async support override this method.
        """
        message = "aget_chunk is only available if the source has an async aget_chunk method"
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class ProxySource(ABC):
    """
    Base interface for sources of :ref:`Proxy` that are not NDim objects.

    Subclasses must provide ``nbytes``, ``chunksize``, ``typesize`` and
    ``get_chunk``; ``cparams`` and ``aget_chunk`` have default implementations.
    """

    @property
    @abstractmethod
    def nbytes(self) -> int:
        """
        The total number of bytes in the source.
        """

    @property
    @abstractmethod
    def chunksize(self) -> int:
        """
        The chunksize of the source.

        This value is forwarded as the ``chunksize`` argument of the cache
        container, hence the integer return type.
        """

    @property
    @abstractmethod
    def typesize(self) -> int:
        """
        The typesize of the source.
        """

    @property
    def cparams(self) -> blosc2.CParams:
        """
        The compression parameters of the source.

        The default only sets ``typesize``; override this property if the
        source has a different compression configuration.
        """
        return blosc2.CParams(typesize=self.typesize)

    @abstractmethod
    def get_chunk(self, nchunk: int) -> bytes:
        """
        Return one compressed chunk of the source.

        Parameters
        ----------
        nchunk: int
            The index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.
        """

    async def aget_chunk(self, nchunk: int) -> bytes:
        """
        Asynchronous counterpart of :meth:`get_chunk`.

        Parameters
        ----------
        nchunk: int
            The index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.

        Raises
        ------
        NotImplementedError
            By default; sources with async support override this method.
        """
        raise NotImplementedError(
            "aget_chunk is only available if the source has an async aget_chunk method"
        )
|
||||
|
||||
|
||||
class Proxy(blosc2.Operand):
|
||||
"""Proxy (with cache support) for an object following the :ref:`ProxySource` interface.
|
||||
|
||||
This can be used to cache chunks of a regular data container which follows the
|
||||
:ref:`ProxySource` or :ref:`ProxyNDSource` interfaces.
|
||||
"""
|
||||
|
||||
def __init__(
    self, src: ProxySource | ProxyNDSource, urlpath: str | None = None, mode: str = "a", **kwargs: dict
):
    """
    Create a new :ref:`Proxy` to serve as a cache to save accessed chunks locally.

    Parameters
    ----------
    src: :ref:`ProxySource` or :ref:`ProxyNDSource`
        The original container.
    urlpath: str, optional
        The urlpath where to save the container that will work as a cache.
    mode: str, optional
        "a" means read/write (create if it doesn't exist); "w" means create
        (overwrite if it exists). Default is "a".
    kwargs: dict, optional
        Keyword arguments supported:

            vlmeta: dict or None
                A dictionary with different variable length metalayers. One entry per metalayer:
                    key: bytes or str
                        The name of the metalayer.
                    value: object
                        The metalayer object that will be serialized using msgpack.
            caterva2_env: bool
                Stored in the "proxy-source" metalayer of a newly created
                cache. Default is False.
            _cache: container, optional
                Private. An existing cache container to reuse; when given,
                no new container is created and ``urlpath``/``mode`` are
                not used to build one.
    """
    self.src = src
    self.urlpath = urlpath
    self._cache = kwargs.pop("_cache", None)

    if self._cache is None:
        # Record where the source lives so it can be located again later.
        meta_val = {
            "local_abspath": None,
            "urlpath": None,
            "caterva2_env": kwargs.pop("caterva2_env", False),
        }
        container = getattr(self.src, "schunk", self.src)
        if hasattr(container, "urlpath"):
            meta_val["local_abspath"] = container.urlpath
        elif isinstance(self.src, blosc2.C2Array):
            meta_val["urlpath"] = (self.src.path, self.src.urlbase, self.src.auth_token)
        meta = {"proxy-source": meta_val}
        if hasattr(self.src, "shape"):
            # N-dimensional source: mirror its geometry in an empty NDArray.
            self._cache = blosc2.empty(
                self.src.shape,
                self.src.dtype,
                chunks=self.src.chunks,
                blocks=self.src.blocks,
                cparams=self.src.cparams,
                urlpath=urlpath,
                mode=mode,
                meta=meta,
            )
        else:
            # Flat source: an SChunk whose items are all marked UNINIT.
            self._cache = blosc2.SChunk(
                chunksize=self.src.chunksize,
                cparams=self.src.cparams,
                urlpath=urlpath,
                mode=mode,
                meta=meta,
            )
            self._cache.fill_special(self.src.nbytes // self.src.typesize, blosc2.SpecialValue.UNINIT)
    # For an NDArray cache the chunk-level API lives on its `schunk` attribute.
    self._schunk_cache = getattr(self._cache, "schunk", self._cache)
    vlmeta = kwargs.get("vlmeta")
    if vlmeta:
        for key in vlmeta:
            self._schunk_cache.vlmeta[key] = vlmeta[key]
|
||||
|
||||
def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk:
    """
    Return the cache container after bringing the requested chunks into it.

    Parameters
    ----------
    item: slice or list of slices, optional
        Region of interest. With the default (an empty tuple) every chunk is
        considered; otherwise only the chunks reported by
        :func:`blosc2.get_slice_nchunks` for `item` are considered. In both
        cases a chunk is requested from the source only while the cache still
        reports it as a special (i.e. not ``NOT_SPECIAL``) chunk.

    Returns
    -------
    out: :ref:`NDArray` or :ref:`SChunk`
        The local container used to cache the already requested data.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.arange(20).reshape(10, 2)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2)))
    >>> slice_data[:3, :2]
    [[0 1]
     [2 3]
     [4 5]]
    """
    # An empty tuple means "full realization"; anything else selects chunks.
    whole = bool(item == ())
    wanted = None if whole else blosc2.get_slice_nchunks(self._cache, item)
    for info in self._schunk_cache.iterchunks_info():
        if info.special == blosc2.SpecialValue.NOT_SPECIAL:
            # Already holds real data in the cache
            continue
        if not whole and info.nchunk not in wanted:
            continue
        self._schunk_cache.update_chunk(info.nchunk, self.src.get_chunk(info.nchunk))

    return self._cache
|
||||
|
||||
async def afetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk:
    """
    Asynchronous counterpart of :meth:`fetch`: update the cache and return it.

    Parameters
    ----------
    item: slice or list of slices, optional
        Region of interest. With the default (an empty tuple) every chunk is
        considered; otherwise only the chunks reported by
        :func:`blosc2.get_slice_nchunks` for `item` are considered. A chunk is
        awaited from the source only while the cache still reports it as a
        special (i.e. not ``NOT_SPECIAL``) chunk.

    Returns
    -------
    out: :ref:`NDArray` or :ref:`SChunk`
        The local container used to cache the already requested data.

    Raises
    ------
    NotImplementedError
        If the source has no callable `aget_chunk` attribute.

    Notes
    -----
    This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource`
    have an async `aget_chunk` method.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> import asyncio
    >>> from blosc2 import ProxyNDSource
    >>> class MyProxySource(ProxyNDSource):
    >>>     def __init__(self, data):
    >>>         # If the next source is multidimensional, it must have the attributes:
    >>>         self.data = data
    >>>         f"Data shape: {self.shape}, Chunks: {self.chunks}"
    >>>         f"Blocks: {self.blocks}, Dtype: {self.dtype}"
    >>>     @property
    >>>     def shape(self):
    >>>         return self.data.shape
    >>>     @property
    >>>     def chunks(self):
    >>>         return self.data.chunks
    >>>     @property
    >>>     def blocks(self):
    >>>         return self.data.blocks
    >>>     @property
    >>>     def dtype(self):
    >>>         return self.data.dtype
    >>>     # This method must be present
    >>>     def get_chunk(self, nchunk):
    >>>         return self.data.get_chunk(nchunk)
    >>>     # This method is optional
    >>>     async def aget_chunk(self, nchunk):
    >>>         await asyncio.sleep(0.1) # Simulate an asynchronous operation
    >>>         return self.data.get_chunk(nchunk)
    >>> data = np.arange(20).reshape(4, 5)
    >>> chunks = [2, 5]
    >>> blocks = [1, 5]
    >>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks)
    >>> source = MyProxySource(data)
    >>> proxy = blosc2.Proxy(source)
    >>> async def fetch_data():
    >>>     # Fetch a slice of the data from the proxy asynchronously
    >>>     slice_data = await proxy.afetch(slice(0, 2))
    >>>     # Note that only data fetched is shown, the rest is uninitialized
    >>>     slice_data[:]
    >>> asyncio.run(fetch_data())
    >>> # Using getitem to get a slice of the data
    >>> result = proxy[1:2, 1:3]
    >>> f"Proxy getitem: {result}"
    Data shape: (4, 5), Chunks: (2, 5)
    Blocks: (1, 5), Dtype: int64
    [[0 1 2 3 4]
     [5 6 7 8 9]
     [0 0 0 0 0]
     [0 0 0 0 0]]
    Proxy getitem: [[6 7]]
    """
    if not callable(getattr(self.src, "aget_chunk", None)):
        raise NotImplementedError("afetch is only available if the source has an aget_chunk method")

    # An empty tuple means "full realization"; anything else selects chunks.
    whole = bool(item == ())
    wanted = None if whole else blosc2.get_slice_nchunks(self._cache, item)
    for info in self._schunk_cache.iterchunks_info():
        if info.special == blosc2.SpecialValue.NOT_SPECIAL:
            # Already holds real data in the cache
            continue
        if not whole and info.nchunk not in wanted:
            continue
        chunk = await self.src.aget_chunk(info.nchunk)
        self._schunk_cache.update_chunk(info.nchunk, chunk)

    return self._cache
|
||||
|
||||
def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
    """
    Read a slice through the :ref:`Proxy`, caching the chunks it touches.

    Parameters
    ----------
    item: slice or list of slices
        The slice of the desired data.

    Returns
    -------
    out: numpy.ndarray
        An array with the data slice.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.arange(25).reshape(5, 5)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> proxy[0:3, 0:3]
    [[ 0  1  2]
     [ 5  6  7]
     [10 11 12]]
    >>> proxy[2:5, 2:5]
    [[12 13 14]
     [17 18 19]
     [22 23 24]]
    """
    # First make sure the cache holds the chunks needed by `item` ...
    self.fetch(item)
    # ... then serve the request from the cache itself.
    cache = self._cache
    return cache[item]
|
||||
|
||||
@property
def dtype(self) -> np.dtype:
    """The dtype of the cache when it is an NDArray; None otherwise"""
    if isinstance(self._cache, blosc2.NDArray):
        return self._cache.dtype
    return None
|
||||
|
||||
@property
def shape(self) -> tuple[int]:
    """The shape of the cache when it is an NDArray; otherwise `len()` of the cache (a plain int)"""
    if isinstance(self._cache, blosc2.NDArray):
        return self._cache.shape
    return len(self._cache)
|
||||
|
||||
@property
def chunks(self) -> tuple[int]:  # cache should have same chunks as src
    """The chunks of the cache, or None if the cache is not a Blosc2 NDArray"""
    if isinstance(self._cache, blosc2.NDArray):
        return self._cache.chunks
    return None
|
||||
|
||||
@property
def blocks(self) -> tuple[int]:  # cache should have same blocks as src
    """The blocks of the cache, or None if the cache is not a Blosc2 NDArray"""
    if isinstance(self._cache, blosc2.NDArray):
        return self._cache.blocks
    return None
|
||||
|
||||
@property
def schunk(self) -> blosc2.schunk.SChunk:
    """The :ref:`SChunk` that backs the cache container"""
    backing = self._schunk_cache
    return backing
|
||||
|
||||
@property
def cparams(self) -> blosc2.CParams:
    """The compression parameters reported by the cache container"""
    cache = self._cache
    return cache.cparams
|
||||
|
||||
@property
def info(self) -> str:
    """
    The info of the cache.

    Raises
    ------
    NotImplementedError
        If the cache is not an NDArray.
    """
    if not isinstance(self._cache, blosc2.NDArray):
        raise NotImplementedError("info is only available if the source is a NDArray")
    return self._cache.info
|
||||
|
||||
def __str__(self):
|
||||
return f"Proxy({self.src}, urlpath={self.urlpath})"
|
||||
|
||||
@property
def vlmeta(self) -> blosc2.schunk.vlmeta:
    """
    The variable length metalayers of the cache's super-chunk.

    See Also
    --------
    :py:attr:`blosc2.schunk.SChunk.vlmeta`
    """
    backing = self._schunk_cache
    return backing.vlmeta
|
||||
|
||||
@property
def fields(self) -> dict:
    """
    Dictionary with the fields of :paramref:`self`.

    Returns
    -------
    fields: dict
        A dictionary mapping each field name of the cache to a
        :ref:`ProxyNDField` bound to this :ref:`Proxy`, or None when the
        cache exposes no `fields` attribute (or it is None).

    See Also
    --------
    :ref:`NDField`

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.ones(16, dtype=[('field1', 'i4'), ('field2', 'f4')]).reshape(4, 4)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> # Get a dictionary of fields from the proxy, where each field can be accessed individually
    >>> fields_dict = proxy.fields
    >>> for field_name, field_proxy in fields_dict.items():
    >>>     print(f"Field name: {field_name}, Field data: {field_proxy}")
    Field name: field1, Field data: <blosc2.proxy.ProxyNDField object at 0x114472d20>
    Field name: field2, Field data: <blosc2.proxy.ProxyNDField object at 0x10e215be0>
    >>> fields_dict['field2'][:]
    [[1. 1. 1. 1.]
     [1. 1. 1. 1.]
     [1. 1. 1. 1.]
     [1. 1. 1. 1.]]
    """
    names = getattr(self._cache, "fields", None)
    if names is not None:
        return {name: ProxyNDField(self, name) for name in names}
    return None
|
||||
|
||||
|
||||
class ProxyNDField(blosc2.Operand):
    """Operand giving access to a single named field of a :ref:`Proxy`."""

    def __init__(self, proxy: Proxy, field: str):
        """
        Bind `field` of `proxy`.

        The field dtype (``proxy.dtype[field]``) and the proxy shape are
        captured once, at construction time.
        """
        self.proxy, self.field = proxy, field
        self._dtype = proxy.dtype[field]
        self._shape = proxy.shape

    @property
    def dtype(self) -> np.dtype:
        """
        Get the data type of the :ref:`ProxyNDField`.

        Returns
        -------
        out: np.dtype
            The data type of the :ref:`ProxyNDField`.
        """
        return self._dtype

    @property
    def shape(self) -> tuple[int]:
        """
        Get the shape of the :ref:`ProxyNDField`.

        Returns
        -------
        out: tuple
            The shape of the :ref:`ProxyNDField`.
        """
        return self._shape

    def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
        """
        Get a slice as a numpy.ndarray using the `field` in `proxy`.

        Parameters
        ----------
        item: slice or list of slices
            The slice of the desired data.

        Returns
        -------
        out: numpy.ndarray
            An array with the data slice.
        """
        # Slice the whole record through the proxy, then pick out this field
        return self.proxy[item][self.field]
|
||||
|
||||
|
||||
def _convert_dtype(dt: str | DTypeLike):
|
||||
"""
|
||||
Attempts to convert to blosc2.dtype (i.e. numpy dtype)
|
||||
"""
|
||||
if hasattr(dt, "as_numpy_dtype"):
|
||||
dt = dt.as_numpy_dtype
|
||||
try:
|
||||
return np.dtype(dt)
|
||||
except TypeError: # likely passed e.g. a torch.float64
|
||||
return np.dtype(str(dt).split(".")[1])
|
||||
except Exception as e:
|
||||
raise TypeError(f"Could not parse dtype arg {dt}.") from e
|
||||
|
||||
|
||||
class SimpleProxy(blosc2.Operand):
    """
    Simple proxy for any data container to be used with the compute engine.

    The source must have a `shape` and `dtype` attributes; if not,
    it will be converted to a NumPy array via the `np.asarray` function.
    It should also have a `__getitem__` method.

    This only supports the __getitem__ method. No caching is performed.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> a = np.arange(20, dtype=np.float32).reshape(4, 5)
    >>> proxy = blosc2.SimpleProxy(a)
    >>> proxy[1:3, 2:4]
    [[ 7.  8.]
     [12. 13.]]
    """

    def __init__(self, src, chunks: tuple | None = None, blocks: tuple | None = None):
        """
        Wrap `src`.

        Parameters
        ----------
        src
            The container to wrap (see the class docstring for requirements).
        chunks: tuple or None
            Chunk shape hint. When None and `src.chunks` is a sequence of
            ints, `src.chunks` is used as the hint.
        blocks: tuple or None
            Block shape hint. When None and `src.blocks` is a sequence of
            ints, `src.blocks` is used as the hint.

        Raises
        ------
        TypeError
            If the (possibly converted) source has no `__getitem__`.
        """
        looks_like_array = hasattr(src, "shape") and hasattr(src, "dtype")
        if not looks_like_array:
            # If the source is not an array, convert it to NumPy
            src = np.asarray(src)
        if not hasattr(src, "__getitem__"):
            raise TypeError("The source must have a __getitem__ method")
        self._src = src
        self._dtype = _convert_dtype(src.dtype)
        self._shape = src.shape if isinstance(src.shape, tuple) else tuple(src.shape)

        def has_int_sequence(name):
            # True only for a non-string Sequence attribute made entirely of ints
            value = getattr(src, name, None)
            if isinstance(value, (str, bytes)) or not isinstance(value, Sequence):
                return False
            return all(isinstance(x, int) for x in value)

        if chunks is None and has_int_sequence("chunks"):
            chunks = src.chunks
        if blocks is None and has_int_sequence("blocks"):
            blocks = src.blocks
        # Compute reasonable values for chunks and blocks
        self.chunks, self.blocks = blosc2.compute_chunks_blocks(
            self.shape, chunks, blocks, self.dtype, cparams=blosc2.CParams(clevel=0)
        )

    @property
    def src(self):
        """The source object that this proxy wraps."""
        return self._src

    @property
    def shape(self):
        """The shape of the source array."""
        return self._shape

    @property
    def dtype(self):
        """The data type of the source array."""
        return self._dtype

    @property
    def ndim(self):
        """The number of dimensions of the source array."""
        return len(self.shape)

    def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
        """
        Get a slice as a numpy.ndarray (via this proxy).

        Parameters
        ----------
        item
            The index or slice, passed unchanged to the source's `__getitem__`.

        Returns
        -------
        out: numpy.ndarray
            An array with the data slice. A result without a `shape`
            attribute, or whose shape is ``()``, is returned as is.
        """
        out = self._src[item]
        if hasattr(out, "shape") and not out.shape == ():
            # avoids copy for PyTorch (JAX/Tensorflow will always copy,
            # no easy way around it)
            return np.asarray(out)
        return out
|
||||
|
||||
|
||||
def as_simpleproxy(*arrs: Sequence[blosc2.Array]) -> tuple[SimpleProxy | blosc2.Operand]:
    """
    Wrap every argument that is not already a blosc2.Operand in a SimpleProxy.

    Arguments that are blosc2.Operand instances are passed through untouched.

    Parameters
    ----------
    arrs: Sequence[blosc2.Array]
        Objects fulfilling Array protocol.

    Returns
    -------
    out: tuple[blosc2.SimpleProxy | blosc2.Operand]
        Objects with minimal interface for blosc2 LazyExpr computations.
        When exactly one argument is given, the single object is returned
        instead of a 1-tuple.
    """
    wrapped = tuple(x if isinstance(x, blosc2.Operand) else SimpleProxy(x) for x in arrs)
    if len(wrapped) == 1:
        return wrapped[0]
    return wrapped
|
||||
|
||||
|
||||
def jit(func=None, *, out=None, disable=False, **kwargs):  # noqa: C901
    """
    Prepare a function so that it can be used with the Blosc2 compute engine.

    The inputs of the function can be any combination of NumPy/NDArray arrays
    and scalars. The function will be called with the NumPy arrays replaced by
    :ref:`SimpleProxy` objects, whereas NDArray objects will be used as is.

    The returned value will be a NDArray if appropriate kwargs are provided
    (e.g. `cparams=`). Else, the return value will be a NumPy array
    (if the function returns a NumPy array). If `out` is provided,
    the result will be computed and stored in the `out` array

    Parameters
    ----------
    func: callable
        The function to be prepared for the Blosc2 compute engine.
    out: np.ndarray, NDArray, optional
        The output array where the result will be stored.
    disable: bool, optional
        If True, the decorator is disabled and the original function is returned unchanged.
        Default is False.
    **kwargs: dict, optional
        Additional keyword arguments supported by the :func:`empty` constructor.

    Returns
    -------
    wrapper
        The wrapped function (carrying the name, docstring and other metadata
        of `func`), or a decorator when `func` is not given.

    Notes
    -----
    * Although many NumPy functions are supported, some may not be implemented yet.
      If you find a function that is not supported, please open an issue.
    * `out` and `kwargs` parameters are not supported for all expressions
      (e.g. when using a reduction as the last function). In this case, you can
      still use the `out` parameter of the reduction function for some custom
      control over the output.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> @blosc2.jit
    >>> def compute_expression(a, b, c):
    >>>     return np.sum(((a ** 3 + np.sin(a * 2)) > 2 * c) & (b > 0), axis=1)
    >>> a = np.arange(20, dtype=np.float32).reshape(4, 5)
    >>> b = np.arange(20).reshape(4, 5)
    >>> c = np.arange(5)
    >>> compute_expression(a, b, c)
    [5 5 5 5]
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    import functools

    def decorator(func):
        if disable:
            return func

        # Preserve __name__, __doc__, __wrapped__, ... of the user function
        @functools.wraps(func)
        def wrapper(*args, **func_kwargs):
            # Get some kwargs in decorator for SimpleProxy constructor
            proxy_kwargs = {"chunks": kwargs.get("chunks"), "blocks": kwargs.get("blocks")}

            # Wrap the arguments in SimpleProxy objects if they are not NDArrays
            new_args = []
            for arg in args:
                if issubclass(type(arg), blosc2.Operand):
                    new_args.append(arg)
                else:
                    new_args.append(SimpleProxy(arg, **proxy_kwargs))
            # The same for the keyword arguments
            for key, value in func_kwargs.items():
                if issubclass(type(value), blosc2.Operand):
                    continue
                func_kwargs[key] = SimpleProxy(value, **proxy_kwargs)

            # Call function with the new arguments
            retval = func(*new_args, **func_kwargs)

            # Treat return value
            # If it is a numpy array, return it as is
            if isinstance(retval, np.ndarray):
                if kwargs and any(kwargs[key] is not None for key in kwargs):
                    # But if kwargs are provided, return a NDArray instead
                    return blosc2.asarray(retval, **kwargs)
                return retval

            # In some instances, the return value is not a LazyExpr
            # (e.g. using a reduction as the last function, and using an `out` param)
            if not isinstance(retval, blosc2.LazyExpr):
                return retval

            # If the return value is a LazyExpr, compute it
            if out is not None:
                return retval.compute(out=out, **kwargs)
            if kwargs and any(kwargs[key] is not None for key in kwargs):
                return retval.compute(**kwargs)
            # If no kwargs are provided, return a numpy array
            return retval[()]

        return wrapper

    # Support both `@jit` and `@jit(...)` usages
    if func is None:
        return decorator
    return decorator(func)
|
||||
|
||||
|
||||
class PandasUdfEngine:
    """Engine object exposing `map` and `apply` entry points that run a function through a decorator."""

    @staticmethod
    def _ensure_numpy_data(data):
        """
        Return `data` as is if it is a NumPy array, else its `.values` attribute.

        Raises
        ------
        ValueError
            If `data` is not a NumPy array and has no `values` attribute.
        """
        if isinstance(data, np.ndarray):
            return data
        try:
            return data.values
        except AttributeError as err:
            # Use the type name: instances generally have no `__name__`
            raise ValueError(
                f"blosc2.jit received an object of type {type(data).__name__}, which is not supported. "
                "Try casting your Series or DataFrame to a NumPy dtype."
            ) from err

    @classmethod
    def map(cls, data, func, args, kwargs, decorator, skip_na):
        """
        JIT a NumPy array element-wise. In the case of Blosc2, functions are
        expected to be vectorized NumPy operations, so the function is called
        with the NumPy array as the function parameter, instead of calling the
        function once for each element.

        Raises
        ------
        NotImplementedError
            Always; `map` is not supported by this engine.
        """
        raise NotImplementedError("The Blosc2 engine does not support map. Use apply instead.")

    @classmethod
    def apply(cls, data, func, args, kwargs, decorator, axis):
        """
        JIT a NumPy array by column or row. In the case of Blosc2, functions are
        expected to be vectorized NumPy operations, so the function is called
        with the NumPy array as the function parameter, instead of calling the
        function once for each column or row.

        Parameters
        ----------
        data
            A NumPy array, or an object with a `values` attribute.
        func: callable
            The function to run; it is first passed through `decorator`.
        args, kwargs
            Extra positional and keyword arguments forwarded to `func`.
        decorator: callable
            Called as ``decorator(func)`` to obtain the function actually run.
        axis
            None, 0 / "index" (one call per column) or 1 / "columns" (one call per row).

        Raises
        ------
        NotImplementedError
            If `axis` is not one of the supported values (and `data` is not 1-dim).
        """
        data = cls._ensure_numpy_data(data)
        func = decorator(func)
        if data.ndim == 1 or axis is None:
            # pandas Series.apply or pipe
            return func(data, *args, **kwargs)
        elif axis in (0, "index"):
            # pandas apply(axis=0) column-wise
            result = [func(data[:, col_idx], *args, **kwargs) for col_idx in range(data.shape[1])]
            return np.vstack(result).transpose()
        elif axis in (1, "columns"):
            # pandas apply(axis=1) row-wise
            result = [func(data[row_idx, :], *args, **kwargs) for row_idx in range(data.shape[0])]
            return np.vstack(result)
        else:
            raise NotImplementedError(f"Unknown axis '{axis}'. Use one of 0, 1 or None.")
|
||||
|
||||
|
||||
# Attach the engine class to `jit` under the `__pandas_udf__` attribute.
# NOTE(review): presumably this is how pandas discovers the engine when
# `jit` is passed as `engine=` - confirm against the pandas documentation.
jit.__pandas_udf__ = PandasUdfEngine
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,255 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
import contextlib
|
||||
import warnings
|
||||
from dataclasses import asdict, dataclass, field, fields
|
||||
|
||||
import blosc2
|
||||
|
||||
|
||||
def default_nthreads():
    """Return the current value of :py:obj:`blosc2.nthreads` (read at call time)."""
    nthreads = blosc2.nthreads
    return nthreads
|
||||
|
||||
|
||||
def default_filters():
    """Return a new list with the default filter pipeline: five NOFILTER slots followed by SHUFFLE."""
    return [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE]
|
||||
|
||||
|
||||
def default_filters_meta():
    """Return a new list with the default filters metadata: six zeros."""
    return [0 for _ in range(6)]
|
||||
|
||||
|
||||
@dataclass
class CParams:
    """Dataclass for hosting the different compression parameters.

    Parameters
    ----------
    codec: :class:`Codec` or int
        The compressor code. Default is :py:obj:`Codec.ZSTD <Codec>`.
    codec_meta: int
        The metadata for the compressor code. Default is 0.
    clevel: int
        The compression level from 0 (no compression) to 9
        (maximum compression). Default is 5.
    use_dict: bool
        Whether to use dictionaries when compressing
        (only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). Default is `False`.
    typesize: int
        The data type size, ranging from 1 to 255. Default is 8.
    nthreads: int
        The number of threads to use internally. By default, the
        value of :py:obj:`blosc2.nthreads` is used. If not set with
        :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it.
    blocksize: int
        The requested size of the compressed blocks. If set to 0 (the default)
        blosc2 will choose the size automatically.
    splitmode: :class:`SplitMode`
        The split mode for the blocks.
        The default value is :py:obj:`SplitMode.AUTO_SPLIT <SplitMode>`.
    filters: :class:`Filter` or int list or None
        The sequence of filters. Default: [:py:obj:`Filter.NOFILTER <Filter>`,
        :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`,
        :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.SHUFFLE <Filter>`].
    filters_meta: list
        The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`.
    tuner: :class:`Tuner`
        The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
    """

    codec: blosc2.Codec | int = blosc2.Codec.ZSTD
    codec_meta: int = 0
    clevel: int = 5
    use_dict: bool = False
    typesize: int = 8
    nthreads: int = field(default_factory=default_nthreads)
    blocksize: int = 0
    splitmode: blosc2.SplitMode = blosc2.SplitMode.AUTO_SPLIT
    filters: list[blosc2.Filter | int] = field(default_factory=default_filters)
    filters_meta: list[int] = field(default_factory=default_filters_meta)
    tuner: blosc2.Tuner = blosc2.Tuner.STUNE

    def __post_init__(self):
        """
        Normalize and validate the fields after dataclass initialization.

        Plain values for `codec`, `splitmode`, `tuner` and each filter are
        converted to their enum when the enum accepts them (values it rejects
        are kept as given). `filters` and `filters_meta` are modified in place.

        Raises
        ------
        ValueError
            If more than 6 filters are given, or if there are more filters
            than filters meta entries.
        """
        # C2Array sends metadata (like codec, filters, splitmode and tuner) as ints
        if not isinstance(self.codec, blosc2.Codec):
            with contextlib.suppress(ValueError):
                # User-defined codecs may have no entries in Codec
                self.codec = blosc2.Codec(self.codec)
        if not isinstance(self.splitmode, blosc2.SplitMode):
            with contextlib.suppress(ValueError):
                self.splitmode = blosc2.SplitMode(self.splitmode)
        if not isinstance(self.tuner, blosc2.Tuner):
            with contextlib.suppress(ValueError):
                self.tuner = blosc2.Tuner(self.tuner)

        if len(self.filters) > 6:
            raise ValueError("Number of filters exceeds 6")
        if len(self.filters) < len(self.filters_meta):
            # Silently truncate the meta list to the number of filters
            self.filters_meta = self.filters_meta[: len(self.filters)]
            # There is no need to raise a warning here
            # warnings.warn("Changed `filters_meta` length to match `filters` length")
        if len(self.filters) > len(self.filters_meta):
            raise ValueError("Number of filters cannot exceed number of filters meta")

        for i, filter_i in enumerate(self.filters):
            if not isinstance(filter_i, blosc2.Filter):
                with contextlib.suppress(ValueError):
                    # User-defined filters may have no entries in Filter
                    self.filters[i] = blosc2.Filter(filter_i)
            # A BYTEDELTA filter whose meta is still 0 gets `typesize` as its meta
            if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA:
                self.filters_meta[i] = self.typesize
|
||||
|
||||
|
||||
@dataclass
class DParams:
    """Dataclass for hosting the different decompression parameters.

    Parameters
    ----------
    nthreads: int
        The number of threads to use internally. By default, the
        value of :py:obj:`blosc2.nthreads` is used. If not set with
        :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it.
    """

    # The default is obtained by calling default_nthreads() for each new instance
    nthreads: int = field(default_factory=default_nthreads)
|
||||
|
||||
|
||||
@dataclass
class Storage:
    """Dataclass for hosting the different storage parameters.

    Parameters
    ----------
    contiguous: bool
        Indicates whether the chunks are stored contiguously.
        Default is True when :paramref:`urlpath` is not None;
        False otherwise.
    urlpath: str or pathlib.Path, optional
        If the storage is persistent, the name of the file (when
        `contiguous = True`) or the directory (if `contiguous = False`).
        If the storage is in-memory, then this field is `None`.
    mode: str, optional
        Persistence mode: 'r' means read only (must exist);
        'a' means read/write (create if it doesn't exist);
        'w' means create (overwrite if it exists). Default is 'a'.
    mmap_mode: str, optional
        If set, the file will be memory-mapped instead of using the default
        I/O functions and the `mode` argument will be ignored. The memory-mapping
        modes are similar to those used by the
        `numpy.memmap <https://numpy.org/doc/stable/reference/generated/numpy.memmap.html>`_
        function, but it is possible to extend the file:

        .. list-table::
            :widths: 10 90
            :header-rows: 1

            * - mode
              - description
            * - 'r'
              - Open an existing file for reading only.
            * - 'r+'
              - Open an existing file for reading and writing. Use this mode if you want
                to append data to an existing schunk file.
            * - 'w+'
              - Create or overwrite an existing file for reading and writing. Use this
                mode if you want to create a new schunk.
            * - 'c'
              - Open an existing file in copy-on-write mode: all changes affect the data
                in memory but changes are not saved to disk. The file on disk is
                read-only. On Windows, the size of the mapping cannot change.

        Only contiguous storage can be memory-mapped. Hence, `urlpath` must point to a
        file (and not a directory).

        .. note::
            Memory-mapped files are opened once, and their contents remain in (virtual)
            memory for the lifetime of the schunk. Using memory-mapped I/O can be faster
            than the default I/O functions, depending on the use case. While
            reading performance is generally better, writing performance may be
            slower in some cases on certain systems. Memory-mapped files
            can be especially beneficial when operating with network file systems
            (like NFS).

            This is currently a beta feature (especially for write operations) and we
            recommend trying it out and reporting any issues you may encounter.

    initial_mapping_size: int, optional
        The initial size of the mapping for the memory-mapped file when writes are
        allowed (r+ w+, or c mode). Once a file is memory-mapped and extended beyond the
        initial mapping size, the file must be remapped, which may be expensive. This
        parameter allows decoupling the mapping size from the actual file size to
        reserve memory early for future writes and avoid remappings. The memory is only
        reserved virtually and does not occupy physical memory unless actual writes
        occur. Since the virtual address space is large enough, it is ok to be generous
        with this parameter (with special consideration on Windows, see note below).
        For best performance, set this to the maximum expected size of the compressed
        data (see example in :obj:`SChunk.__init__ <blosc2.schunk.SChunk.__init__>`).
        The size is in bytes.

        Default: 1 GiB.

        .. note::
            On Windows, the size of the mapping is directly coupled to the file size.
            When the schunk is destroyed, the file size will be truncated to the
            actual size of the schunk.

    meta: dict or None
        A dictionary with different metalayers. Each entry represents a metalayer:

            key: bytes or str
                The name of the metalayer.
            value: object
                The metalayer object that will be serialized using msgpack.
    """

    contiguous: bool = None
    urlpath: str = None
    mode: str = "a"
    mmap_mode: str = None
    initial_mapping_size: int = None
    meta: dict = None

    def __post_init__(self):
        """
        Fill in values that were left as None.

        `contiguous` is derived from whether `urlpath` is set. Any other field
        that is None, and is not one of the fields allowed to stay None, is
        reset to the value a default ``Storage()`` has, and a warning is issued.
        """
        if self.contiguous is None:
            self.contiguous = self.urlpath is not None

        # Fields for which None is a legitimate value
        may_be_none = ("urlpath", "mmap_mode", "initial_mapping_size", "meta")
        for f in fields(self):
            if f.name in may_be_none or getattr(self, f.name) is not None:
                continue
            setattr(self, f.name, getattr(Storage(), f.name))
            warnings.warn(f"`{f.name}` field value changed from `None` to `{getattr(self, f.name)}`")
|
||||
|
||||
|
||||
# Defaults for compression params: a plain dict built from a default CParams()
cparams_dflts = asdict(CParams())
"""
Compression params defaults.
"""

# Defaults for decompression params: a plain dict built from a default DParams()
dparams_dflts = asdict(DParams())
"""
Decompression params defaults.
"""

# Defaults for storage: a plain dict built from a default Storage()
storage_dflts = asdict(Storage())
"""
Storage params defaults. This is meant only for :ref:`SChunk <SChunk>` or :ref:`NDArray <NDArray>`.
"""
|
||||
@ -0,0 +1,700 @@
|
||||
#######################################################################
|
||||
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#######################################################################
|
||||
|
||||
import contextlib
|
||||
import os
|
||||
from collections.abc import Iterator, MutableMapping
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
import blosc2
|
||||
from blosc2.dict_store import DictStore
|
||||
from blosc2.schunk import SChunk
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from blosc2.c2array import C2Array
|
||||
from blosc2.ndarray import NDArray
|
||||
|
||||
|
||||
class vlmetaProxy(MutableMapping):
    """Mapping facade placed in front of the vlmeta object owned by a TreeStore.

    - ``proxy[:]`` builds a plain ``{name: value}`` dict by reading every
      name through the wrapped vlmeta object.
    - Writes and deletions are refused when the owning TreeStore has
      ``mode == "r"``.
    - Iteration and ``len()`` are forwarded to the wrapped vlmeta object.
    """

    def __init__(self, tstore: "TreeStore", inner_vlmeta):
        # Owning store: consulted for its mode and asked to persist changes.
        self._tstore = tstore
        # Wrapped vlmeta object that actually holds the metadata.
        self._inner = inner_vlmeta

    def __setitem__(self, key, value):
        """Set one entry, or merge a mapping into the metadata via ``[:]``."""
        store = self._tstore
        if store.mode == "r":
            raise ValueError("TreeStore is in read-only mode")

        # The backing SChunk may have been created lazily and not be in the
        # store yet; register it before writing.  DictStore's methods are
        # called directly so that TreeStore's vlmeta-key filtering is bypassed.
        if not DictStore.__contains__(store, store._vlmeta_key):
            DictStore.__setitem__(store, store._vlmeta_key, store._vlmeta)

        if not isinstance(key, slice):
            self._inner[key] = value
            # Refresh the snapshot kept by the store.
            store._persist_vlmeta()
            return

        if key.start is not None or key.stop is not None:
            raise NotImplementedError("Slicing is not supported, unless [:]")

        # Bulk assignment updates existing entries rather than replacing them all.
        for name, item in value.items():
            self._inner[name] = item
        # A single persist covers the whole bulk update.
        store._persist_vlmeta()

    def __getitem__(self, key):
        """Return one entry, or a dict with every entry for ``[:]``."""
        if not isinstance(key, slice):
            return self._inner[key]
        if key.start is not None or key.stop is not None:
            raise NotImplementedError("Slicing is not supported, unless [:]")
        # Materialise a fresh dict by reading each name individually.
        return {name: self._inner[name] for name in self._inner}

    def __delitem__(self, key):
        """Delete one entry and persist the change."""
        store = self._tstore
        if store.mode == "r":
            raise ValueError("TreeStore is in read-only mode")
        del self._inner[key]
        # Refresh the snapshot kept by the store.
        store._persist_vlmeta()

    def __iter__(self):
        return iter(self._inner)

    def __len__(self):
        return len(self._inner)
|
||||
|
||||
|
||||
class TreeStore(DictStore):
|
||||
"""
|
||||
A hierarchical tree-based storage container for Blosc2 data.
|
||||
|
||||
Extends :class:`blosc2.DictStore` with strict hierarchical key validation
|
||||
and tree traversal capabilities. Keys must follow a hierarchical structure
|
||||
using '/' as separator and always start with '/'. If user passes a key
|
||||
that doesn't start with '/', a leading '/' will be added automatically.
|
||||
|
||||
It supports the same arguments as :class:`blosc2.DictStore`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
localpath : str
|
||||
Local path for the directory (`.b2d`) or file (`.b2z`); other extensions
|
||||
are not supported. If a directory is specified, it will be treated as
|
||||
a Blosc2 directory format (B2DIR). If a file is specified, it
|
||||
will be treated as a Blosc2 zip format (B2ZIP).
|
||||
mode : str, optional
|
||||
File mode ('r', 'w', 'a'). Default is 'a'.
|
||||
tmpdir : str or None, optional
|
||||
Temporary directory to use when working with `.b2z` files. If None,
|
||||
a system temporary directory will be managed. Default is None.
|
||||
cparams : dict or None, optional
|
||||
Compression parameters for the internal embed store.
|
||||
If None, the default Blosc2 parameters are used.
|
||||
dparams : dict or None, optional
|
||||
Decompression parameters for the internal embed store.
|
||||
If None, the default Blosc2 parameters are used.
|
||||
storage : blosc2.Storage or None, optional
|
||||
Storage properties for the internal embed store.
|
||||
If None, the default Blosc2 storage properties are used.
|
||||
threshold : int, optional
|
||||
Threshold for the array size (bytes) to be kept in the embed store.
|
||||
If the *compressed* array size is below this threshold, it will be
|
||||
stored in the embed store instead of as a separate file. If None,
|
||||
in-memory arrays are stored in the embed store and on-disk arrays
|
||||
are stored as separate files.
|
||||
C2Array objects will always be stored in the embed store,
|
||||
regardless of their size.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> tstore = TreeStore(localpath="my_tstore.b2z", mode="w")
|
||||
>>> # Create a hierarchy. Data is stored in leaf nodes.
|
||||
>>> # Structural nodes like /child0 and /child0/child1 are created automatically.
|
||||
>>> tstore["/child0/leaf1"] = np.array([1, 2, 3])
|
||||
>>> tstore["/child0/child1/leaf2"] = np.array([4, 5, 6])
|
||||
>>> tstore["/child0/child2"] = np.array([7, 8, 9])
|
||||
>>>
|
||||
>>> # Walk the tree structure
|
||||
>>> for path, children, nodes in tstore.walk("/child0"):
|
||||
... print(f"Path: {path}, Children: {sorted(children)}, Nodes: {sorted(nodes)}")
|
||||
Path: /child0, Children: ['/child0/child1'], Nodes: ['/child0/child2', '/child0/leaf1']
|
||||
Path: /child0/child1, Children: [], Nodes: ['/child0/child1/leaf2']
|
||||
>>>
|
||||
>>> # Get a subtree view
|
||||
>>> subtree = tstore.get_subtree("/child0")
|
||||
>>> sorted(list(subtree.keys()))
|
||||
['/child1/leaf2', '/child2', '/leaf1']
|
||||
|
||||
"""
|
||||
|
||||
# For some reason, we had to revert the explicit parametrisation of the
|
||||
# constructor to make benchmarks working again.
|
||||
def __init__(self, *args, _from_parent_store=None, **kwargs):
    """Create a TreeStore, or a subtree view sharing a parent's state.

    It supports the same arguments as :class:`blosc2.DictStore`.
    """
    if _from_parent_store is None:
        # Regular construction: initialise the base store and tag the
        # storage as a b2tree object.
        super().__init__(*args, **kwargs, _storage_meta={"b2tree": {"version": 1}})
    else:
        # Subtree view: reuse the parent's attributes instead of
        # initialising a store of our own.
        self.__dict__.update(_from_parent_store.__dict__)

    # An empty string means "the full tree".
    self.subtree_path = ""
|
||||
|
||||
def _is_vlmeta_key(self, key: str) -> bool:
    """Tell whether `key` names a vlmeta entry, which regular access must hide."""
    hidden_suffix = "/__vlmeta__"
    return key.endswith(hidden_suffix)
|
||||
|
||||
def _translate_key_to_full(self, key: str) -> str:
    """Map a key relative to the current subtree onto the full-tree key."""
    root = self.subtree_path
    if not root:
        # Full-tree view: keys are already absolute.
        return key
    # The subtree's own root maps to the subtree path itself.
    return root if key == "/" else root + key
|
||||
|
||||
def _translate_key_from_full(self, full_key: str) -> str | None:
|
||||
"""Translate full tree key to subtree-relative key."""
|
||||
if not self.subtree_path:
|
||||
return full_key
|
||||
if full_key == self.subtree_path:
|
||||
return "/"
|
||||
elif full_key.startswith(self.subtree_path + "/"):
|
||||
return full_key[len(self.subtree_path) :]
|
||||
else:
|
||||
# Key is not within this subtree
|
||||
return None
|
||||
|
||||
def _validate_key(self, key: str) -> str:
    """Check a hierarchical key and return its normalized form.

    Parameters
    ----------
    key : str
        The key to check.

    Returns
    -------
    normalized_key : str
        The key, with a leading '/' prepended when it was missing.

    Raises
    ------
    ValueError
        If the key is not a string, ends with '/' (root excepted),
        contains an empty segment ('//'), or contains a NUL, newline,
        carriage return or tab character.
    """
    if not isinstance(key, str):
        raise ValueError(f"Key must be a string, got {type(key)}")

    # Keys are always absolute; prepend the separator when it is missing.
    normalized = key if key.startswith("/") else "/" + key

    if normalized != "/" and normalized.endswith("/"):
        raise ValueError(f"Key cannot end with '/' (except for root), got: {normalized}")

    if "//" in normalized:
        raise ValueError(f"Key cannot contain empty path segments '//', got: {normalized}")

    # Reject control characters that might cause issues.
    for forbidden in ("\0", "\n", "\r", "\t"):
        if forbidden in normalized:
            raise ValueError(f"Key cannot contain invalid character {forbidden!r}, got: {normalized}")

    return normalized
|
||||
|
||||
def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
    """Add a node with hierarchical key validation.

    Parameters
    ----------
    key : str
        Hierarchical node key.
    value : np.ndarray or blosc2.NDArray or blosc2.C2Array or blosc2.SChunk
        to store.

    Raises
    ------
    ValueError
        If key doesn't follow hierarchical structure rules, if trying to
        assign to a structural path that already has children, or if trying
        to add a node below a path (at any depth) that already contains data.
    """
    key = self._validate_key(key)

    # A path that already has children is structural and cannot hold data.
    children = self.get_children(key)
    if children:
        raise ValueError(
            f"Cannot assign array to structural path '{key}' that already has children: {children}"
        )

    # Data lives in leaves only, so no ancestor of the new key may already
    # hold data.  Every ancestor is checked, not just the immediate parent:
    # otherwise assigning '/a/b/c' while '/a' holds data would silently turn
    # '/a' into a structural node and hide its data.
    ancestor = key
    while ancestor != "/":
        # Strip the last segment; an empty result means the parent is root.
        ancestor = ancestor.rsplit("/", 1)[0] or "/"
        if super().__contains__(self._translate_key_to_full(ancestor)):
            raise ValueError(
                f"Cannot add child '{key}' to path '{ancestor}' that already contains data"
            )

    full_key = self._translate_key_to_full(key)
    super().__setitem__(full_key, value)
|
||||
|
||||
def __getitem__(self, key: str) -> "NDArray | C2Array | SChunk | TreeStore":
    """Retrieve a stored node or a view of a subtree.

    A key that has children yields a TreeStore view rooted at that key;
    a key without children that holds data yields the stored object.

    Parameters
    ----------
    key : str
        Hierarchical node key.

    Returns
    -------
    out : blosc2.NDArray or blosc2.C2Array or blosc2.SChunk or TreeStore
        The stored array/chunk for a leaf node, or a TreeStore subtree view
        for an intermediate path with children.

    Raises
    ------
    KeyError
        If key is not found, or if it names a vlmeta entry.
    ValueError
        If key doesn't follow hierarchical structure rules.
    """
    key = self._validate_key(key)
    if self._is_vlmeta_key(key):
        raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.")

    full_key = self._translate_key_to_full(key)
    child_paths = self.get_children(key)
    holds_data = super().__contains__(full_key)

    # Children take precedence: such a key is presented as a subtree.
    if child_paths:
        return self.get_subtree(key)
    if not holds_data:
        # Neither a structural node nor a data node.
        raise KeyError(f"Key '{key}' not found")
    # Leaf node: hand back the stored object.
    return super().__getitem__(full_key)
|
||||
|
||||
def __delitem__(self, key: str) -> None:
    """Remove a node, or a whole subtree.

    When the key has descendants, every data node below it is removed as
    well; when it is a plain leaf, only that node is removed.

    Parameters
    ----------
    key : str
        Hierarchical node key.

    Raises
    ------
    KeyError
        If key is not found, or if it names a vlmeta entry.
    ValueError
        If key doesn't follow hierarchical structure rules.
    """
    key = self._validate_key(key)
    if self._is_vlmeta_key(key):
        raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.")

    # The key may exist as data, as a structural node with descendants, or both.
    holds_data = super().__contains__(self._translate_key_to_full(key))
    descendants = self.get_descendants(key)
    if not (holds_data or descendants):
        raise KeyError(f"Key '{key}' not found")

    # First collect every key backed by real data (structural nodes are not
    # stored, so they are skipped) ...
    doomed = [key] if holds_data else []
    for node in descendants:
        if super().__contains__(self._translate_key_to_full(node)):
            doomed.append(node)

    # ... then delete them from the underlying store.
    for node in doomed:
        super().__delitem__(self._translate_key_to_full(node))
|
||||
|
||||
def __contains__(self, key: str) -> bool:
    """Check whether a key exists in the underlying store.

    Parameters
    ----------
    key : str
        Hierarchical node key.

    Returns
    -------
    exists : bool
        True if key exists, False otherwise.  Invalid keys and vlmeta keys
        are reported as absent.
    """
    try:
        normalized = self._validate_key(key)
        # vlmeta entries are hidden from membership tests.
        visible = not self._is_vlmeta_key(normalized)
        return visible and super().__contains__(self._translate_key_to_full(normalized))
    except ValueError:
        # A key that fails validation is simply "not there".
        return False
|
||||
|
||||
def keys(self):
    """Return the set of keys visible in the current subtree view.

    The result holds the stored keys (relative to the subtree, vlmeta keys
    excluded) plus every intermediate path leading to them.
    """
    if self.subtree_path:
        # Keep only keys inside this subtree, expressed relative to it.
        visible = set()
        for full_key in super().keys():  # noqa: SIM118
            relative = self._translate_key_from_full(full_key)
            if relative is not None:
                visible.add(relative)
    else:
        visible = set(super().keys())

    # Hide vlmeta entries.
    visible = {name for name in visible if not self._is_vlmeta_key(name)}

    # Add structural paths: every proper prefix of a visible key that is
    # not itself a visible key.
    structural = set()
    for name in visible:
        segments = name.split("/")[1:]  # drop the empty element before the first '/'
        prefix = ""
        for segment in segments[:-1]:  # the last segment is the key itself
            prefix = prefix + "/" + segment
            if prefix and prefix != "/" and prefix not in visible:
                structural.add(prefix)

    return visible | structural
|
||||
|
||||
def __iter__(self) -> Iterator[str]:
    """Iterate over the keys reported by :meth:`keys` (vlmeta keys excluded)."""
    visible_keys = self.keys()
    return iter(visible_keys)
|
||||
|
||||
def items(self) -> Iterator[tuple[str, "NDArray | C2Array | SChunk | TreeStore"]]:
    """Yield ``(key, value)`` pairs for the current subtree view."""
    for name in self.keys():
        # Values are looked up through item access on the store itself.
        value = self[name]
        yield name, value
|
||||
|
||||
def get_children(self, path: str) -> list[str]:
    """Get the direct children of a path.

    Parameters
    ----------
    path : str
        The parent path to get children for.

    Returns
    -------
    children : list[str]
        Sorted list of direct child paths.
    """
    path = self._validate_key(path)
    prefix = "/" if path == "/" else path + "/"
    cut = len(prefix)

    # For a key such as /hierarchy/level1/data under prefix /hierarchy/,
    # the remainder is level1/data and the child name is level1.
    names = {
        key[cut:].split("/")[0]
        for key in self.keys()
        # keys() already filters vlmeta entries; checked again for safety.
        if not self._is_vlmeta_key(key) and key.startswith(prefix)
    }

    # prefix already ends with '/', so this rebuilds the child's full path.
    return sorted([prefix + name for name in names])
|
||||
|
||||
def get_descendants(self, path: str) -> list[str]:
    """Get every descendant of a path.

    Parameters
    ----------
    path : str
        The parent path to get descendants for.

    Returns
    -------
    descendants : list[str]
        Sorted list of all descendant paths.
    """
    path = self._validate_key(path)
    prefix = "/" if path == "/" else path + "/"

    below = {
        key
        for key in self.keys()
        # keys() already filters vlmeta entries; checked again for safety.
        if not self._is_vlmeta_key(key) and key.startswith(prefix) and key != path
    }
    return sorted(below)
|
||||
|
||||
def walk(self, path: str = "/", topdown: bool = True) -> Iterator[tuple[str, list[str], list[str]]]:
    """Walk the tree structure.

    Similar to os.walk(): every structural node at or below `path` is
    visited, and for each one the names (not full paths) of its structural
    children and of its data nodes are reported.

    Parameters
    ----------
    path : str, optional
        The root path to start walking from. Default is "/".
    topdown : bool, optional
        If True (default), a node is yielded before its children.
        If False, a node is yielded after its children, mimicking
        os.walk(topdown=False).

    Yields
    ------
    path : str
        Current path being walked.
    children : list[str]
        Names of the direct children that have descendants of their own.
    nodes : list[str]
        Names of the direct children that hold data and have no descendants.

    Examples
    --------
    >>> for path, children, nodes in tstore.walk("/child0", topdown=True):
    ...     print(f"Path: {path}, Children: {children}, Nodes: {nodes}")
    """
    path = self._validate_key(path)
    direct_children = self.get_children(path)

    # Sort each direct child into "directory" (has descendants) or "leaf",
    # keeping only the last path segment as its name.
    children_dirs = []
    leaf_nodes = []
    for child in direct_children:
        short_name = child.split("/")[-1]
        bucket = children_dirs if self.get_descendants(child) else leaf_nodes
        bucket.append(short_name)

    # Defensive filtering: names must be non-empty strings without '/'.
    children_dirs = [
        name for name in children_dirs if isinstance(name, str) and "/" not in name and name != ""
    ]
    leaf_nodes = [
        name for name in leaf_nodes if isinstance(name, str) and "/" not in name and name != ""
    ]

    # Keep only the leaves that are real data nodes in the backing store.
    confirmed: list[str] = []
    for name in leaf_nodes:
        relative = "/" + name if path == "/" else path + "/" + name
        if super().__contains__(self._translate_key_to_full(relative)):
            confirmed.append(name)
    leaf_nodes = confirmed

    if topdown:
        # Pre-order: report this level before descending.
        yield path, children_dirs, leaf_nodes

    # Descend into every child that still has descendants at this point.
    for child in direct_children:
        if self.get_descendants(child):
            yield from self.walk(child, topdown=topdown)

    if not topdown:
        # Post-order: report this level after its children.
        yield path, children_dirs, leaf_nodes
|
||||
|
||||
def get_subtree(self, path: str) -> "TreeStore":
    """Create a view of the tree rooted at `path`.

    Parameters
    ----------
    path : str
        The path that becomes the root of the view (relative to the current
        subtree; a leading '/' is added if missing).

    Returns
    -------
    subtree : TreeStore
        A new TreeStore instance that presents the subtree as if `path` were the root.

    Examples
    --------
    >>> tstore["/child0/child1/data"] = np.array([1, 2, 3])
    >>> tstore["/child0/child1/grandchild"] = np.array([4, 5, 6])
    >>> subtree = tstore.get_subtree("/child0/child1")
    >>> list(subtree.keys())
    ['/data', '/grandchild']
    >>> subtree["/grandchild"][:]
    array([4, 5, 6])

    Notes
    -----
    This is equivalent to `tstore[path]` when path is a structural path.
    """
    full_path = self._translate_key_to_full(self._validate_key(path))

    # The view copies this store's attributes (and so shares its underlying
    # storage); only the subtree root differs.
    view = TreeStore(_from_parent_store=self)
    view.subtree_path = full_path
    return view
|
||||
|
||||
@property
def vlmeta(self) -> MutableMapping:
    """Access variable-length metadata for the TreeStore or current subtree.

    The metadata is the vlmeta of an internal SChunk kept under
    '/__vlmeta__' for the root tree, or '<subtree_path>/__vlmeta__' for a
    subtree.  When no such SChunk is stored yet, an empty in-memory one is
    created.

    Notes
    -----
    Each access re-reads the SChunk from the store (or creates a new one),
    records it and its key on the instance, and returns a new proxy.  The
    proxy adds the following on top of the raw vlmeta object:
    - Bulk get via `[:]` returns a plain dict.
    - Read-only mode of the TreeStore is enforced for writes and deletions.
    """
    root = self.subtree_path
    vlmeta_key = f"{root}/__vlmeta__" if root else "/__vlmeta__"

    # super() is used so that our own vlmeta-key filtering is bypassed.
    if super().__contains__(vlmeta_key):
        # Reload from the store so that the latest snapshot is used.
        self._vlmeta = super().__getitem__(vlmeta_key)
    else:
        # Not stored yet: start from an empty in-memory SChunk, which gets
        # persisted on the first write.
        self._vlmeta = blosc2.SChunk()

    # Remembered for _persist_vlmeta() and for the proxy's lazy registration.
    self._vlmeta_key = vlmeta_key

    return vlmetaProxy(self, self._vlmeta.vlmeta)
|
||||
|
||||
def _persist_vlmeta(self) -> None:
    """Write the current vlmeta SChunk back into the embed store.

    The embed store keeps a serialized snapshot of stored objects, so
    mutating the in-memory SChunk does not update that snapshot.  The
    update is emulated by deleting the stored object and adding it again.
    Nothing happens when no vlmeta key has been recorded yet, when there
    is no embed store, or when the key is not present in the embed store.
    """
    if not hasattr(self, "_vlmeta_key"):
        return
    vlmeta_key = self._vlmeta_key

    # Only the embedded case is expected; anything else is left untouched.
    if not hasattr(self, "_estore") or vlmeta_key not in self._estore:
        return

    # Replace the stored snapshot.
    with contextlib.suppress(KeyError):
        del self._estore[vlmeta_key]
    self._estore[vlmeta_key] = self._vlmeta
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Example usage
    localpath = "example_tstore.b2z"

    try:
        with TreeStore(localpath, mode="w") as tstore:
            # Create a hierarchical structure.
            # Note: data is stored in leaf nodes, not structural nodes.
            tstore["/child0/data_node"] = np.array([1, 2, 3])
            tstore["/child0/child1/data_node"] = np.array([4, 5, 6])
            tstore["/child0/child2"] = np.array([7, 8, 9])
            tstore["/child0/child1/grandchild"] = np.array([10, 11, 12])
            tstore["/other"] = np.array([13, 14, 15])

            print("TreeStore keys:", sorted(tstore.keys()))

            # Test subtree view
            root_subtree = tstore["/child0"]
            root_subtree.vlmeta["foo"] = "bar"
            print("Subtree keys:", sorted(root_subtree.keys()))
            # Use [:] to get a plain dict; printing the proxy object itself
            # would only show its default object repr.
            print("Subtree vlmeta:", root_subtree.vlmeta[:])

            # Walk the tree
            for path, children, nodes in root_subtree.walk("/"):
                print(f"Path: {path}, Children: {children}, Nodes: {nodes}")
    finally:
        # Clean up, even when the demo above fails part-way through.
        if os.path.exists(localpath):
            os.remove(localpath)
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,2 @@
|
||||
__version__ = "4.1.2"
|
||||
__array_api_version__ = "2024.12"
|
||||
@ -0,0 +1,5 @@
|
||||
|
||||
import sys
|
||||
from cpuinfo.cpuinfo import *
|
||||
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
|
||||
# Import the cpuinfo package and immediately run its main() function.
# NOTE(review): presumably this is the package's `__main__` module, executed
# via `python -m cpuinfo` -- the file name is not visible here; confirm.
import cpuinfo

cpuinfo.main()
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,132 @@
|
||||
#ifndef LIBTCC_H
|
||||
#define LIBTCC_H
|
||||
|
||||
#ifndef LIBTCCAPI
|
||||
# define LIBTCCAPI
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*****************************/
|
||||
/* set custom allocator for all allocations (optional), NULL for default. */
|
||||
typedef void *TCCReallocFunc(void *ptr, unsigned long size);
|
||||
LIBTCCAPI void tcc_set_realloc(TCCReallocFunc *my_realloc);
|
||||
|
||||
/*****************************/
|
||||
typedef struct TCCState TCCState;
|
||||
|
||||
/* create a new TCC compilation context */
|
||||
LIBTCCAPI TCCState *tcc_new(void);
|
||||
|
||||
/* free a TCC compilation context */
|
||||
LIBTCCAPI void tcc_delete(TCCState *s);
|
||||
|
||||
/* set CONFIG_TCCDIR at runtime */
|
||||
LIBTCCAPI void tcc_set_lib_path(TCCState *s, const char *path);
|
||||
|
||||
/* set error/warning callback (optional) */
|
||||
typedef void TCCErrorFunc(void *opaque, const char *msg);
|
||||
LIBTCCAPI void tcc_set_error_func(TCCState *s, void *error_opaque, TCCErrorFunc *error_func);
|
||||
|
||||
/* set options as from command line (multiple supported) */
|
||||
LIBTCCAPI int tcc_set_options(TCCState *s, const char *str);
|
||||
|
||||
/*****************************/
|
||||
/* preprocessor */
|
||||
|
||||
/* add include path */
|
||||
LIBTCCAPI int tcc_add_include_path(TCCState *s, const char *pathname);
|
||||
|
||||
/* add a system include path */
|
||||
LIBTCCAPI int tcc_add_sysinclude_path(TCCState *s, const char *pathname);
|
||||
|
||||
/* define preprocessor symbol 'sym'. value can be NULL, sym can be "sym=val" */
|
||||
LIBTCCAPI void tcc_define_symbol(TCCState *s, const char *sym, const char *value);
|
||||
|
||||
/* undefine preprocessor symbol 'sym' */
|
||||
LIBTCCAPI void tcc_undefine_symbol(TCCState *s, const char *sym);
|
||||
|
||||
/*****************************/
|
||||
/* compiling */
|
||||
|
||||
/* add a file (C file, dll, object, library, ld script). Return -1 if error. */
|
||||
LIBTCCAPI int tcc_add_file(TCCState *s, const char *filename);
|
||||
|
||||
/* compile a string containing a C source. Return -1 if error. */
|
||||
LIBTCCAPI int tcc_compile_string(TCCState *s, const char *buf);
|
||||
|
||||
/* Tip: to have more specific errors/warnings from tcc_compile_string(),
|
||||
you can prefix the string with "#line <num> \"<filename>\"\n" */
|
||||
|
||||
/*****************************/
|
||||
/* linking commands */
|
||||
|
||||
/* set output type. MUST BE CALLED before any compilation */
|
||||
LIBTCCAPI int tcc_set_output_type(TCCState *s, int output_type);
|
||||
#define TCC_OUTPUT_MEMORY 1 /* output will be run in memory */
|
||||
#define TCC_OUTPUT_EXE 2 /* executable file */
|
||||
#define TCC_OUTPUT_DLL 4 /* dynamic library */
|
||||
#define TCC_OUTPUT_OBJ 3 /* object file */
|
||||
#define TCC_OUTPUT_PREPROCESS 5 /* only preprocess */
|
||||
|
||||
/* equivalent to -Lpath option */
|
||||
LIBTCCAPI int tcc_add_library_path(TCCState *s, const char *pathname);
|
||||
|
||||
/* the library name is the same as the argument of the '-l' option */
|
||||
LIBTCCAPI int tcc_add_library(TCCState *s, const char *libraryname);
|
||||
|
||||
/* add a symbol to the compiled program */
|
||||
LIBTCCAPI int tcc_add_symbol(TCCState *s, const char *name, const void *val);
|
||||
|
||||
/* output an executable, library or object file. DO NOT call
|
||||
tcc_relocate() before. */
|
||||
LIBTCCAPI int tcc_output_file(TCCState *s, const char *filename);
|
||||
|
||||
/* link and run main() function and return its value. DO NOT call
|
||||
tcc_relocate() before. */
|
||||
LIBTCCAPI int tcc_run(TCCState *s, int argc, char **argv);
|
||||
|
||||
/* do all relocations (needed before using tcc_get_symbol()) */
|
||||
LIBTCCAPI int tcc_relocate(TCCState *s1);
|
||||
|
||||
/* return symbol value or NULL if not found */
|
||||
LIBTCCAPI void *tcc_get_symbol(TCCState *s, const char *name);
|
||||
|
||||
/* list all (global) symbols and their values via 'symbol_cb()' */
|
||||
LIBTCCAPI void tcc_list_symbols(TCCState *s, void *ctx,
|
||||
void (*symbol_cb)(void *ctx, const char *name, const void *val));
|
||||
|
||||
/* experimental/advanced section (see libtcc_test_mt.c for an example) */
|
||||
|
||||
/* catch runtime exceptions (optionally limit backtraces at top_func),
|
||||
when using tcc_set_options("-bt") and when not using tcc_run() */
|
||||
LIBTCCAPI void *_tcc_setjmp(TCCState *s1, void *jmp_buf, void *top_func, void *longjmp);
|
||||
#define tcc_setjmp(s1,jb,f) setjmp(_tcc_setjmp(s1, jb, f, longjmp))
|
||||
|
||||
/* debugging */
|
||||
/* For debugging to work you have to enable it with tcc_set_options */
|
||||
|
||||
/* compile a string containing a C source. Return -1 if error.
|
||||
Write the string to file filename if debug is set. */
|
||||
LIBTCCAPI int tcc_compile_string_file(TCCState *s, const char *buf, const char *filename);
|
||||
|
||||
/* Output object file. This must be done after tcc_relocate.
|
||||
It only generates the file if debug is set.
|
||||
The filename can be loaded with gdb command add-symbol-file */
|
||||
LIBTCCAPI int elf_output_obj(TCCState *s1, const char *filename);
|
||||
|
||||
/* Set base address for wasm32 data/stack layout (default 1024).
|
||||
Call before tcc_output_file(). Only meaningful for TCC_TARGET_WASM32. */
|
||||
LIBTCCAPI void tcc_set_wasm_data_base(TCCState *s, unsigned int base);
|
||||
|
||||
/* custom error printer for runtime exceptions. Returning 0 stops backtrace */
|
||||
typedef int TCCBtFunc(void *udata, void *pc, const char *file, int line, const char* func, const char *msg);
|
||||
LIBTCCAPI void tcc_set_backtrace_func(TCCState *s1, void* userdata, TCCBtFunc*);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Binary file not shown.
@ -0,0 +1 @@
|
||||
pip
|
||||
@ -0,0 +1,265 @@
|
||||
Metadata-Version: 2.4
|
||||
Name: msgpack
|
||||
Version: 1.1.2
|
||||
Summary: MessagePack serializer
|
||||
Author-email: Inada Naoki <songofacandy@gmail.com>
|
||||
License-Expression: Apache-2.0
|
||||
Project-URL: Homepage, https://msgpack.org/
|
||||
Project-URL: Documentation, https://msgpack-python.readthedocs.io/
|
||||
Project-URL: Repository, https://github.com/msgpack/msgpack-python/
|
||||
Project-URL: Tracker, https://github.com/msgpack/msgpack-python/issues
|
||||
Project-URL: Changelog, https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst
|
||||
Keywords: msgpack,messagepack,serializer,serialization,binary
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Topic :: File Formats
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Requires-Python: >=3.9
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: COPYING
|
||||
Dynamic: license-file
|
||||
|
||||
# MessagePack for Python
|
||||
|
||||
[Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml)
|
||||
[Documentation Status](https://msgpack-python.readthedocs.io/en/latest/?badge=latest)
|
||||
|
||||
## What is this?
|
||||
|
||||
[MessagePack](https://msgpack.org/) is an efficient binary serialization format.
|
||||
It lets you exchange data among multiple languages like JSON.
|
||||
But it's faster and smaller.
|
||||
This package provides CPython bindings for reading and writing MessagePack data.
|
||||
|
||||
## Install
|
||||
|
||||
```
|
||||
$ pip install msgpack
|
||||
```
|
||||
|
||||
### Pure Python implementation
|
||||
|
||||
The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy.
|
||||
|
||||
But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy.
|
||||
|
||||
|
||||
### Windows
|
||||
|
||||
If you can't use a binary distribution, you need to install Visual Studio
|
||||
or the Windows SDK on Windows.
|
||||
Without the extension, the pure Python implementation on CPython runs slowly.
|
||||
|
||||
|
||||
## How to use
|
||||
|
||||
### One-shot pack & unpack
|
||||
|
||||
Use `packb` for packing and `unpackb` for unpacking.
|
||||
msgpack provides `dumps` and `loads` as aliases for compatibility with
|
||||
`json` and `pickle`.
|
||||
|
||||
`pack` and `dump` pack to a file-like object.
|
||||
`unpack` and `load` unpack from a file-like object.
|
||||
|
||||
```pycon
|
||||
>>> import msgpack
|
||||
>>> msgpack.packb([1, 2, 3])
|
||||
b'\x93\x01\x02\x03'
|
||||
>>> msgpack.unpackb(_)
|
||||
[1, 2, 3]
|
||||
```
|
||||
|
||||
Read the docstring for options.
|
||||
|
||||
|
||||
### Streaming unpacking
|
||||
|
||||
`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one
|
||||
stream (or from bytes provided through its `feed` method).
|
||||
|
||||
```py
|
||||
import msgpack
|
||||
from io import BytesIO
|
||||
|
||||
buf = BytesIO()
|
||||
for i in range(100):
|
||||
buf.write(msgpack.packb(i))
|
||||
|
||||
buf.seek(0)
|
||||
|
||||
unpacker = msgpack.Unpacker(buf)
|
||||
for unpacked in unpacker:
|
||||
print(unpacked)
|
||||
```
|
||||
|
||||
|
||||
### Packing/unpacking of custom data types
|
||||
|
||||
It is also possible to pack/unpack custom data types. Here is an example for
|
||||
`datetime.datetime`.
|
||||
|
||||
```py
|
||||
import datetime
|
||||
import msgpack
|
||||
|
||||
useful_dict = {
|
||||
"id": 1,
|
||||
"created": datetime.datetime.now(),
|
||||
}
|
||||
|
||||
def decode_datetime(obj):
|
||||
if '__datetime__' in obj:
|
||||
obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
|
||||
return obj
|
||||
|
||||
def encode_datetime(obj):
|
||||
if isinstance(obj, datetime.datetime):
|
||||
return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}
|
||||
return obj
|
||||
|
||||
|
||||
packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
|
||||
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
|
||||
```
|
||||
|
||||
`Unpacker`'s `object_hook` callback receives a dict; the
|
||||
`object_pairs_hook` callback may instead be used to receive a list of
|
||||
key-value pairs.
|
||||
|
||||
NOTE: msgpack can now encode a `datetime` with tzinfo into the standard ext type.
|
||||
See `datetime` option in `Packer` docstring.
|
||||
|
||||
|
||||
### Extended types
|
||||
|
||||
It is also possible to pack/unpack custom data types using the **ext** type.
|
||||
|
||||
```pycon
|
||||
>>> import msgpack
|
||||
>>> import array
|
||||
>>> def default(obj):
|
||||
... if isinstance(obj, array.array) and obj.typecode == 'd':
|
||||
... return msgpack.ExtType(42, obj.tobytes())
|
||||
... raise TypeError("Unknown type: %r" % (obj,))
|
||||
...
|
||||
>>> def ext_hook(code, data):
|
||||
... if code == 42:
|
||||
... a = array.array('d')
|
||||
... a.frombytes(data)
|
||||
... return a
|
||||
... return msgpack.ExtType(code, data)
|
||||
...
|
||||
>>> data = array.array('d', [1.2, 3.4])
|
||||
>>> packed = msgpack.packb(data, default=default)
|
||||
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
|
||||
>>> data == unpacked
|
||||
True
|
||||
```
|
||||
|
||||
|
||||
### Advanced unpacking control
|
||||
|
||||
As an alternative to iteration, `Unpacker` objects provide `unpack`,
|
||||
`skip`, `read_array_header`, and `read_map_header` methods. The former two
|
||||
read an entire message from the stream, respectively deserializing and returning
|
||||
the result, or ignoring it. The latter two methods return the number of elements
|
||||
in the upcoming container, so that each element in an array, or key-value pair
|
||||
in a map, can be unpacked or skipped individually.
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
### String and binary types in the old MessagePack spec
|
||||
|
||||
Early versions of msgpack didn't distinguish string and binary types.
|
||||
The type for representing both string and binary types was named **raw**.
|
||||
|
||||
You can pack into and unpack from this old spec using `use_bin_type=False`
|
||||
and `raw=True` options.
|
||||
|
||||
```pycon
|
||||
>>> import msgpack
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True)
|
||||
[b'spam', b'eggs']
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False)
|
||||
[b'spam', 'eggs']
|
||||
```
|
||||
|
||||
### ext type
|
||||
|
||||
To use the **ext** type, pass a `msgpack.ExtType` object to the packer.
|
||||
|
||||
```pycon
|
||||
>>> import msgpack
|
||||
>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy'))
|
||||
>>> msgpack.unpackb(packed)
|
||||
ExtType(code=42, data=b'xyzzy')
|
||||
```
|
||||
|
||||
You can use it with `default` and `ext_hook`. See the "Extended types" section above.
|
||||
|
||||
|
||||
### Security
|
||||
|
||||
When unpacking data received from an unreliable source, msgpack provides
|
||||
two security options.
|
||||
|
||||
`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size.
|
||||
It is also used to limit preallocated list sizes.
|
||||
|
||||
`strict_map_key` (default: `True`) limits the type of map keys to bytes and str.
|
||||
While the MessagePack spec doesn't limit map key types,
|
||||
there is a risk of a hash DoS.
|
||||
If you need to support other types for map keys, use `strict_map_key=False`.
|
||||
|
||||
|
||||
### Performance tips
|
||||
|
||||
CPython's GC starts when the number of allocated objects grows.
|
||||
This means unpacking may trigger unnecessary GC.
|
||||
You can use `gc.disable()` when unpacking a large message.
|
||||
|
||||
A list is the default sequence type in Python.
|
||||
However, a tuple is lighter than a list.
|
||||
You can use `use_list=False` while unpacking when performance is important.
|
||||
|
||||
|
||||
## Major breaking changes in the history
|
||||
|
||||
### msgpack 0.5
|
||||
|
||||
The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5.
|
||||
|
||||
When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before
|
||||
`pip install -U msgpack`.
|
||||
|
||||
|
||||
### msgpack 1.0
|
||||
|
||||
* Python 2 support
|
||||
|
||||
* The extension module no longer supports Python 2.
|
||||
The pure Python implementation (`msgpack.fallback`) is used for Python 2.
|
||||
|
||||
* msgpack 1.0.6 drops official support of Python 2.7, as pip and
|
||||
GitHub Action "setup-python" no longer supports Python 2.7.
|
||||
|
||||
* Packer
|
||||
|
||||
* Packer uses `use_bin_type=True` by default.
|
||||
Bytes are encoded in the bin type in MessagePack.
|
||||
* The `encoding` option is removed. UTF-8 is always used.
|
||||
|
||||
* Unpacker
|
||||
|
||||
* Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings
|
||||
and decodes them to Python str (Unicode) objects.
|
||||
* `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str).
|
||||
* The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks.
|
||||
You need to pass `max_buffer_size=0` if you have large but safe data.
|
||||
* The default value of `strict_map_key` is changed to True to avoid hash DoS.
|
||||
You need to pass `strict_map_key=False` if you have data that contain map keys
|
||||
whose type is neither bytes nor str.
|
||||
@ -0,0 +1,15 @@
|
||||
msgpack-1.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
msgpack-1.1.2.dist-info/METADATA,sha256=AzsCYs3CsW_Ihmrc0TKSERKnM0C4ZRHj0obd6hZ7EWQ,8356
|
||||
msgpack-1.1.2.dist-info/RECORD,,
|
||||
msgpack-1.1.2.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
|
||||
msgpack-1.1.2.dist-info/licenses/COPYING,sha256=T73_QuukWTwW96fqcHiIOytzSHCh4rY2KEzi3OYr9Pc,628
|
||||
msgpack-1.1.2.dist-info/top_level.txt,sha256=2tykSY1pXdiA2xYTDR6jPw0qI5ZGxRihyhf4S5hZyXk,8
|
||||
msgpack/__init__.py,sha256=VyYtXI_OKFlyox4xlPRWvwU74d5ll6L6Oj01CJcxdqg,1164
|
||||
msgpack/__pycache__/__init__.cpython-311.pyc,,
|
||||
msgpack/__pycache__/exceptions.cpython-311.pyc,,
|
||||
msgpack/__pycache__/ext.cpython-311.pyc,,
|
||||
msgpack/__pycache__/fallback.cpython-311.pyc,,
|
||||
msgpack/_cmsgpack.cp311-win_amd64.pyd,sha256=7zWzgU8v1k7_LDlx4HhhNaKcEa6b2p-HBXH988Wr-BY,128000
|
||||
msgpack/exceptions.py,sha256=2fCtczricqQgdT3NtW6cTqmZn3WA7GQtmlPuT-NhLyM,1129
|
||||
msgpack/ext.py,sha256=9gDKxuEHfYWdPRzcpFwFYyuBx0puprlQflDGOaccRhE,5896
|
||||
msgpack/fallback.py,sha256=EAP6g9N7tTWvTw01RtnXXdYg-zZn21FcGVfdBid8aUg,33319
|
||||
@ -0,0 +1,5 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: setuptools (80.9.0)
|
||||
Root-Is-Purelib: false
|
||||
Tag: cp311-cp311-win_amd64
|
||||
|
||||
@ -0,0 +1,14 @@
|
||||
Copyright (C) 2008-2011 INADA Naoki <songofacandy@gmail.com>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
@ -0,0 +1 @@
|
||||
msgpack
|
||||
@ -0,0 +1,55 @@
|
||||
# ruff: noqa: F401
|
||||
import os
|
||||
|
||||
from .exceptions import * # noqa: F403
|
||||
from .ext import ExtType, Timestamp
|
||||
|
||||
version = (1, 1, 2)
|
||||
__version__ = "1.1.2"
|
||||
|
||||
|
||||
if os.environ.get("MSGPACK_PUREPYTHON"):
|
||||
from .fallback import Packer, Unpacker, unpackb
|
||||
else:
|
||||
try:
|
||||
from ._cmsgpack import Packer, Unpacker, unpackb
|
||||
except ImportError:
|
||||
from .fallback import Packer, Unpacker, unpackb
|
||||
|
||||
|
||||
def pack(o, stream, **kwargs):
    """
    Serialize `o` and write the packed result to `stream`.

    All keyword arguments are forwarded to :class:`Packer`;
    see that class for the available options.
    """
    serializer = Packer(**kwargs)
    # Resolve the writer before packing so the evaluation order matches
    # a direct ``stream.write(serializer.pack(o))`` call.
    write = stream.write
    write(serializer.pack(o))
|
||||
|
||||
|
||||
def packb(o, **kwargs):
    """
    Serialize `o` and return the packed bytes.

    All keyword arguments are forwarded to :class:`Packer`;
    see that class for the available options.
    """
    serializer = Packer(**kwargs)
    return serializer.pack(o)
|
||||
|
||||
|
||||
def unpack(stream, **kwargs):
    """
    Read the whole of `stream` and unpack one object from it.

    Raises `ExtraData` when `stream` contains extra bytes.
    Keyword arguments are forwarded to `unpackb`;
    see :class:`Unpacker` for the available options.
    """
    payload = stream.read()
    return unpackb(payload, **kwargs)
|
||||
|
||||
|
||||
# Aliases for compatibility with simplejson/marshal/pickle.
|
||||
load = unpack
|
||||
loads = unpackb
|
||||
|
||||
dump = pack
|
||||
dumps = packb
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue