fix: 增加可交易日和涨跌停标志计算

master
Lxy 3 months ago
parent bcdcf165d2
commit 2bb5ce4a0e

@ -0,0 +1,167 @@
# 股票K线接口响应数据说明
## 接口信息
**接口地址**: `GET /v1/stock/klines/{symbol}`
**请求参数**:
| 参数 | 类型 | 必填 | 说明 |
|------|------|------|------|
| symbol | string | 是 | 标的代码,如 000001.SZ |
| start | string | 是 | 开始日期,格式 YYYYMMDD |
| end | string | 是 | 结束日期,格式 YYYYMMDD |
| freq | string | 否 | 周期,默认 1d (1m/5m/15m/30m/60m/1d/1w/1M) |
| adjust | string | 否 | 复权类型,默认空 (qfq/hfq) |
**请求头**:
```
X-API-Key: your_api_key
```
---
## 响应数据结构
### 顶层结构
```json
{
"code": 0,
"message": "success",
"data": {
"symbol": "000001.SZ",
"name": "平安银行",
"freq": "1d",
"adjust": "NONE",
"count": 10,
"items": [
// K线数据项数组
]
}
}
```
### K线数据项 (items)
每个 K 线数据项包含以下字段:
#### 基础字段
| 字段名 | 类型 | 说明 | 示例 |
|--------|------|------|------|
| symbol | string | 标的代码 | "000001.SZ" |
| time | string (ISO8601) | 时间戳 | "2026-03-01T00:00:00" |
| open | number | 开盘价 | 10.5 |
| high | number | 最高价 | 11.2 |
| low | number | 最低价 | 10.3 |
| close | number | 收盘价 | 10.8 |
| volume | integer | 成交量(股) | 100000 |
| amount | number | 成交金额(元) | 1080000 |
#### 扩展字段
| 字段名 | 类型 | 说明 | 示例 |
|--------|------|------|------|
| trade_date | string | 交易日 (YYYY-MM-DD) | "2026-03-01" |
| is_limit_up | boolean | 是否涨停 | false |
| is_limit_down | boolean | 是否跌停 | false |
| total_market_cap | number | 总市值(元) | 1500000000 |
| float_market_cap | number | 流通市值(元) | 1200000000 |
| inst_holding_ratio | number | 机构持仓占比(%) | 25.5 |
| trading_days | integer | 可交易日数(从上市日至该 K 线交易日,含当日) | 5000 |
| created_at | string (ISO8601) | 数据创建时间 | "2026-03-01T12:00:00" |
---
## 完整响应示例
```json
{
"code": 0,
"message": "success",
"data": {
"symbol": "000001.SZ",
"freq": "1d",
"adjust": "NONE",
"count": 2,
"items": [
{
"symbol": "000001.SZ",
"time": "2026-03-01T00:00:00",
"open": 10.5,
"high": 11.2,
"low": 10.3,
"close": 10.8,
"volume": 100000,
"amount": 1080000,
"trade_date": "2026-03-01",
"is_limit_up": false,
"is_limit_down": false,
"total_market_cap": 1500000000,
"float_market_cap": 1200000000,
"inst_holding_ratio": 25.5,
"trading_days": 5000,
"created_at": "2026-03-01T12:00:00"
},
{
"symbol": "000001.SZ",
"time": "2026-03-02T00:00:00",
"open": 10.8,
"high": 11.5,
"low": 10.6,
"close": 11.2,
"volume": 120000,
"amount": 1344000,
"trade_date": "2026-03-02",
"is_limit_up": true,
"is_limit_down": false,
"total_market_cap": 1550000000,
"float_market_cap": 1240000000,
"inst_holding_ratio": 25.6,
"trading_days": 5001,
"created_at": "2026-03-02T12:00:00"
}
]
}
}
```
---
## 字段说明
### 涨停跌停判断
- **is_limit_up**: 当日是否涨停(收盘价 >= 涨停价 × 0.995,即允许 0.5% 的误差)
- **is_limit_down**: 当日是否跌停(收盘价 <= 跌停价 × 1.005,即允许 0.5% 的误差)
- 无法获取涨停价或跌停价时,两个标志均返回 false
### 市值数据
- **total_market_cap**: 总市值 = 收盘价 × 总股本(取最新一期股本数据;无法获取股本时为 null)
- **float_market_cap**: 流通市值 = 收盘价 × 流通股本(取最新一期股本数据;无法获取股本时为 null)
### 机构持仓
- **inst_holding_ratio**: 机构持仓占比,表示机构投资者持有该股票的比例
### 交易日期
- **trade_date**: 格式为 "YYYY-MM-DD",方便前端展示
- **trading_days**: 从上市日期到该 K 线交易日(含当日)的可交易日数
---
## 测试脚本
使用以下命令测试接口:
```bash
python test_klines_api.py
```
或直接使用 curl:
```bash
curl -X GET "http://localhost:8080/v1/stock/klines/000001.SZ?start=20260301&end=20260310&freq=1d" \
-H "X-API-Key: demo-api-key-2024"
```

@ -82,6 +82,47 @@ class AmazingDataAdapter(DataSourceAdapter):
else: else:
raise ValueError(f"不支持的日期格式: {d}") raise ValueError(f"不支持的日期格式: {d}")
def _get_list_date(self, symbol: str) -> Optional[int]:
    """Look up the listing date of a stock.

    The code-info table is consulted first, trying several candidate column
    names; the historical code list is used as a fallback. A value that
    cannot be converted is skipped so that the remaining candidates are
    still tried.

    Args:
        symbol: Security code used as the index label of the SDK tables,
            e.g. "000001.SZ".

    Returns:
        The listing date as an integer in YYYYMMDD form, or None when no
        source yields a usable value or a lookup fails.
    """
    import numbers  # local import so the method does not depend on a file-level change

    def _to_yyyymmdd(value) -> Optional[int]:
        """Convert one raw cell value to a YYYYMMDD integer, or None if unusable."""
        try:
            if not pd.notna(value):
                return None
            if isinstance(value, str):
                return int(value.strip().replace('-', '').replace('/', ''))
            if hasattr(value, 'strftime'):
                # pd.Timestamp and datetime/date objects
                return int(value.strftime('%Y%m%d'))
            if isinstance(value, numbers.Real):
                # numbers.Real also matches NumPy scalars such as np.int64,
                # which are NOT instances of the built-in int
                return int(value)
        except (ValueError, TypeError):
            # Unparseable text, or a non-scalar cell (e.g. duplicated index label)
            return None
        return None

    try:
        security_type = SecurityType.STOCK_A.value

        # Source 1: the code-info table
        code_info_df = self._base_data.get_code_info(security_type=security_type)
        if symbol in code_info_df.index:
            # The column name is not known for certain, so try the likely spellings
            for field in ('list_date', 'LIST_DATE', 'listDate', 'founded_date'):
                if field not in code_info_df.columns:
                    continue
                list_date = _to_yyyymmdd(code_info_df.loc[symbol, field])
                if list_date is not None:
                    return list_date

        # Source 2: the historical code list (best effort; failure is not fatal)
        try:
            hist_codes = self._base_data.get_hist_code_list(security_type=security_type)
            if symbol in hist_codes.index and 'list_date' in hist_codes.columns:
                list_date = _to_yyyymmdd(hist_codes.loc[symbol, 'list_date'])
                if list_date is not None:
                    return list_date
        except Exception as e:
            print(f"[amazingdata_adapter]从历史代码列表获取上市日期失败: {e}")

        return None
    except Exception as e:
        print(f"[amazingdata_adapter]获取上市日期失败: {e}")
        return None
async def connect(self, config: dict) -> None: async def connect(self, config: dict) -> None:
"""建立连接""" """建立连接"""
try: try:
@ -210,75 +251,167 @@ class AmazingDataAdapter(DataSourceAdapter):
print(f"[amazingdata_adapter _fetch_klines_sync]正在获取K线数据: 代码={codes}, 日期范围={start_date}~{end_date}, 周期={period_value}") print(f"[amazingdata_adapter _fetch_klines_sync]正在获取K线数据: 代码={codes}, 日期范围={start_date}~{end_date}, 周期={period_value}")
# 获取K线数据 - 将周期值转换为 SDK 的常量 # 获取K线数据 - 将周期值转换为 SDK 的常量
print(f"[amazingdata_adapter _fetch_klines_sync]SDK 周期值: {period_value}, type: {type(period_value)}")
kline_dict = self._market_data.query_kline( kline_dict = self._market_data.query_kline(
code_list=codes, code_list=codes,
begin_date=start_int, begin_date=start_int,
end_date=end_int, end_date=end_int,
period=period_value period=period_value
) )
print(f"[amazingdata_adapter _fetch_klines_sync]已同步获取 {symbol}{period_value} 周期数据")
print(f"[amazingdata_adapter _fetch_klines_sync]数据预览: {kline_dict.get(symbol).head() if symbol in kline_dict else '无数据'}")
if symbol not in kline_dict:
info(f"No kline data found for {symbol}")
return []
print(f"[amazingdata_adapter _fetch_klines_sync]获取到 {kline_dict} 的K线数据") # 打印原始数据结构
df = kline_dict[symbol]
print(f"[amazingdata_adapter _fetch_klines_sync]获取到 {len(df)} 条K线数据")
# ============================================
# 1. 获取证券基本信息(涨停价、跌停价)
# ============================================
print(f"[amazingdata_adapter _fetch_klines_sync]正在获取证券基本信息...")
try:
code_info_df = self._base_data.get_code_info(security_type=SecurityType.STOCK_A.value)
# 提取当前股票的涨停价和跌停价
if symbol in code_info_df.index:
high_limited = float(code_info_df.loc[symbol, 'high_limited']) if 'high_limited' in code_info_df.columns else None
low_limited = float(code_info_df.loc[symbol, 'low_limited']) if 'low_limited' in code_info_df.columns else None
print(f"[amazingdata_adapter _fetch_klines_sync]涨停价: {high_limited}, 跌停价: {low_limited}")
else:
high_limited = None
low_limited = None
print(f"[amazingdata_adapter _fetch_klines_sync]未找到 {symbol} 的涨跌停价格")
except Exception as e:
print(f"[amazingdata_adapter _fetch_klines_sync]获取证券信息失败: {e}")
high_limited = None
low_limited = None
# ============================================
# 2. 获取股本结构(总股本、流通股)
# ============================================
print(f"[amazingdata_adapter _fetch_klines_sync]正在获取股本结构...")
try:
equity_dict = self._info_data.get_equity_structure(
code_list=codes,
local_path=self.config.local_path,
is_local=self.config.use_local_cache
)
if symbol in equity_dict:
equity_df = equity_dict[symbol]
# 获取最新的股本数据
if not equity_df.empty:
latest_equity = equity_df.iloc[-1]
tot_share = float(latest_equity.get('TOT_SHARE', 0)) * 10000 # 万股转股
float_share = float(latest_equity.get('FLOAT_SHARE', 0)) * 10000 # 万股转股
print(f"[amazingdata_adapter _fetch_klines_sync]总股本: {tot_share}, 流通股: {float_share}")
else:
tot_share = 0
float_share = 0
else:
tot_share = 0
float_share = 0
except Exception as e:
print(f"[amazingdata_adapter _fetch_klines_sync]获取股本结构失败: {e}")
tot_share = 0
float_share = 0
# ============================================
# 3. 获取交易日历和上市日期
# ============================================
print(f"[amazingdata_adapter _fetch_klines_sync]正在获取交易日历...")
try:
# 获取交易日历
calendar = self._base_data.get_calendar(market=Market.SH.value)
# 获取股票上市日期
list_date = self._get_list_date(symbol)
if list_date is None:
list_date = min(calendar) if calendar else start_int
print(f"[amazingdata_adapter _fetch_klines_sync]上市日期: {list_date}")
except Exception as e:
print(f"[amazingdata_adapter _fetch_klines_sync]获取交易日历失败: {e}")
calendar = []
list_date = start_int
# ============================================
# 4. 处理K线数据并补充字段
# ============================================
results = [] results = []
if symbol in kline_dict: for _, row in df.iterrows():
df = kline_dict[symbol] # 从 kline_time 列获取日期
print(f"[amazingdata_adapter _fetch_klines_sync]DataFrame columns: {df.columns.tolist()}") kline_time = row.get('kline_time')
print(f"[amazingdata_adapter _fetch_klines_sync]DataFrame head:\n{df.head()}") if pd.isna(kline_time) or kline_time is None:
continue
for _, row in df.iterrows():
# 从 kline_time 列获取日期AmazingData 返回的日期字段)
kline_time = row.get('kline_time')
if pd.isna(kline_time) or kline_time is None:
print(f"[amazingdata_adapter _fetch_klines_sync]跳过无效日期: kline_time 为空")
continue
try:
# kline_time 可能是 Timestamp 或整数 YYYYMMDD
if isinstance(kline_time, pd.Timestamp):
ts = int(kline_time.timestamp())
trade_date = kline_time.strftime('%Y-%m-%d')
else:
# 整数格式 YYYYMMDD
date_str = str(int(kline_time))
if len(date_str) != 8:
print(f"[amazingdata_adapter _fetch_klines_sync]跳过无效日期: {date_str}")
continue
dt = datetime.strptime(date_str, "%Y%m%d")
ts = int(dt.timestamp())
trade_date = dt.strftime('%Y-%m-%d')
except (ValueError, TypeError) as e:
print(f"[amazingdata_adapter _fetch_klines_sync]日期解析错误 '{kline_time}' (type: {type(kline_time)}): {e}")
continue
# 从 DataFrame 提取扩展字段(如果存在)
is_limit_up = bool(row.get('is_limit_up')) if 'is_limit_up' in df.columns else None
is_limit_down = bool(row.get('is_limit_down')) if 'is_limit_down' in df.columns else None
total_market_cap = float(row.get('total_market_cap')) if 'total_market_cap' in df.columns and pd.notna(row.get('total_market_cap')) else None
float_market_cap = float(row.get('float_market_cap')) if 'float_market_cap' in df.columns and pd.notna(row.get('float_market_cap')) else None
inst_holding_ratio = float(row.get('inst_holding_ratio')) if 'inst_holding_ratio' in df.columns and pd.notna(row.get('inst_holding_ratio')) else None
trading_days = int(row.get('trading_days')) if 'trading_days' in df.columns and pd.notna(row.get('trading_days')) else None
results.append(KLineData(
symbol=symbol,
time=ts,
open=float(row.get('open', 0)),
high=float(row.get('high', 0)),
low=float(row.get('low', 0)),
close=float(row.get('close', 0)),
volume=int(row.get('volume', 0)),
amount=float(row.get('amount', 0)),
trade_date=trade_date,
is_limit_up=is_limit_up,
is_limit_down=is_limit_down,
total_market_cap=total_market_cap,
float_market_cap=float_market_cap,
inst_holding_ratio=inst_holding_ratio,
trading_days=trading_days
))
info(f"Fetched {len(results)} klines from AmazingData for {symbol}") try:
# 解析日期
if isinstance(kline_time, pd.Timestamp):
ts = int(kline_time.timestamp())
trade_date = kline_time.strftime('%Y-%m-%d')
trade_date_int = int(kline_time.strftime('%Y%m%d'))
else:
date_str = str(int(kline_time))
if len(date_str) != 8:
continue
dt = datetime.strptime(date_str, "%Y%m%d")
ts = int(dt.timestamp())
trade_date = dt.strftime('%Y-%m-%d')
trade_date_int = int(date_str)
except (ValueError, TypeError) as e:
continue
# 获取收盘价
close = float(row.get('close', 0))
# ============================================
# 4.1 判断是否涨跌停
# ============================================
is_limit_up = False
is_limit_down = False
if high_limited and low_limited and close > 0:
# 涨停:收盘价 >= 涨停价 * 0.995允许0.5%误差)
is_limit_up = close >= high_limited * 0.995
# 跌停:收盘价 <= 跌停价 * 1.005允许0.5%误差)
is_limit_down = close <= low_limited * 1.005
# ============================================
# 4.2 计算市值
# ============================================
total_market_cap = close * tot_share if tot_share > 0 and close > 0 else None
float_market_cap = close * float_share if float_share > 0 and close > 0 else None
# ============================================
# 4.3 计算可交易日数
# ============================================
trading_days = None
if calendar and list_date:
# 计算从上市日期到当前交易日的交易日数
trading_days = sum(1 for d in calendar if list_date <= d <= trade_date_int)
# 机构持仓占比( AmazingData K线数据可能包含如果没有则设为None
inst_holding_ratio = None
if 'inst_holding_ratio' in df.columns and pd.notna(row.get('inst_holding_ratio')):
inst_holding_ratio = float(row.get('inst_holding_ratio'))
results.append(KLineData(
symbol=symbol,
time=ts,
open=float(row.get('open', 0)),
high=float(row.get('high', 0)),
low=float(row.get('low', 0)),
close=close,
volume=int(row.get('volume', 0)),
amount=float(row.get('amount', 0)),
trade_date=trade_date,
is_limit_up=is_limit_up,
is_limit_down=is_limit_down,
total_market_cap=total_market_cap,
float_market_cap=float_market_cap,
inst_holding_ratio=inst_holding_ratio,
trading_days=trading_days
))
info(f"Fetched {len(results)} klines with extended fields from AmazingData for {symbol}")
return results return results
async def fetch_symbols(self, asset_type: str) -> List[SymbolInfo]: async def fetch_symbols(self, asset_type: str) -> List[SymbolInfo]:

@ -32,23 +32,25 @@ class StockService:
except ValueError as e: except ValueError as e:
raise ValueError(f"Invalid date format: {e}") raise ValueError(f"Invalid date format: {e}")
# todo 暂时不从数据库获取,后期放开
# 获取K线数据从数据库 # 获取K线数据从数据库
items = self.repository.get_klines( # items = self.repository.get_klines(
req.symbol, # req.symbol,
req.freq, # req.freq,
start, # start,
end, # end,
req.adjust # req.adjust
) # )
items = [] # 先不从数据库获取,直接从适配器获取
# 如果数据库没有数据,尝试从适配器获取 # 如果数据库没有数据,尝试从适配器获取
if not items: if not items:
info(f"No data in DB for {req.symbol}, fetching from adapter...") info(f"No data in DB for {req.symbol}, fetching from adapter...")
items = self._fetch_from_adapter(req.symbol, req.start, req.end, req.freq) items = self._fetch_from_adapter(req.symbol, req.start, req.end, req.freq)
# 保存到数据库 # # 保存到数据库
if items: # if items:
self._save_klines_to_db(req.symbol, req.freq, items) # self._save_klines_to_db(req.symbol, req.freq, items)
# 处理复权(简化实现,实际需要复权系数表) # 处理复权(简化实现,实际需要复权系数表)
if req.adjust != AdjustType.NONE: if req.adjust != AdjustType.NONE:

@ -0,0 +1,112 @@
"""测试股票K线接口返回的字段"""
import requests
import json
# API configuration
# Root URL of the service under test (localhost), including the /v1 prefix
BASE_URL = "http://localhost:8080/v1"
# Value sent in the X-API-Key request header
API_KEY = "demo-api-key-2024"
# 测试获取股票K线
def test_stock_klines():
    """Call the stock K-line endpoint and report the fields of the first item.

    Sends ``GET {BASE_URL}/stock/klines/000001.SZ`` for a fixed date range,
    prints the base and extended fields of the first returned item, and
    checks that every expected field is present.

    Returns:
        True when the API answers with ``code == 0``, at least one item is
        returned and no expected field is missing; False otherwise
        (including request or decoding errors).
    """
    url = f"{BASE_URL}/stock/klines/000001.SZ"
    headers = {"X-API-Key": API_KEY}
    params = {
        "start": "20260301",
        "end": "20260310",
        "freq": "1d"
    }

    # (label, key) pairs printed for the first item
    base_labels = (
        ("时间戳", "time"),
        ("开盘价", "open"),
        ("最高价", "high"),
        ("最低价", "low"),
        ("收盘价", "close"),
        ("成交量", "volume"),
        ("成交额", "amount"),
    )
    extended_labels = (
        ("交易日", "trade_date"),
        ("是否涨停", "is_limit_up"),
        ("是否跌停", "is_limit_down"),
        ("总市值", "total_market_cap"),
        ("流通市值", "float_market_cap"),
        ("机构持仓占比", "inst_holding_ratio"),
        ("可交易日数", "trading_days"),
        ("创建时间", "created_at"),
    )
    expected_fields = [
        'symbol', 'time', 'open', 'high', 'low', 'close',
        'volume', 'amount', 'trade_date', 'is_limit_up',
        'is_limit_down', 'total_market_cap', 'float_market_cap',
        'inst_holding_ratio', 'trading_days', 'created_at'
    ]

    print(f"\n{'='*60}")
    print(f"测试接口: GET {url}")
    print(f"{'='*60}")

    try:
        # Without a timeout requests waits indefinitely on an unresponsive server
        response = requests.get(url, headers=headers, params=params, timeout=10)
        data = response.json()

        if data.get("code") != 0:
            print(f"请求失败: {data.get('message')}")
            return False

        # "or" guards against an explicit null in the payload
        kline_data = data.get("data") or {}
        items = kline_data.get("items") or []

        print(f"\n标的: {kline_data.get('symbol')}")
        print(f"周期: {kline_data.get('freq')}")
        print(f"数据条数: {len(items)}")
        print(f"\n{'='*60}")

        if not items:
            print("没有获取到数据")
            return False

        # Show every field of the first item
        first_item = items[0]
        print("\n第一条数据详情:")
        print(f"{'-'*60}")
        for label, key in base_labels:
            print(f"{label}: {first_item.get(key)}")

        print("\n扩展字段:")
        for label, key in extended_labels:
            print(f" {label}: {first_item.get(key)}")

        # Verify that all documented fields are present
        print(f"\n{'='*60}")
        print("字段完整性检查:")
        print(f"{'-'*60}")
        missing_fields = []
        for field in expected_fields:
            if field in first_item:
                print(f"{field}")
            else:
                print(f"{field} (缺失)")
                missing_fields.append(field)

        if missing_fields:
            print(f"\n缺失字段: {', '.join(missing_fields)}")
        else:
            print("\n所有字段都存在!")

        # A response with missing fields must not count as a passing run
        return not missing_fields
    except Exception as e:
        print(f"请求异常: {e}")
        return False
if __name__ == "__main__":
    import sys  # only the script entry point needs it

    print("\n" + "="*60)
    print("股票K线接口字段测试")
    print("="*60)

    success = test_stock_klines()

    print(f"\n{'='*60}")
    if success:
        print("测试完成!")
    else:
        print("测试失败!")
    print("="*60 + "\n")

    # Propagate the outcome so shells and CI can detect a failed run
    sys.exit(0 if success else 1)

@ -0,0 +1,133 @@
"""测试K线数据扩展字段获取"""
import asyncio
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from app.adapters.amazingdata_adapter import AmazingDataAdapter
from datetime import datetime
async def test_klines_with_extended_fields():
    """Fetch daily K-lines through AmazingDataAdapter and print the extended fields.

    The account name and password are read from the AMAZINGDATA_USERNAME and
    AMAZINGDATA_PASSWORD environment variables and are required; host and
    port fall back to defaults when AMAZINGDATA_HOST / AMAZINGDATA_PORT are
    not set.

    Returns:
        True when the run completes without raising; False when the
        credentials are missing or any step raises.
    """
    def mark(ok) -> str:
        """Return a visible pass/fail marker for a check result."""
        return "✓" if ok else "✗"

    print("\n" + "="*60)
    print("测试K线数据扩展字段")
    print("="*60)

    # Credentials must come from the environment; secrets do not belong in source control
    username = os.getenv("AMAZINGDATA_USERNAME")
    password = os.getenv("AMAZINGDATA_PASSWORD")
    if not username or not password:
        print("\n✗ 测试失败: 请先设置环境变量 AMAZINGDATA_USERNAME 和 AMAZINGDATA_PASSWORD")
        return False

    adapter = AmazingDataAdapter()

    # Connection settings (adjust through the environment as needed)
    config = {
        "username": username,
        "password": password,
        "host": os.getenv("AMAZINGDATA_HOST", "140.206.44.234"),
        "port": int(os.getenv("AMAZINGDATA_PORT", "8600")),
        "local_path": "./amazing_data_cache/",
        "use_local_cache": True
    }

    try:
        # Connect the adapter
        print("\n[1/3] 正在连接 AmazingData...")
        await adapter.connect(config)
        print("✓ 连接成功")

        try:
            # Fetch the K-line data
            symbol = "000001.SZ"  # Ping An Bank
            start_date = "20260301"
            end_date = "20260310"

            print(f"\n[2/3] 正在获取 {symbol} 的K线数据 ({start_date} ~ {end_date})...")
            klines = await adapter.fetch_klines(symbol, start_date, end_date, "1d")
            print(f"✓ 获取到 {len(klines)} 条K线数据")

            if klines:
                # Show the complete first record
                print(f"\n[3/3] 数据字段验证")
                print("-"*60)

                k = klines[0]
                print(f"\n标的代码: {k.symbol}")
                print(f"交易日: {k.trade_date}")
                print(f"时间戳: {datetime.fromtimestamp(k.time)}")

                print(f"\n基础行情:")
                print(f" 开盘价: {k.open}")
                print(f" 最高价: {k.high}")
                print(f" 最低价: {k.low}")
                print(f" 收盘价: {k.close}")
                print(f" 成交量: {k.volume}")
                print(f" 成交额: {k.amount}")

                print(f"\n扩展字段:")
                print(f" 是否涨停: {k.is_limit_up} {mark(k.is_limit_up is not None)}")
                print(f" 是否跌停: {k.is_limit_down} {mark(k.is_limit_down is not None)}")
                print(f" 总市值: {k.total_market_cap:,.0f}" if k.total_market_cap else " 总市值: None ✗")
                print(f" 流通市值: {k.float_market_cap:,.0f}" if k.float_market_cap else " 流通市值: None ✗")
                print(f" 机构持仓占比: {k.inst_holding_ratio}%" if k.inst_holding_ratio else " 机构持仓占比: None")
                print(f" 可交易日数: {k.trading_days} {mark(k.trading_days)}")

                # Field completeness checks
                print(f"\n{'='*60}")
                print("字段完整性检查:")
                print("-"*60)

                checks = [
                    ("symbol", k.symbol is not None),
                    ("time", k.time > 0),
                    ("open", k.open > 0),
                    ("high", k.high > 0),
                    ("low", k.low > 0),
                    ("close", k.close > 0),
                    ("volume", k.volume > 0),
                    ("amount", k.amount > 0),
                    ("trade_date", k.trade_date is not None),
                    ("is_limit_up", k.is_limit_up is not None),
                    ("is_limit_down", k.is_limit_down is not None),
                    ("total_market_cap", k.total_market_cap is not None and k.total_market_cap > 0),
                    ("float_market_cap", k.float_market_cap is not None and k.float_market_cap > 0),
                    ("trading_days", k.trading_days is not None and k.trading_days > 0),
                ]

                for field, check in checks:
                    print(f" {mark(check)} {field}")
                passed = sum(1 for _, check in checks if check)

                print(f"\n通过: {passed}/{len(checks)}")

                # Show the limit-up / limit-down flags for the first three bars
                print(f"\n{'='*60}")
                print("涨跌停判断示例:")
                print("-"*60)
                for bar in klines[:3]:
                    if bar.is_limit_up:
                        limit_status = "📈 涨停"
                    elif bar.is_limit_down:
                        limit_status = "📉 跌停"
                    else:
                        limit_status = ""
                    print(f" {bar.trade_date}: 收盘{bar.close} {limit_status}")
        finally:
            # Always release the connection, even when fetching or validation raised
            await adapter.close()

        print(f"\n{'='*60}")
        print("测试完成!")
        print("="*60 + "\n")

        return True

    except Exception as e:
        print(f"\n✗ 测试失败: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Run the coroutine to completion and map its boolean result to an exit code
    exit_code = 0 if asyncio.run(test_klines_with_extended_fields()) else 1
    sys.exit(exit_code)

@ -0,0 +1,186 @@
Metadata-Version: 2.4
Name: blosc2
Version: 4.1.2
Summary: A fast & compressed ndarray library with a flexible compute engine.
Author-Email: Blosc Development Team <blosc@blosc.org>
Maintainer-Email: Blosc Development Team <blosc@blosc.org>
License-Expression: BSD-3-Clause
License-File: LICENSE.txt
Classifier: Development Status :: 6 - Mature
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Operating System :: Microsoft :: Windows
Classifier: Operating System :: Unix
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Project-URL: homepage, https://github.com/Blosc/python-blosc2
Project-URL: documentation, https://www.blosc.org/python-blosc2/python-blosc2.html
Requires-Python: >=3.10
Requires-Dist: numpy>=1.26
Requires-Dist: ndindex
Requires-Dist: msgpack
Requires-Dist: numexpr>=2.14.1; platform_machine != "wasm32"
Requires-Dist: requests
Description-Content-Type: text/x-rst
=============
Python-Blosc2
=============
A fast & compressed ndarray library with a flexible compute engine
==================================================================
:Author: The Blosc development team
:Contact: blosc@blosc.org
:Github: https://github.com/Blosc/python-blosc2
:Actions: |actions|
:PyPi: |version|
:NumFOCUS: |numfocus|
:Code of Conduct: |Contributor Covenant|
.. |version| image:: https://img.shields.io/pypi/v/blosc2.svg
:target: https://pypi.python.org/pypi/blosc2
.. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg
:target: https://github.com/Blosc/community/blob/master/code_of_conduct.md
.. |numfocus| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A
:target: https://numfocus.org
.. |actions| image:: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml/badge.svg
:target: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml
What is Python-Blosc2?
=======================
Python-Blosc2 is a high-performance compressed ndarray library with a flexible
compute engine, using `C-Blosc2 <https://www.blosc.org/c-blosc2/c-blosc2.html>`_
as its compression backend. It allows complex calculations on compressed data,
whether stored in memory, on disk, or over the network (e.g., via
`Caterva2 <https://github.com/ironArray/Caterva2>`_). It uses the
`C-Blosc2 simple and open format
<https://github.com/Blosc/c-blosc2/blob/main/README_FORMAT.rst>`_ for storing
compressed data.
More info: https://www.blosc.org/python-blosc2/getting_started/overview.html
Installing
==========
Binary packages are available for major OSes (Win, Mac, Linux) and platforms.
Install from PyPi using ``pip``:
.. code-block:: console
pip install blosc2 --upgrade
Conda users can install from conda-forge:
.. code-block:: console
conda install -c conda-forge python-blosc2
Documentation
=============
The documentation is available here:
https://blosc.org/python-blosc2/python-blosc2.html
You can find examples at:
https://github.com/Blosc/python-blosc2/tree/main/examples
A tutorial from PyData Global 2025 is available at:
https://github.com/Blosc/PyData-Global-2025-Tutorial
(`Click here <https://www.youtube.com/watch?v=tUvSI3EpTBQ&list=PLGVZCDnMOq0qmerwB1eITnr5AfYRGm0DF&index=81>`_ to watch the video recording of the tutorial)
It contains Jupyter notebooks explaining the main features of Python-Blosc2.
License
=======
This software is licensed under a 3-Clause BSD license. A copy of the
python-blosc2 license can be found in
`LICENSE.txt <https://github.com/Blosc/python-blosc2/tree/main/LICENSE.txt>`_.
Discussion forum
================
Discussion about this package is welcome at:
https://github.com/Blosc/python-blosc2/discussions
Social feeds
------------
Stay informed about the latest developments by following us in
`Mastodon <https://fosstodon.org/@Blosc2>`_,
`Bluesky <https://bsky.app/profile/blosc.org>`_ or
`LinkedIn <https://www.linkedin.com/company/88381936/admin/dashboard/>`_.
Thanks
======
Blosc2 is supported by the `NumFOCUS foundation <https://numfocus.org>`_, the
`LEAPS-INNOV project <https://www.leaps-innov.eu>`_
and `ironArray SLU <https://ironarray.io>`_, among many other donors.
This allowed the following people to have contributed in an important way
to the core development of the Blosc2 library:
- Francesc Alted
- Marta Iborra
- Luke Shaw
- Aleix Alcacer
- Oscar Guiñón
- Juan David Ibáñez
- Ivan Vilata i Balaguer
- Oumaima Ech.Chdig
- Ricardo Sales Piquer
In addition, other people have participated in the project in different
aspects:
- Jan Sellner, contributed the mmap support for NDArray/SChunk objects.
- Dimitri Papadopoulos, contributed a large bunch of improvements to
many aspects of the project. His attention to detail is remarkable.
- And many others that have contributed with bug reports, suggestions and
improvements.
Developed using JetBrains IDEs.
.. image:: https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg
:target: https://jb.gg/OpenSource
:alt: JetBrains logo.
Citing Blosc
============
You can cite our work on the various libraries under the Blosc umbrella as follows:
.. code-block:: console
@ONLINE{blosc,
author = {{Blosc Development Team}},
title = "{A fast, compressed and persistent data store library}",
year = {2009-2025},
note = {https://blosc.org}
}
Support Blosc for a Sustainable Future
======================================
If you find Blosc useful and want to support its development, please consider
making a `donation or contract to the Blosc Development Team
<https://www.blosc.org/pages/blosc-in-depth/#support-blosc>`_.
Thank you!
**Compress Better, Compute Bigger**

@ -0,0 +1,80 @@
blosc2-4.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
blosc2-4.1.2.dist-info/METADATA,sha256=PfwyhaAAq9E0pNey0uX8XLczfOfGhPqQntCe2eKZqCA,6231
blosc2-4.1.2.dist-info/RECORD,,
blosc2-4.1.2.dist-info/WHEEL,sha256=Iwzd8cFJYd34Bw6rlN9JB4hgsVKLIBCVy637sxMRVyo,106
blosc2-4.1.2.dist-info/entry_points.txt,sha256=AQn8qWJhx7sMxZxwNAn9AGT77UMZpT8ZHmZoHsUY4Tw,29
blosc2-4.1.2.dist-info/licenses/LICENSE.txt,sha256=AstwCmS9owvusCU1ghx9pxk64zven0poXe4JQkjWzxg,1655
blosc2/__init__.py,sha256=wfFw2HAsNRTp9qjTQAA-sr5zFnhkONa3EjrqGO2wUR8,21137
blosc2/__pycache__/__init__.cpython-311.pyc,,
blosc2/__pycache__/_wasm_jit.cpython-311.pyc,,
blosc2/__pycache__/c2array.cpython-311.pyc,,
blosc2/__pycache__/core.cpython-311.pyc,,
blosc2/__pycache__/dict_store.cpython-311.pyc,,
blosc2/__pycache__/dsl_kernel.cpython-311.pyc,,
blosc2/__pycache__/embed_store.cpython-311.pyc,,
blosc2/__pycache__/exceptions.cpython-311.pyc,,
blosc2/__pycache__/fft.cpython-311.pyc,,
blosc2/__pycache__/info.cpython-311.pyc,,
blosc2/__pycache__/lazyexpr.cpython-311.pyc,,
blosc2/__pycache__/linalg.cpython-311.pyc,,
blosc2/__pycache__/ndarray.cpython-311.pyc,,
blosc2/__pycache__/proxy.cpython-311.pyc,,
blosc2/__pycache__/schunk.cpython-311.pyc,,
blosc2/__pycache__/storage.cpython-311.pyc,,
blosc2/__pycache__/tree_store.cpython-311.pyc,,
blosc2/__pycache__/utils.cpython-311.pyc,,
blosc2/__pycache__/version.cpython-311.pyc,,
blosc2/_wasm_jit.py,sha256=H_bnZVwr2oQjZUaosoR6ykzxYBO1_HX9dfPSUaTS_Vc,19119
blosc2/blosc2_ext.cp311-win_amd64.pyd,sha256=Pv7H9dTzotXxPSWlqwBNqpcmLClOQada7ixN-xSH5UM,2499072
blosc2/blosc2_ext.pyx,sha256=wfKd0JLxGIld1U9ZrooUbgpTsPIF0KpFMxOF8mBzz04,146229
blosc2/c2array.py,sha256=8X5OAjiPEQk42t7yQTP5JTlHNh-u1Sb-ScXElv8mgFc,16505
blosc2/core.py,sha256=2smXa_RYq4vA44kZ1thA3gGKE6kW1Ol7REH2ibQGhBA,71651
blosc2/dict_store.py,sha256=75JnpeJ9BV65TgWgwtUQydrFAt6Op3dXpecTcMggoik,23149
blosc2/dsl_kernel.py,sha256=XzpIRx4VmcCXUio22xsb4XUabv2VvCLdhrwLmXAMHdY,46111
blosc2/embed_store.py,sha256=LOudUZvwMO1vWEmrGUay6tdaVNpQIMCJX6e_p2Lx3PA,13349
blosc2/exceptions.py,sha256=ZK-SPzS527CEDL7-dw1Xl8wWHt93KW2vp9sg3wIp4BI,557
blosc2/fft.py,sha256=V_8c_-w-DOS61IWfUcx6UgeDlME35PGAyxY5ASgDnG4,979
blosc2/include/b2nd.h,sha256=vLj3QPdmG2xzeS0f6f1xZzoeypEiApfB0MvUGjVZdLA,24902
blosc2/include/blosc2.h,sha256=Ii9DzbSgjVdhA2Sz6oHmT8InCZEFuip9DbR_aotdFzA,102880
blosc2/include/blosc2/blosc2-common.h,sha256=5yQGyQpkwbT-bBzurP5p155ZAWv3GKzyx43ZwilSyQM,2719
blosc2/include/blosc2/blosc2-export.h,sha256=hsi3IiPDgyWVIhpuV8oNhgN4EYWIA3j_EvRBrIqZxck,1855
blosc2/include/blosc2/blosc2-stdio.h,sha256=jGU3e3cgXbxW7t5E1TOt5y-Rga1MSWLk6ebo93UOkKI,4650
blosc2/include/blosc2/codecs-registry.h,sha256=-bikcAq5rZOZzA9lZkDrvsGTTmuX4c3qPMH_qG0iPPw,2043
blosc2/include/blosc2/filters-registry.h,sha256=TzV0nTUifbKSK6fs4NkD_CtcOHb7m5W18weVO5n4KxU,1833
blosc2/include/blosc2/tuners-registry.h,sha256=uQ3TWsbf0QGXHP5qYRv9iqmpXcb7z5MUSmnsF7MVcv4,842
blosc2/info.py,sha256=5eaD2K3jfyLT1y0WAQQ7tgyJmfwN5FVJ9rUInWfLfbU,2085
blosc2/lazyexpr.py,sha256=QYfuhvxDpYfFMcPZ9zm264wH2I8qDMkxF5of5Cre62g,188780
blosc2/lib/blosc2.lib,sha256=UxKvQdGCBE42neuY7QTq9AF1YImoo_tGjQs7G4u0khY,35744
blosc2/lib/cmake/blosc2/Blosc2Config.cmake,sha256=64up3utTNCiNvhLYfBhRLIJw0ZbtOmyZ8LY9UOMd-3E,4351
blosc2/lib/cmake/blosc2/Blosc2ConfigVersion.cmake,sha256=z0xZRkTV_dFBNx6fg9lJrm40x_zFwQ8JdQ_DuoiVNV0,2830
blosc2/lib/cmake/blosc2/Blosc2Targets-release.cmake,sha256=qqvx-rb3WsAOPd_grd3xFiSJj9IE67qxwYXKsL49bn0,1771
blosc2/lib/cmake/blosc2/Blosc2Targets.cmake,sha256=ero0ELloxyV-njwSbFuxyQHw02lqPUCUF8V3rz57-lI,4787
blosc2/lib/cmake/blosc2/Modules/FindIPP.cmake,sha256=CuPjgtqtJ9AZ7QAKA7z5nzKBPjTEsRv20oQiN-mdoPQ,2165
blosc2/lib/cmake/blosc2/Modules/FindLZ4.cmake,sha256=nGGvXB3GSR4Mbct1z4xv8Y3HToo6NUnpNZMGCkCTvmg,288
blosc2/lib/cmake/blosc2/Modules/FindSIMD.cmake,sha256=VZEaOjgaV6XHvMLxnp7ahOF_wbOEEN3MPmjO_JGHpEw,2268
blosc2/lib/cmake/blosc2/Modules/FindZLIB_NG.cmake,sha256=Ehvjx61hvBcSUdJ6SEKgQfip2X1JlCIk-owmQ8EzRWg,2105
blosc2/lib/cmake/blosc2/Modules/FindZSTD.cmake,sha256=pxIgMvnUjLVP-3rntB-70f-tzzfuXtBUwBtOotvoQOo,212
blosc2/lib/cmake/blosc2/Modules/toolchain-aarch64.cmake,sha256=Kmg8bElwPy5FBF5LW2N8veU-7igNTyscDMbG_J14ajc,985
blosc2/lib/cmake/blosc2/Modules/toolchain-armhf.cmake,sha256=2bRK-4gla53SFgDV6dTuqXn6g7Wpbb_fDcc7hFd5kHo,1022
blosc2/lib/cmake/blosc2/Modules/toolchain-armsf.cmake,sha256=RkIP5XvfdbwfzIYZJgcopLv6D4ABWOLUDOuFB1VqanI,1128
blosc2/lib/libblosc2.dll,sha256=ZWwatYusKIPBNQqVKbeHQpviAYSO_pV9l0mRXcDdZ1k,1338368
blosc2/lib/libblosc2.lib,sha256=gtCUPjUM8vgkydQD7Uq1JFGEBenCtErcM-SdbV1a0GA,2626708
blosc2/lib/pkgconfig/blosc2.pc,sha256=EoRO6O8iLRBVas-RCp5koYWFiIFZzNNYgkdmMUK03qY,497
blosc2/lib/tcc.dll,sha256=k7MezEkky81NA7O5aQTydJ18_elH_gNSIONioBGywNw,350720
blosc2/linalg.py,sha256=H8muaJ96U4rMCL81MNxYe_-i7Fqs7bHRo0bhVsZMIZA,31541
blosc2/me_jit_glue.js,sha256=9uyc98wu83S9PW1WJaL9KDtA5z9hTlTxg2_maHNnx7Y,23314
blosc2/ndarray.py,sha256=dAzB6bAFA-_4sdzPcr-f58OCtlaTKPUfmTTIZqRRTfI,228824
blosc2/proxy.py,sha256=Om2lB-w31njTJhycqXEiTv8YHmgiYGEXgIsVmKvD35c,29361
blosc2/schunk.py,sha256=MgwCrzLI-Td9bE8Tduh42uxf_TD7xid5FOp2RkSCGds,65044
blosc2/storage.py,sha256=k7DHIzbLphHHE_xytaQ6eawuqzoT6FtF_80rRJp_pQU,10525
blosc2/tree_store.py,sha256=khU10mInKHJfSQhai7l_cae2V7ds7vlNmmx4tzxK6Bw,27200
blosc2/utils.py,sha256=li0oyxsDiBZLtpBdkUKoFB-pEYwRA_kF2-GtWYw7-B4,37192
blosc2/version.py,sha256=L7zCFLtbyYe-_iP0M_1XaXP8Dexs06fJx0XFT65XNyk,58
include/libtcc.h,sha256=OEtbE6mBTCh2v8EeBtL5RVrI10pr_IaHbE51hy598XY,5096
lib/tcc.dll,sha256=k7MezEkky81NA7O5aQTydJ18_elH_gNSIONioBGywNw,350720
share/miniexpr/licenses/LICENSE,sha256=amIGietrCmqJGJurywOAM2NHty7iPWtEMhyfRprJuMU,1568
share/miniexpr/licenses/LICENSE-LIBTCC,sha256=xOx_Td8HPikwzbvz1mnOo_J1-wLIws4bNI_Qe4059ws,27111
share/miniexpr/licenses/LICENSE-SLEEF,sha256=vrjkLp1rQoTgMwTQWoGgdVIAqWX8jQpeCuoehM-AXW4,1361
share/miniexpr/licenses/LICENSE-TINYEXPR,sha256=wcAtE2y-ldvajJ6rOIdTGJc2ZRHk2GXCKximY2q2VZE,897
share/miniexpr/licenses/THIRD_PARTY_NOTICES.md,sha256=rIjDpvDa2FXLO-zTDl7ThLkhfcrkmqf-Obl5_9Cu4FI,867
share/miniexpr/third_party/tinycc/COPYING,sha256=mxg-fwNWw5jMCmXEotLNVvIUmo4kQmTE0mrFnp2to-g,26932

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: scikit-build-core 0.12.1
Root-Is-Purelib: false
Tag: cp311-cp311-win_amd64

@ -0,0 +1,31 @@
BSD 3-Clause License
For Blosc - A blocking, shuffling and lossless compression library
Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,940 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
# Hey Ruff, please ignore the next violations
# ruff: noqa: E402 - Module level import not at top of file
# ruff: noqa: F401 - `var` imported but unused
import contextlib
import importlib.util
import os
import platform
from enum import Enum
from pathlib import Path
import numpy as np
# Optional dependency: remember whether numba could be imported.
try:
    import numba
except ImportError:
    _HAS_NUMBA = False
else:
    _HAS_NUMBA = True
# Do the platform check once at module level
IS_WASM = platform.machine() == "wasm32"
# IS_WASM = True # for testing (comment this line out for production)
"""
Flag for WebAssembly platform.
"""
if not IS_WASM:
import numexpr
from .version import __array_api_version__, __version__
def _configure_libtcc_runtime_path():
    """Export the location of a bundled libtcc so miniexpr can load it (best effort).

    Nothing is done on WASM, nor when ``ME_DSL_JIT_LIBTCC_PATH`` already holds a
    non-empty value, nor when the ``blosc2.blosc2_ext`` module spec has no origin.
    Otherwise the extension's directory, its ``lib`` subdirectory and the sibling
    ``lib`` directory are probed, in that order, for the platform-specific library
    names. The first existing file is stored in ``ME_DSL_JIT_LIBTCC_PATH``.
    """
    if IS_WASM or os.environ.get("ME_DSL_JIT_LIBTCC_PATH"):
        return

    spec = importlib.util.find_spec("blosc2.blosc2_ext")
    if spec is None or not spec.origin:
        return

    # File names under which libtcc may be shipped, per operating system
    names_by_system = {
        "Darwin": ("libtcc.dylib",),
        "Windows": ("tcc.dll", "libtcc.dll"),
    }
    lib_names = names_by_system.get(platform.system(), ("libtcc.so", "libtcc.so.1"))

    ext_dir = Path(spec.origin).resolve().parent
    search_dirs = (ext_dir, ext_dir / "lib", ext_dir.parent / "lib")

    # Directories are the outer loop, so an earlier directory always wins
    found = next(
        (
            directory / filename
            for directory in search_dirs
            for filename in lib_names
            if (directory / filename).is_file()
        ),
        None,
    )
    if found is not None:
        os.environ["ME_DSL_JIT_LIBTCC_PATH"] = str(found)
_configure_libtcc_runtime_path()
_WASM_MINIEXPR_ENABLED = not IS_WASM
__version__ = __version__
__array_api_version__ = __array_api_version__
"""
Python-Blosc2 version.
"""
class Codec(Enum):
    """
    Identifiers of the codecs that can be selected for compression.
    """

    BLOSCLZ = 0
    LZ4 = 1
    LZ4HC = 2
    ZLIB = 4
    ZSTD = 5
    NDLZ = 32
    ZFP_ACC = 33
    ZFP_PREC = 34
    ZFP_RATE = 35
    #: Plugin codec; install it with ``pip install blosc2-openhtj2k``
    OPENHTJ2K = 36
    #: Plugin codec; install it with ``pip install blosc2-grok``
    GROK = 37
    #: Plugin codec; install it with ``pip install blosc2-openzl``
    OPENZL = 38
class Filter(Enum):
    """
    Identifiers of the available filters.

    The integer given in ``filters_meta`` is interpreted per filter as follows:

    - NOFILTER: Not used
    - SHUFFLE: Number of byte streams for shuffle (if 0 defaults to typesize of array).
    - BITSHUFFLE: Not used
    - DELTA: Not used (bitwise XOR)
    - TRUNC_PREC: Number of bits to which to truncate float
    - NDCELL: Cellshape (i.e. for a 3-dim dataset, meta = 4 implies cellshape is 4x4x4)
    - NDMEAN: Cellshape (i.e. for a 3-dim dataset, meta = 4 implies cellshape is 4x4x4)
    - BYTEDELTA: Number of byte streams for delta
    - INT_TRUNC: Number of bits to which to truncate integer

    With TRUNC_PREC and INT_TRUNC, a positive value is the number of bits to keep,
    whereas a negative value is the number of bits to zero.

    NDCELL is explained `here <https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndcell/README.md>`_
    and NDMEAN `here <https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndmean/README.md>`_.
    """

    NOFILTER = 0
    SHUFFLE = 1
    BITSHUFFLE = 2
    DELTA = 3
    TRUNC_PREC = 4
    NDCELL = 32
    NDMEAN = 33
    BYTEDELTA = 35
    INT_TRUNC = 36
class SplitMode(Enum):
    """
    Identifiers of the available split modes.
    """

    ALWAYS_SPLIT = 1
    NEVER_SPLIT = 2
    AUTO_SPLIT = 3
    FORWARD_COMPAT_SPLIT = 4
class SpecialValue(Enum):
    """
    Kinds of special value that a chunk may hold.
    """

    NOT_SPECIAL = 0
    ZERO = 1
    NAN = 2
    VALUE = 3
    UNINIT = 4
class Tuner(Enum):
    """
    Identifiers of the available tuners.
    """

    #: The 'simple' tuner, which the Blosc2 library uses by default
    STUNE = 0
    #: A more sophisticated tuner, able to pick different codecs/filters for different
    #: chunks (more info `here <https://github.com/Blosc/blosc2_btune/>`_); it has to be
    #: installed with ``pip install blosc2-btune``
    BTUNE = 32
class FPAccuracy(Enum):
    """
    Accuracy modes for floating point functions in Blosc2 lazy expressions.

    They only matter when floating point dtypes are computed with miniexpr.
    """

    #: Floating point functions accurate to 1.0 ULPs (Units in the Last Place)
    HIGH = 1
    #: Floating point functions accurate to 3.5 ULPs (Units in the Last Place)
    MEDIUM = 2
    #: The default accuracy: an alias of MEDIUM, which should be enough for most applications.
    DEFAULT = MEDIUM
from .blosc2_ext import (
DEFINED_CODECS_STOP,
EXTENDED_HEADER_LENGTH,
GLOBAL_REGISTERED_CODECS_STOP,
MAX_BLOCKSIZE,
MAX_BUFFERSIZE,
MAX_DIM,
MAX_OVERHEAD,
MAX_TYPESIZE,
MIN_HEADER_LENGTH,
USER_REGISTERED_CODECS_STOP,
VERSION_DATE,
VERSION_STRING,
)
DEFINED_CODECS_STOP = DEFINED_CODECS_STOP
"""
Maximum possible Blosc2-defined codec id."""
GLOBAL_REGISTERED_CODECS_STOP = GLOBAL_REGISTERED_CODECS_STOP
"""
Maximum possible Blosc2 global registered codec id."""
USER_REGISTERED_CODECS_STOP = USER_REGISTERED_CODECS_STOP
"""
Maximum possible Blosc2 user registered codec id."""
EXTENDED_HEADER_LENGTH = EXTENDED_HEADER_LENGTH
"""
Blosc2 extended header length in bytes."""
MAX_BUFFERSIZE = MAX_BUFFERSIZE
"""
Maximum buffer size in bytes for a Blosc2 chunk."""
MAX_FAST_PATH_SIZE = 2**30
"""
Maximum size in bytes for a fast path evaluation.
"""
MAX_OVERHEAD = MAX_OVERHEAD
"""
Maximum overhead during compression (in bytes). This is
equal to :py:obj:`blosc2.EXTENDED_HEADER_LENGTH <EXTENDED_HEADER_LENGTH>`."""
MAX_TYPESIZE = MAX_TYPESIZE
"""
Blosc2 maximum type size (in bytes)."""
MIN_HEADER_LENGTH = MIN_HEADER_LENGTH
"""
Blosc2 minimum header length (in bytes)."""
VERSION_DATE = VERSION_DATE
"""
The C-Blosc2 version's date."""
VERSION_STRING = VERSION_STRING
"""
The C-Blosc2 version's string."""
if IS_WASM:
from ._wasm_jit import init_wasm_jit_helpers
_WASM_MINIEXPR_ENABLED = init_wasm_jit_helpers()
# For array-api compatibility
iinfo = np.iinfo
finfo = np.finfo
def isdtype(a_dtype: np.dtype, kind: str | np.dtype | tuple):
"""
Returns a boolean indicating whether a provided dtype is of a specified data type "kind".
Parameters
----------
dtype: dtype
The input dtype.
kind: str | dtype | Tuple[str, dtype]
Data type kind.
If kind is a dtype, return boolean indicating whether the input dtype is equal to the dtype specified by kind.
If kind is a string, return boolean indicating whether the input dtype is of a specified data type kind.
The following dtype kinds are supporte:
* 'bool': boolean data types (e.g., bool).
* 'signed integer': signed integer data types (e.g., int8, int16, int32, int64).
* 'unsigned integer': unsigned integer data types (e.g., uint8, uint16, uint32, uint64).
* 'integral': integer data types. Shorthand for ('signed integer', 'unsigned integer').
* 'real floating': real-valued floating-point data types (e.g., float32, float64).
* 'complex floating': complex floating-point data types (e.g., complex64, complex128).
* 'numeric': numeric data types. Shorthand for ('integral', 'real floating', 'complex floating').
Returns
-------
out: bool
Boolean indicating whether a provided dtype is of a specified data type kind.
"""
kind = (kind,) if not isinstance(kind, tuple) else kind
for _ in kind:
if a_dtype == kind:
return True
_complex, _signedint, _uint, _rfloat = False, False, False, False
if a_dtype in (complex64, complex128):
_complex = True
if "complex floating" in kind:
return True
if a_dtype == bool_ and "bool" in kind:
return True
if a_dtype in (int8, int16, int32, int64):
_signedint = True
if "signed integer" in kind:
return True
if a_dtype in (uint8, uint16, uint32, uint64):
_uint = True
if "unsigned integer" in kind:
return True
if a_dtype in (float16, float32, float64):
_rfloat = True
if "real floating" in kind:
return True
if "integral" in kind and (_signedint or _uint):
return True
return "numeric" in kind and (
_signedint or _uint or _rfloat or _complex
) # checked everything, otherwise False
# dtypes for array-api
str_ = np.str_
bytes_ = np.bytes_
object_ = np.object_
from numpy import (
bool_,
complex64,
complex128,
e,
euler_gamma,
float16,
float32,
float64,
inf,
int8,
int16,
int32,
int64,
nan,
newaxis,
pi,
uint8,
uint16,
uint32,
uint64,
)
bool = bool
DEFAULT_COMPLEX = complex128
"""
Default complex floating dtype."""
DEFAULT_FLOAT = float64
"""
Default real floating dtype."""
DEFAULT_INT = int64
"""
Default integer dtype."""
DEFAULT_INDEX = int64
"""
Default indexing dtype."""
class Info:
    """Plain attribute holder: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])
def __array_namespace_info__() -> Info:
    """
    Build the inspection object for this array namespace, following the Array API specification.

    Returns
    -------
    out: Info
        Object exposing the callables ``capabilities``, ``default_device``,
        ``default_dtypes``, ``dtypes`` and ``devices``, plus the attributes
        ``name`` and ``version``. ``default_dtypes`` and ``dtypes`` raise a
        ValueError for any device other than ``"cpu"`` or None.
    """

    def _require_cpu(device):
        # None is accepted in addition to the explicit "cpu" device.
        if not (device == "cpu" or device is None):
            raise ValueError("Only cpu devices allowed")

    def capabilities():
        return {
            "boolean indexing": True,
            "data-dependent shapes": False,
            "max dimensions": MAX_DIM,
        }

    def default_device():
        return "cpu"

    def default_dtypes(device=None):
        _require_cpu(device)
        return {
            "real floating": DEFAULT_FLOAT,
            "complex floating": DEFAULT_COMPLEX,
            "integral": DEFAULT_INT,
            "indexing": DEFAULT_INDEX,
        }

    def dtypes(device=None, kind=None):
        _require_cpu(device)
        # The dtype listing is delegated to NumPy's own namespace info
        return np.__array_namespace_info__().dtypes(kind=kind, device=device)

    def devices():
        return ["cpu"]

    return Info(
        capabilities=capabilities,
        default_device=default_device,
        default_dtypes=default_dtypes,
        dtypes=dtypes,
        devices=devices,
        name="blosc2",
        version=__version__,
    )
# Public API for container module
from .core import (
clib_info,
compress,
compress2,
compressor_list,
compute_chunks_blocks,
decompress,
decompress2,
detect_number_of_cores,
free_resources,
from_cframe,
get_blocksize,
get_cbuffer_sizes,
get_clib,
get_compressor,
get_cpu_info,
load_array,
load_tensor,
ndarray_from_cframe,
pack,
pack_array,
pack_array2,
pack_tensor,
print_versions,
register_codec,
register_filter,
remove_urlpath,
save_array,
save_tensor,
schunk_from_cframe,
set_blocksize,
set_compressor,
set_nthreads,
set_releasegil,
unpack,
unpack_array,
unpack_array2,
unpack_tensor,
)
# Internal Blosc threading
# Start from the "count" entry of the CPU info, falling back to 1 when it is missing
cpu_info = get_cpu_info()
nthreads = ncores = cpu_info.get("count", 1)
"""Number of threads to be used in compression/decompression.
"""
# Never go beyond 64 threads
nthreads = min(nthreads, 64)
if not IS_WASM:
    # Experiments say that, when using a large number of threads, it is better to not use them all
    if nthreads > 16:
        nthreads -= nthreads // 8
    # Forward the value to numexpr only when it fits within NUMEXPR_MAX_THREADS
    # (or when that variable is unset or not an integer), which avoids a warning
    numexpr_max_env = os.environ.get("NUMEXPR_MAX_THREADS")
    numexpr_max: int | None = None
    if numexpr_max_env is not None:
        try:
            numexpr_max = int(numexpr_max_env)
        except ValueError:
            pass
    if numexpr_max is None or nthreads <= numexpr_max:
        numexpr.set_num_threads(nthreads)
else:
    nthreads = 1
    # Keep C-side runtime in sync with Python-level default in wasm32.
    set_nthreads(1)
# This import must be before ndarray and schunk
from .storage import ( # noqa: I001
CParams,
cparams_dflts,
DParams,
dparams_dflts,
Storage,
storage_dflts,
)
from .ndarray import (
Array,
NDArray,
NDField,
Operand,
are_partitions_aligned,
are_partitions_behaved,
arange,
broadcast_to,
linspace,
eye,
asarray,
astype,
indices,
sort,
reshape,
copy,
concat,
expand_dims,
empty,
empty_like,
frombuffer,
fromiter,
get_slice_nchunks,
meshgrid,
nans,
uninit,
zeros,
zeros_like,
ones,
ones_like,
full,
full_like,
save,
stack,
)
from .embed_store import EmbedStore, estore_from_cframe
from .dict_store import DictStore
from .tree_store import TreeStore
from .c2array import c2context, C2Array, URLPath
from .dsl_kernel import DSLSyntaxError, DSLKernel, dsl_kernel, validate_dsl
from .lazyexpr import (
LazyExpr,
lazyudf,
lazyexpr,
LazyArray,
LazyUDF,
_open_lazyarray,
get_expr_operands,
validate_expr,
evaluate,
result_type,
can_cast,
)
from .proxy import Proxy, ProxySource, ProxyNDSource, ProxyNDField, SimpleProxy, jit, as_simpleproxy
from .schunk import SChunk, open
from . import linalg
from .linalg import tensordot, vecdot, permute_dims, matrix_transpose, matmul, transpose, diagonal, outer
from .utils import linalg_funcs as linalg_funcs_list
from . import fft
# Registry for postfilters
postfilter_funcs = {}
"""
Registry for postfilter functions. For more info see
:func:`SChunk.postfilter <blosc2.schunk.SChunk.postfilter>`"""
# Registry for prefilters
prefilter_funcs = {}
"""
Registry for prefilter functions. For more info see
:func:`SChunk.prefilter <blosc2.schunk.SChunk.prefilter>`"""
# Registry for user-defined codecs
ucodecs_registry = {}
"""
Registry for user-defined codecs. For more info see
:func:`blosc2.register_codec <blosc2.register_codec>`"""
# Registry for user-defined filters
ufilters_registry = {}
"""
Registry for user-defined filters. For more info see
:func:`blosc2.register_filter <blosc2.register_filter>`"""
blosclib_version = f"{VERSION_STRING} ({VERSION_DATE})"
"""
The blosc2 version + date.
"""
# Private global variables
_disable_overloaded_equal = False
"""
Disable the overloaded equal operator.
"""
# Delayed imports for avoiding overwriting of python builtins
from .ndarray import (
abs,
acos,
acosh,
add,
all,
any,
arccos,
arccosh,
arcsin,
arcsinh,
arctan,
arctan2,
arctanh,
argmax,
argmin,
array_from_ffi_ptr,
asin,
asinh,
atan,
atan2,
atanh,
bitwise_and,
bitwise_invert,
bitwise_left_shift,
bitwise_or,
bitwise_right_shift,
bitwise_xor,
ceil,
clip,
conj,
contains,
copysign,
cos,
cosh,
count_nonzero,
cumulative_prod,
cumulative_sum,
divide,
endswith,
equal,
exp,
expm1,
floor,
floor_divide,
greater,
greater_equal,
hypot,
imag,
isfinite,
isinf,
isnan,
lazywhere,
less,
less_equal,
log,
log1p,
log2,
log10,
logaddexp,
logical_and,
logical_not,
logical_or,
logical_xor,
lower,
max,
maximum,
mean,
min,
minimum,
multiply,
negative,
nextafter,
not_equal,
positive,
pow,
prod,
real,
reciprocal,
remainder,
round,
sign,
signbit,
sin,
sinh,
sqrt,
square,
squeeze,
startswith,
std,
subtract,
sum,
take,
take_along_axis,
tan,
tanh,
trunc,
upper,
var,
where,
)
__all__ = [ # noqa : RUF022
# Constants
"EXTENDED_HEADER_LENGTH",
"MAX_BUFFERSIZE",
"MAX_TYPESIZE",
"MIN_HEADER_LENGTH",
"VERSION_DATE",
"VERSION_STRING",
# Default dtypes
"DEFAULT_COMPLEX",
"DEFAULT_FLOAT",
"DEFAULT_INDEX",
"DEFAULT_INT",
# Mathematical constants
"e",
"pi",
"inf",
"nan",
"newaxis",
# Classes
"C2Array",
"CParams",
# Enums
"Codec",
"DParams",
"DictStore",
"EmbedStore",
"Filter",
"LazyArray",
"DSLKernel",
"DSLSyntaxError",
"LazyExpr",
"LazyUDF",
"NDArray",
"NDField",
"Operand",
"Proxy",
"ProxyNDField",
"ProxyNDSource",
"ProxySource",
"SChunk",
"SimpleProxy",
"SpecialValue",
"SplitMode",
"Storage",
"TreeStore",
"Tuner",
"URLPath",
# Version
"__version__",
# Utils
"linalg_funcs_list",
# Functions
"abs",
"acos",
"acosh",
"add",
"all",
"any",
"arange",
"arccos",
"arccosh",
"arcsin",
"arcsinh",
"arctan",
"arctan2",
"arctanh",
"are_partitions_aligned",
"are_partitions_behaved",
"argmax",
"argmin",
"array_from_ffi_ptr",
"asarray",
"asin",
"asinh",
"as_simpleproxy",
"astype",
"atan",
"atan2",
"atanh",
"bitwise_and",
"bitwise_invert",
"bitwise_left_shift",
"bitwise_or",
"bitwise_right_shift",
"bitwise_xor",
"broadcast_to",
"can_cast",
"ceil",
"clib_info",
"clip",
"compress",
"compress2",
"compressor_list",
"compute_chunks_blocks",
"concat",
"conj",
"contains",
"copy",
"copysign",
"cos",
"cosh",
"count_nonzero",
"cparams_dflts",
"cpu_info",
"cumulative_prod",
"cumulative_sum",
"decompress",
"decompress2",
"detect_number_of_cores",
"divide",
"dparams_dflts",
"endswith",
"empty",
"empty_like",
"equal",
"estore_from_cframe",
"exp",
"expand_dims",
"expm1",
"eye",
"finfo",
"floor",
"floor_divide",
"free_resources",
"from_cframe",
"frombuffer",
"fromiter",
"full",
"full_like",
"get_blocksize",
"get_cbuffer_sizes",
"get_clib",
"get_compressor",
"get_cpu_info",
"get_expr_operands",
"get_slice_nchunks",
"greater",
"greater_equal",
"hypot",
"imag",
"iinfo",
"indices",
"isdtype",
"isfinite",
"isinf",
"isnan",
"jit",
"lazyexpr",
"dsl_kernel",
"validate_dsl",
"lazyudf",
"lazywhere",
"less",
"less_equal",
"linspace",
"load_array",
"load_tensor",
"log",
"log1p",
"log2",
"log10",
"logaddexp",
"logical_and",
"logical_not",
"logical_or",
"logical_xor",
"lower",
"matmul",
"matrix_transpose",
"max",
"maximum",
"mean",
"meshgrid",
"min",
"minimum",
"multiply",
"nans",
"ndarray_from_cframe",
"negative",
"nextafter",
"not_equal",
"ones",
"ones_like",
"open",
"pack",
"pack_array",
"pack_array2",
"pack_tensor",
"permute_dims",
"positive",
"postfilter_funcs",
"pow",
"prefilter_funcs",
"print_versions",
"prod",
"real",
"reciprocal",
"register_codec",
"register_filter",
"remainder",
"remove_urlpath",
"reshape",
"result_type",
"round",
"save",
"save_array",
"save_tensor",
"schunk_from_cframe",
"set_blocksize",
"set_compressor",
"set_nthreads",
"set_releasegil",
"sign",
"signbit",
"sin",
"sinh",
"sort",
"sqrt",
"square",
"squeeze",
"stack",
"startswith",
"std",
"storage_dflts",
"subtract",
"sum",
"take",
"take_along_axis",
"tan",
"tanh",
"tensordot",
"transpose",
"trunc",
"uninit",
"unpack",
"unpack_array",
"unpack_array2",
"unpack_tensor",
"upper",
"validate_expr",
"var",
"vecdot",
"where",
"zeros",
"zeros_like",
]

@ -0,0 +1,627 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
from __future__ import annotations
import os
from pathlib import Path
_HELPERS_REGISTERED = False
_REGISTER_HELPERS_JS = r"""
(() => {
const g = globalThis;
if (g.__blosc2_me_jit_helper_ptrs) {
return g.__blosc2_me_jit_helper_ptrs;
}
const candidates = [];
const addCandidate = (name, obj) => {
if (!obj || (typeof obj !== "object" && typeof obj !== "function")) {
return;
}
candidates.push({ name, obj });
};
const addDerivedCandidates = (baseName, obj) => {
if (!obj || (typeof obj !== "object" && typeof obj !== "function")) {
return;
}
addCandidate(`${baseName}._module`, obj._module);
addCandidate(`${baseName}.module`, obj.module);
addCandidate(`${baseName}.Module`, obj.Module);
addCandidate(`${baseName}.asm`, obj.asm);
addCandidate(`${baseName}.wasmExports`, obj.wasmExports);
addCandidate(`${baseName}.wasm`, obj.wasm);
addCandidate(`${baseName}.__wasm`, obj.__wasm);
addCandidate(`${baseName}.pyodide`, obj.pyodide);
addCandidate(`${baseName}._api`, obj._api);
};
addCandidate("globalThis", g);
addCandidate("globalThis.Module", g.Module);
addCandidate("globalThis.__blosc2_pyodide_module", g.__blosc2_pyodide_module);
addCandidate("globalThis.__blosc2_pyodide_api", g.__blosc2_pyodide_api);
addCandidate("globalThis.pyodide", g.pyodide);
addCandidate("globalThis.pyodide._module", g.pyodide && g.pyodide._module);
addCandidate("globalThis.pyodide.module", g.pyodide && g.pyodide.module);
addCandidate("globalThis.pyodide.Module", g.pyodide && g.pyodide.Module);
addCandidate("globalThis.pyodide._api", g.pyodide && g.pyodide._api);
addCandidate("globalThis.pyodide._api._module", g.pyodide && g.pyodide._api && g.pyodide._api._module);
addCandidate("globalThis.pyodide._api.Module", g.pyodide && g.pyodide._api && g.pyodide._api.Module);
addDerivedCandidates("globalThis", g);
addDerivedCandidates("globalThis.pyodide", g.pyodide);
addDerivedCandidates("globalThis.__blosc2_pyodide_module", g.__blosc2_pyodide_module);
addDerivedCandidates("globalThis.__blosc2_pyodide_api", g.__blosc2_pyodide_api);
const resolve = (name) => {
for (const cand of candidates) {
let value;
try {
value = cand.obj[name];
} catch (_e) {
value = undefined;
}
if (value !== undefined && value !== null) {
if (typeof value === "function") {
return value.bind(cand.obj);
}
return value;
}
}
if (g[name] !== undefined && g[name] !== null) {
return g[name];
}
return null;
};
const wasmExports = resolve("wasmExports") || resolve("exports");
const asmObj = resolve("asm");
const isWasmMemory = (value) =>
typeof WebAssembly !== "undefined" &&
typeof WebAssembly.Memory !== "undefined" &&
value instanceof WebAssembly.Memory;
const isWasmTable = (value) =>
typeof WebAssembly !== "undefined" &&
typeof WebAssembly.Table !== "undefined" &&
value instanceof WebAssembly.Table;
const heapU8ForProbe = resolve("HEAPU8");
const heapBufferForProbe = heapU8ForProbe && heapU8ForProbe.buffer ? heapU8ForProbe.buffer : null;
const heapBufferLenForProbe =
heapBufferForProbe && typeof heapBufferForProbe.byteLength === "number"
? heapBufferForProbe.byteLength
: -1;
const isMemoryLike = (value) => {
if (!value) {
return false;
}
if (isWasmMemory(value)) {
return true;
}
let buf = null;
try {
buf = value.buffer;
} catch (_e) {
buf = null;
}
if (!buf || typeof buf.byteLength !== "number") {
return false;
}
if (typeof value.grow !== "function") {
return false;
}
if (heapBufferForProbe && buf !== heapBufferForProbe) {
const bufLen = typeof buf.byteLength === "number" ? buf.byteLength : -1;
if (heapBufferLenForProbe > 0 && bufLen > 0 && bufLen < heapBufferLenForProbe) {
return false;
}
}
return true;
};
const isTableLike = (value) => {
if (!value) {
return false;
}
if (isWasmTable(value)) {
return true;
}
return (
typeof value.get === "function" &&
typeof value.grow === "function" &&
typeof value.length === "number"
);
};
const findMemoryOrTableByType = (wantMemory) => {
const isObj = (v) => v && (typeof v === "object" || typeof v === "function");
const seen = new Set();
const queue = [];
const maxDepth = 6;
const maxVisited = 5000;
for (const cand of candidates) {
if (isObj(cand.obj)) {
queue.push({ value: cand.obj, depth: 0 });
}
}
while (queue.length > 0 && seen.size < maxVisited) {
const node = queue.shift();
const obj = node.value;
const depth = node.depth;
if (!isObj(obj) || seen.has(obj)) {
continue;
}
seen.add(obj);
if (wantMemory && isMemoryLike(obj)) {
return obj;
}
if (!wantMemory && isTableLike(obj)) {
return obj;
}
if (depth >= maxDepth) {
continue;
}
let keys = [];
try {
keys = Object.getOwnPropertyNames(obj);
} catch (_e) {
keys = [];
}
let symKeys = [];
try {
symKeys = Object.getOwnPropertySymbols(obj);
} catch (_e) {
symKeys = [];
}
const allKeys = keys.concat(symKeys);
for (const key of allKeys) {
let value;
try {
value = obj[key];
} catch (_e) {
continue;
}
if (wantMemory && isMemoryLike(value)) {
return value;
}
if (!wantMemory && isTableLike(value)) {
return value;
}
if (isObj(value)) {
if (wantMemory && isMemoryLike(value.memory)) {
return value.memory;
}
if (!wantMemory && isTableLike(value.__indirect_function_table)) {
return value.__indirect_function_table;
}
queue.push({ value, depth: depth + 1 });
}
}
let proto = null;
try {
proto = Object.getPrototypeOf(obj);
} catch (_e) {
proto = null;
}
if (isObj(proto)) {
queue.push({ value: proto, depth: depth + 1 });
}
}
return null;
};
const captureMemoryViaGrowHook = () => {
if (
typeof WebAssembly === "undefined" ||
typeof WebAssembly.Memory === "undefined" ||
!WebAssembly.Memory.prototype ||
typeof WebAssembly.Memory.prototype.grow !== "function"
) {
return null;
}
const growMemory = resolve("growMemory");
const resizeHeap = resolve("_emscripten_resize_heap");
if (typeof growMemory !== "function" && typeof resizeHeap !== "function") {
return null;
}
const heapU8 = resolve("HEAPU8");
const currentBytes =
heapU8 && heapU8.buffer && typeof heapU8.buffer.byteLength === "number"
? heapU8.buffer.byteLength
: 0;
if (currentBytes <= 0) {
return null;
}
const onePage = 64 * 1024;
let targetBytes = currentBytes + onePage;
const getHeapMax = resolve("getHeapMax");
if (typeof getHeapMax === "function") {
try {
const maxBytes = getHeapMax();
if (typeof maxBytes === "number" && maxBytes > 0) {
targetBytes = Math.min(targetBytes, maxBytes);
}
} catch (_e) {
/* ignore */
}
}
if (targetBytes <= currentBytes) {
return null;
}
let captured = null;
const originalGrow = WebAssembly.Memory.prototype.grow;
WebAssembly.Memory.prototype.grow = function patchedGrow(pages) {
captured = this;
return originalGrow.call(this, pages);
};
try {
if (typeof growMemory === "function") {
growMemory(targetBytes);
} else if (typeof resizeHeap === "function") {
resizeHeap(targetBytes);
}
} catch (_e) {
/* best effort only */
} finally {
WebAssembly.Memory.prototype.grow = originalGrow;
}
if (captured && isMemoryLike(captured)) {
return captured;
}
return null;
};
const deriveRuntimeFromAdjustedImports = () => {
for (const cand of candidates) {
const obj = cand.obj;
if (!obj || typeof obj.adjustWasmImports !== "function") {
continue;
}
try {
const importsObj = { env: {} };
const adjustedMaybe = obj.adjustWasmImports(importsObj);
const adjusted =
adjustedMaybe && (typeof adjustedMaybe === "object" || typeof adjustedMaybe === "function")
? adjustedMaybe
: importsObj;
const env =
(adjusted && adjusted.env) ||
(importsObj && importsObj.env) ||
null;
if (!env) {
continue;
}
const mem =
env.memory ||
env.wasmMemory ||
(adjusted && (adjusted.memory || adjusted.wasmMemory)) ||
null;
const tbl =
env.__indirect_function_table ||
env.wasmTable ||
(adjusted && (adjusted.__indirect_function_table || adjusted.wasmTable)) ||
null;
if (mem || tbl) {
return { memory: mem, table: tbl };
}
} catch (_e) {
continue;
}
}
return null;
};
const adjustedRuntime = deriveRuntimeFromAdjustedImports();
const wasmMemory =
resolve("wasmMemory") ||
resolve("memory") ||
resolve("wasmMemoryObject") ||
resolve("__wasmMemory") ||
(asmObj && asmObj.memory) ||
(asmObj && asmObj.wasmMemory) ||
(wasmExports && wasmExports.memory) ||
(adjustedRuntime && adjustedRuntime.memory) ||
captureMemoryViaGrowHook() ||
findMemoryOrTableByType(true) ||
null;
const wasmTable =
resolve("wasmTable") ||
resolve("__indirect_function_table") ||
(asmObj && asmObj.__indirect_function_table) ||
(asmObj && asmObj.wasmTable) ||
(wasmExports && wasmExports.__indirect_function_table) ||
(adjustedRuntime && adjustedRuntime.table) ||
findMemoryOrTableByType(false) ||
null;
const runtime = {
HEAPF32: resolve("HEAPF32"),
HEAPF64: resolve("HEAPF64"),
HEAPU8: heapU8ForProbe,
wasmMemory,
wasmTable,
addFunction: resolve("addFunction"),
removeFunction: resolve("removeFunction"),
stackSave: resolve("stackSave"),
stackAlloc: resolve("stackAlloc"),
stackRestore: resolve("stackRestore"),
lengthBytesUTF8: resolve("lengthBytesUTF8"),
stringToUTF8: resolve("stringToUTF8"),
err: resolve("err"),
};
const required = [
"HEAPF32",
"HEAPF64",
"HEAPU8",
"wasmMemory",
"wasmTable",
"addFunction",
"removeFunction",
"stackSave",
"stackAlloc",
"stackRestore",
"lengthBytesUTF8",
"stringToUTF8",
];
const missing = required.filter((name) => !runtime[name]);
if (missing.length > 0) {
const aliasKeys = [
"wasmMemory",
"memory",
"wasmExports",
"asm",
"__indirect_function_table",
"wasmTable",
"adjustWasmImports",
];
const keyRegex = /(mem|wasm|asm|module|heap)/i;
const diag = candidates.map((cand) => {
const have = required.filter((name) => {
try {
return !!cand.obj[name];
} catch (_e) {
return false;
}
});
const aliases = aliasKeys.filter((name) => {
try {
return cand.obj[name] !== undefined && cand.obj[name] !== null;
} catch (_e) {
return false;
}
});
let ownKeys = [];
try {
ownKeys = Object.getOwnPropertyNames(cand.obj);
} catch (_e) {
ownKeys = [];
}
const interesting = ownKeys.filter((k) => keyRegex.test(k)).slice(0, 20);
return `${cand.name}=[${have.join(",")}],aliases=[${aliases.join(",")}],keys=[${interesting.join(",")}]`;
}).join(" | ");
return {
instantiatePtr: 0,
freePtr: 0,
error: `missing runtime members: ${missing.join(", ")}; candidates: ${diag}`,
};
}
if (typeof g._meJitInstantiate !== "function" || typeof g._meJitFreeFn !== "function") {
return { instantiatePtr: 0, freePtr: 0, error: "me_jit_glue exports unavailable" };
}
const refreshRuntimeViews = () => {
const updater = resolve("updateMemoryViews");
if (typeof updater === "function") {
try {
updater();
} catch (_e) {
/* best effort only */
}
runtime.HEAPU8 = resolve("HEAPU8") || runtime.HEAPU8;
runtime.HEAPF32 = resolve("HEAPF32") || runtime.HEAPF32;
runtime.HEAPF64 = resolve("HEAPF64") || runtime.HEAPF64;
}
const mem = runtime.wasmMemory;
const buffer = mem && mem.buffer ? mem.buffer : null;
if (!buffer || typeof buffer.byteLength !== "number" || buffer.byteLength === 0) {
return null;
}
const heapU8 = runtime.HEAPU8;
if (!heapU8 || heapU8.buffer !== buffer || heapU8.byteLength === 0) {
runtime.HEAPU8 = new Uint8Array(buffer);
}
const heapF32 = runtime.HEAPF32;
if (!heapF32 || heapF32.buffer !== buffer || heapF32.byteLength === 0) {
runtime.HEAPF32 = new Float32Array(buffer);
}
const heapF64 = runtime.HEAPF64;
if (!heapF64 || heapF64.buffer !== buffer || heapF64.byteLength === 0) {
runtime.HEAPF64 = new Float64Array(buffer);
}
return runtime.HEAPU8;
};
const instantiateWrapper = (wasmPtr, wasmLen, bridgeLookupFnIdx) => {
const start = wasmPtr >>> 0;
const len = wasmLen >>> 0;
if (start === 0 || len === 0) {
return 0;
}
const heapU8 = refreshRuntimeViews();
if (!heapU8) {
return 0;
}
const end = (start + len) >>> 0;
if (end > heapU8.byteLength || end < start) {
return 0;
}
const wasmBytes = new Uint8Array(len);
wasmBytes.set(heapU8.subarray(start, end));
return g._meJitInstantiate(runtime, wasmBytes, bridgeLookupFnIdx | 0) | 0;
};
const freeWrapper = (fnIdx) => {
g._meJitFreeFn(runtime, fnIdx | 0);
};
const instantiatePtr = runtime.addFunction(instantiateWrapper, "iiii");
const freePtr = runtime.addFunction(freeWrapper, "vi");
g.__blosc2_me_jit_helper_ptrs = {
instantiatePtr,
freePtr,
instantiateWrapper,
freeWrapper,
runtime,
};
return g.__blosc2_me_jit_helper_ptrs;
})()
"""
def _trace_enabled() -> bool:
    """Tell whether ``ME_DSL_TRACE`` is set to 1, true, on or yes (case-insensitive)."""
    return os.environ.get("ME_DSL_TRACE", "").lower() in ("1", "true", "on", "yes")
def _trace(message: str) -> None:
    """Print ``message`` with the ``[blosc2.wasm-jit]`` prefix, but only when tracing is enabled."""
    if not _trace_enabled():
        return
    print(f"[blosc2.wasm-jit] {message}")
def _js_eval(js_mod, source: str):
    """Evaluate ``source`` through ``js_mod`` and return whatever the evaluator returns.

    ``js_mod.eval`` is used when that attribute exists and is not None; otherwise
    the call goes to ``js_mod.globalThis.eval``.
    """
    direct_eval = getattr(js_mod, "eval", None)
    if direct_eval is None:
        return js_mod.globalThis.eval(source)
    return direct_eval(source)
def _load_glue_once(js_mod) -> bool:
    """Make sure the ``me_jit_glue.js`` exports are present on ``globalThis``.

    When ``_meJitInstantiate`` and ``_meJitFreeFn`` are already functions, nothing
    is loaded. Otherwise ``me_jit_glue.js`` (located next to this module) is read
    and evaluated, and the exports are probed again.

    Returns
    -------
    out: bool
        True when both exports are available; False when the file cannot be
        read, its evaluation fails, or the exports are still missing afterwards.
    """

    def _exports_present() -> bool:
        probe = (
            "typeof globalThis._meJitInstantiate === 'function' && "
            "typeof globalThis._meJitFreeFn === 'function'"
        )
        return bool(_js_eval(js_mod, probe))

    if _exports_present():
        return True

    glue_path = Path(__file__).with_name("me_jit_glue.js")
    try:
        glue_source = glue_path.read_text(encoding="utf-8")
    except OSError as exc:
        _trace(f"could not read {glue_path.name}: {exc}")
        return False

    try:
        _js_eval(js_mod, glue_source)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"failed to evaluate {glue_path.name}: {exc}")
        return False

    return _exports_present()
def _inject_pyodide_runtime_handles(js_mod) -> None:
    """Publish Pyodide runtime objects on ``globalThis`` for the JS helpers.

    When ``pyodide_js`` cannot be imported the function returns silently.
    Otherwise the first non-None attribute among ``_module``, ``module`` and
    ``Module`` is stored as ``globalThis.__blosc2_pyodide_module``, and
    ``pyodide_js._api`` (if present) as ``globalThis.__blosc2_pyodide_api``.
    """
    try:
        import pyodide_js
    except ImportError:
        return

    # Lazily probe the candidate attribute names and keep the first hit.
    candidates = (getattr(pyodide_js, attr, None) for attr in ("_module", "module", "Module"))
    module_obj = next((obj for obj in candidates if obj is not None), None)
    if module_obj is not None:
        js_mod.globalThis.__blosc2_pyodide_module = module_obj
        _trace("captured pyodide_js module handle")

    api_obj = getattr(pyodide_js, "_api", None)
    if api_obj is not None:
        js_mod.globalThis.__blosc2_pyodide_api = api_obj
        _trace("captured pyodide_js API handle")
def _create_helper_ptrs(js_mod) -> tuple[int, int] | None:
    """Run the helper-registration JavaScript and extract its two pointers.

    Returns
    -------
    tuple[int, int] or None
        ``(instantiate_ptr, free_ptr)`` taken from the ``instantiatePtr`` and
        ``freePtr`` attributes of the JS result. None is returned when the
        script raises, when the result cannot be converted to integers, or
        when either pointer is 0 (in which case the result's ``error``
        attribute, if any, is traced).
    """
    try:
        js_result = _js_eval(js_mod, _REGISTER_HELPERS_JS)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"helper setup JS failed: {exc}")
        return None

    try:
        instantiate_ptr = int(js_result.instantiatePtr)
        free_ptr = int(js_result.freePtr)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"unexpected helper setup result: {exc}")
        return None

    if instantiate_ptr != 0 and free_ptr != 0:
        return instantiate_ptr, free_ptr

    # A zero pointer means the JS side failed; surface its message if it left one.
    js_error = getattr(js_result, "error", None)
    if js_error:
        _trace(str(js_error))
    return None
def init_wasm_jit_helpers() -> bool:
    """Register the wasm JIT helper function pointers with the C extension.

    The work is done at most once per process: after a successful run the
    module-level ``_HELPERS_REGISTERED`` flag is set and later calls return
    True immediately.

    Returns
    -------
    bool
        True if the helpers are (or already were) registered. False when the
        ``js`` module cannot be imported, the extension lacks
        ``_register_wasm_jit_helpers``, the glue script cannot be loaded, the
        helper pointers cannot be created, or the registration call raises.
    """
    global _HELPERS_REGISTERED

    if _HELPERS_REGISTERED:
        return True

    try:
        import js
    except ImportError:
        return False

    from . import blosc2_ext

    if not hasattr(blosc2_ext, "_register_wasm_jit_helpers"):
        _trace("extension does not expose _register_wasm_jit_helpers")
        return False

    _inject_pyodide_runtime_handles(js)

    if not _load_glue_once(js):
        _trace("me_jit_glue.js was not loaded")
        return False

    pointers = _create_helper_ptrs(js)
    if pointers is None:
        _trace("could not allocate addFunction helper pointers")
        return False

    instantiate_ptr, free_ptr = pointers
    try:
        blosc2_ext._register_wasm_jit_helpers(instantiate_ptr, free_ptr)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"C helper registration failed: {exc}")
        return False

    _HELPERS_REGISTERED = True
    _trace(f"registered wasm JIT helper pointers instantiate={instantiate_ptr} free={free_ptr}")
    return True

File diff suppressed because it is too large Load Diff

@ -0,0 +1,465 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
from __future__ import annotations
import os
from contextlib import contextmanager
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Sequence
import numpy as np
import requests
import blosc2
from blosc2.info import InfoReporter
# Process-wide defaults used by the request helpers in this module when a call
# does not supply its own URL base or token. ``c2context`` replaces this dict
# with a modified copy for the duration of its ``with`` block.
_subscriber_data = {
    # Read once at import time; may be None if the variable is unset.
    "urlbase": os.environ.get("BLOSC_C2URLBASE"),
    # An empty string means "send no authorization cookie".
    "auth_token": "",
}
"""Caterva2 subscriber data saved by context manager."""
# Passed as the ``timeout`` argument of the ``requests`` calls in this module.
TIMEOUT = 15
"""Default timeout for HTTP requests."""
@contextmanager
def c2context(
    *,
    urlbase: (str | None) = None,
    username: (str | None) = None,
    password: (str | None) = None,
    auth_token: (str | None) = None,
) -> None:
    """
    Context manager that sets parameters in Caterva2 subscriber requests.

    A parameter not specified or set to ``None`` will inherit the value from the
    previous context manager, defaulting to an environment variable (see
    below) if supported by that parameter.  Parameters set to an empty string
    will not be used in requests (without a default either).

    If the subscriber requires authorization for requests, you can either
    provide an `auth_token` (which you should have obtained previously from the
    subscriber), or both `username` and `password` to obtain the token by
    logging in to the subscriber.  The token will be reused until it is explicitly
    reset or requested again in a later context manager invocation.

    Please note that this manager is reentrant but not safe for concurrent use.

    Parameters
    ----------
    urlbase : str | None
        The base URL to be used when a C2Array instance does not have a subscriber
        URL base set. If not specified, it defaults to the value of the
        ``BLOSC_C2URLBASE`` environment variable.
    username : str | None
        The username for logging in to the subscriber to obtain an authorization token.
        If not specified, it defaults to the value of the ``BLOSC_C2USERNAME`` environment variable.
    password : str | None
        The password for logging in to the subscriber to obtain an authorization token.
        If not specified, it defaults to the value of the ``BLOSC_C2PASSWORD`` environment variable.
    auth_token : str | None
        The authorization token to be used when a C2Array instance does not have an
        authorization token set.

    Yields
    ------
    out: None

    Raises
    ------
    ValueError
        If an authorization token is given together with a username or password.
    """
    global _subscriber_data

    # NOTE: the subscriber data holds the authorization token, so it must never
    # be printed or logged from here.

    # Perform login to get an authorization token.
    if not auth_token:
        username = username or os.environ.get("BLOSC_C2USERNAME")
        password = password or os.environ.get("BLOSC_C2PASSWORD")
    if username or password:
        if auth_token:
            raise ValueError("Either provide a username/password or an authorization token")
        auth_token = login(username, password, urlbase)

    # Captured before entering ``try`` so that ``finally`` always has a value
    # to restore.
    old_sub_data = _subscriber_data
    try:
        new_sub_data = old_sub_data.copy()  # inherit old values
        if urlbase is not None:
            new_sub_data["urlbase"] = urlbase
        elif old_sub_data["urlbase"] is None:
            # The variable may have gotten a value after program start.
            new_sub_data["urlbase"] = os.environ.get("BLOSC_C2URLBASE")
        if auth_token is not None:
            new_sub_data["auth_token"] = auth_token
        _subscriber_data = new_sub_data
        yield
    finally:
        _subscriber_data = old_sub_data
def _xget(url, params=None, headers=None, auth_token=None, timeout=TIMEOUT):
    """Issue an HTTP GET and return the ``requests`` response object.

    The authorization token falls back to the one stored in
    ``_subscriber_data``. When a token is available it is sent in the
    ``Cookie`` header of a copy of *headers*, so the caller's mapping is not
    modified. An unsuccessful status raises via ``raise_for_status()``.
    """
    token = auth_token or _subscriber_data["auth_token"]
    if token:
        request_headers = headers.copy() if headers else {}
        request_headers["Cookie"] = token
    else:
        request_headers = headers
    reply = requests.get(url, params=params, headers=request_headers, timeout=timeout)
    reply.raise_for_status()
    return reply
def _xpost(url, json=None, auth_token=None, timeout=TIMEOUT):
    """Issue an HTTP POST with a JSON body and return the decoded JSON reply.

    The authorization token falls back to the one stored in
    ``_subscriber_data``; when one is available it is sent as the ``Cookie``
    header, otherwise no headers are passed. An unsuccessful status raises
    via ``raise_for_status()``.
    """
    token = auth_token or _subscriber_data["auth_token"]
    if token:
        request_headers = {"Cookie": token}
    else:
        request_headers = None
    reply = requests.post(url, json=json, headers=request_headers, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
def _sub_url(urlbase, path):
urlbase = urlbase or _subscriber_data["urlbase"]
if not urlbase:
raise RuntimeError("No default Caterva2 subscriber set")
return f"{urlbase}{path}" if urlbase.endswith("/") else f"{urlbase}/{path}"
def login(username, password, urlbase):
    """Log in to the subscriber and return the resulting cookie as ``name=value``.

    The credentials are posted as form data to ``auth/jwt/login`` under
    *urlbase*; the first cookie of the response is returned in the form
    expected by the ``Cookie`` header. An unsuccessful status raises via
    ``raise_for_status()``.
    """
    login_url = _sub_url(urlbase, "auth/jwt/login")
    reply = requests.post(
        login_url,
        data={"username": username, "password": password},
        timeout=TIMEOUT,
    )
    reply.raise_for_status()
    first_cookie = list(reply.cookies.items())[0]
    return "=".join(first_cookie)
def info(path, urlbase, params=None, headers=None, model=None, auth_token=None):
    """Fetch the ``api/info/<path>`` JSON document from the subscriber.

    Returns the decoded JSON as is, or ``model(**json)`` when a *model*
    callable is supplied.
    """
    reply = _xget(_sub_url(urlbase, f"api/info/{path}"), params, headers, auth_token)
    payload = reply.json()
    if model is None:
        return payload
    return model(**payload)
def fetch_data(path, urlbase, params, auth_token=None, as_blosc2=False):
    """Download ``api/fetch/<path>`` and deserialize the returned cframe.

    The payload is first decoded as an NDArray cframe; if that raises
    ``RuntimeError`` it is decoded as an SChunk cframe instead.

    Returns the Blosc2 container itself when *as_blosc2* is true, otherwise
    its full contents (``container[()]`` for a 0-dim container, since ``[:]``
    fails there, and ``container[:]`` in every other case).
    """
    reply = _xget(_sub_url(urlbase, f"api/fetch/{path}"), params=params, auth_token=auth_token)
    cframe = reply.content

    # Try different deserialization methods
    try:
        container = blosc2.ndarray_from_cframe(cframe)
    except RuntimeError:
        container = blosc2.schunk_from_cframe(cframe)

    if as_blosc2:
        return container

    # Only containers exposing ``ndim`` can be 0-dimensional.
    if hasattr(container, "ndim") and container.ndim == 0:
        return container[()]
    return container[:]
def slice_to_string(slice_):
    """Serialize an index expression into the string sent as the ``slice_`` parameter.

    Parameters
    ----------
    slice_ : int, slice, tuple of ints and slices, or None
        The index to serialize. ``None``, ``()`` and ``slice(None)`` all mean
        "everything" and produce an empty string.

    Returns
    -------
    str
        Comma-separated components; integers as themselves and slices as
        ``start:stop``. Tuple items that are neither ``int`` nor ``slice``
        are skipped.

    Raises
    ------
    IndexError
        If a slice has a step other than 1 or None.
    """
    if slice_ is None or slice_ == () or slice_ == slice(None):
        return ""
    if not isinstance(slice_, tuple):
        slice_ = (slice_,)

    slice_parts = []
    for index in slice_:
        if isinstance(index, int):
            slice_parts.append(str(index))
        elif isinstance(index, slice):
            if index.step not in (1, None):
                raise IndexError("Only step=1 is supported")
            # A start of 0 and a missing start select the same elements, so
            # both are written as an empty field.
            start = index.start or ""
            # A stop of 0 is NOT the same as a missing stop: ``[:0]`` is empty
            # while ``[:]`` is everything, so only None maps to the empty field.
            stop = "" if index.stop is None else index.stop
            slice_parts.append(f"{start}:{stop}")
    return ", ".join(slice_parts)
class C2Array(blosc2.Operand):
    def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None):
        """Open a remote NDArray served over HTTP by a Caterva2 server.

        Remote NDArrays live on a Caterva2 server (e.g., https://cat2.cloud).
        More information about Caterva2 at: https://ironarray.io/caterva2.

        Parameters
        ----------
        path: str
            The path to the remote NDArray file (root + file path) as
            a posix path.
        urlbase: str
            The base URL of the subscriber to query. A trailing slash is
            appended if it is missing.
        auth_token: str
            An optional token to authorize requests via HTTP.  Currently, it
            will be sent as an HTTP cookie.

        Raises
        ------
        ValueError
            If `path` starts with a slash.
        FileNotFoundError
            If the metadata request for `path` fails with an HTTP error.

        Examples
        --------
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-3d.b2nd"
        >>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
        >>> remote_array.shape
        (3, 4, 5)
        >>> remote_array.chunks
        (2, 3, 4)
        >>> remote_array.blocks
        (2, 2, 2)
        >>> remote_array.dtype
        dtype('float32')
        """
        if path.startswith("/"):
            raise ValueError("The path should start with a root name, not a slash")

        # The URL base is kept slash-terminated (when one is given at all).
        if urlbase and not urlbase.endswith("/"):
            urlbase = urlbase + "/"
        self.path = path
        self.urlbase = urlbase
        self.auth_token = auth_token

        # Fetching the metadata doubles as the 'open' operation.
        try:
            self.meta = info(self.path, self.urlbase, auth_token=self.auth_token)
        except requests.HTTPError as err:
            raise FileNotFoundError(f"Remote path not found: {path}.\nError was: {err}") from err

        server_cparams = self.meta["schunk"]["cparams"]
        # "filters, meta" is an artifact from the server, not a CParams field.
        server_cparams.pop("filters, meta", None)
        self._cparams = blosc2.CParams(**server_cparams)

    def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray:
        """
        Fetch a slice of the remote array as a NumPy array.

        Parameters
        ----------
        slice_ : int, slice, tuple of ints and slices, or None
            The slice to fetch.

        Returns
        -------
        out: numpy.ndarray
            A numpy.ndarray containing the data slice.

        Examples
        --------
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-2d.b2nd"
        >>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
        >>> data_slice = remote_array[3:5, 1:4]
        >>> data_slice.shape
        (2, 3)
        >>> data_slice[:]
        array([[61, 62, 63],
               [81, 82, 83]], dtype=uint16)
        """
        query = {"slice_": slice_to_string(slice_)}
        return fetch_data(self.path, self.urlbase, query, auth_token=self.auth_token, as_blosc2=False)

    def slice(self, slice_: int | slice | Sequence[slice]) -> blosc2.NDArray:
        """
        Fetch a slice of the remote array as a Blosc2 NDArray.

        Parameters
        ----------
        slice_ : int, slice, tuple of ints and slices, or None
            The slice to fetch.

        Returns
        -------
        out: blosc2.NDArray
            A blosc2.NDArray containing the data slice.

        Examples
        --------
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-2d.b2nd"
        >>> remote_array = blosc2.C2Array(path, urlbase=urlbase)
        >>> data_slice = remote_array.slice((slice(3,5), slice(1,4)))
        >>> data_slice.shape
        (2, 3)
        >>> type(data_slice)
        blosc2.ndarray.NDArray
        """
        query = {"slice_": slice_to_string(slice_)}
        return fetch_data(self.path, self.urlbase, query, auth_token=self.auth_token, as_blosc2=True)

    def __len__(self) -> int:
        """Return the length of the first dimension, i.e. ``self.shape[0]``."""
        return self.shape[0]

    def get_chunk(self, nchunk: int) -> bytes:
        """
        Get the compressed unidimensional chunk of a :ref:`C2Array`.

        Parameters
        ----------
        nchunk: int
            The index of the unidimensional chunk to retrieve.

        Returns
        -------
        out: bytes
            The requested compressed chunk.

        Examples
        --------
        >>> import numpy as np
        >>> import blosc2
        >>> urlbase = "https://cat2.cloud/demo"
        >>> path = "@public/examples/dir1/ds-3d.b2nd"
        >>> a = blosc2.C2Array(path, urlbase)
        >>> # Get the compressed chunk from array 'a' for index 0
        >>> compressed_chunk = a.get_chunk(0)
        >>> f"Size of chunk {0} from a: {len(compressed_chunk)} bytes"
        Size of chunk 0 from a: 160 bytes
        >>> # Decompress the chunk and convert it to a NumPy array
        >>> decompressed_chunk = blosc2.decompress(compressed_chunk)
        >>> np.frombuffer(decompressed_chunk, dtype=a.dtype)
        array([ 0.,  1.,  5.,  6., 20., 21., 25., 26.,  2.,  3.,  7.,  8., 22.,
               23., 27., 28., 10., 11.,  0.,  0., 30., 31.,  0.,  0., 12., 13.,
                0.,  0., 32., 33.,  0.,  0.], dtype=float32)
        """
        chunk_url = _sub_url(self.urlbase, f"api/chunk/{self.path}")
        reply = _xget(chunk_url, params={"nchunk": nchunk}, auth_token=self.auth_token)
        return reply.content

    @property
    def shape(self) -> tuple[int]:
        """Shape of the remote array, as reported in its metadata."""
        return tuple(self.meta["shape"])

    @property
    def chunks(self) -> tuple[int]:
        """Chunk shape of the remote array, as reported in its metadata."""
        return tuple(self.meta["chunks"])

    @property
    def blocks(self) -> tuple[int]:
        """Block shape of the remote array, as reported in its metadata."""
        return tuple(self.meta["blocks"])

    @property
    def dtype(self) -> np.dtype:
        """NumPy dtype built from the ``dtype`` entry of the metadata."""
        return np.dtype(self.meta["dtype"])

    @property
    def cparams(self) -> blosc2.CParams:
        """Compression parameters of the remote array."""
        return self._cparams

    @property
    def nbytes(self) -> int:
        """Number of bytes of the remote array."""
        return self.meta["schunk"]["nbytes"]

    @property
    def cbytes(self) -> int:
        """Number of compressed bytes of the remote array."""
        return self.meta["schunk"]["cbytes"]

    @property
    def cratio(self) -> float:
        """Compression ratio of the remote array."""
        return self.meta["schunk"]["cratio"]

    # TODO: Add ``dparams`` and ``meta`` to the SChunk model in srv_utils and
    #  then expose them here as properties.

    # TODO: A ``urlpath`` property (``self.meta["schunk"]["urlpath"]``) seems to
    #  cause problems for proxy sources
    #  (see tests/ndarray/test_proxy_c2array.py::test_open).

    @property
    def vlmeta(self) -> dict:
        """Variable-length metadata of the remote array."""
        return self.meta["schunk"]["vlmeta"]

    @property
    def info(self) -> InfoReporter:
        """
        Reporter object that prints information about this remote array.
        """
        return InfoReporter(self)

    @property
    def info_items(self) -> list:
        """A list of ``(name, value)`` tuples describing the remote array."""
        # TODO: add ("dparams", self.dparams) once dparams is available.
        return [
            ("type", f"{self.__class__.__name__}"),
            ("shape", self.shape),
            ("chunks", self.chunks),
            ("blocks", self.blocks),
            ("dtype", self.dtype),
            ("nbytes", self.nbytes),
            ("cbytes", self.cbytes),
            ("cratio", f"{self.cratio:.2f}"),
            ("cparams", self.cparams),
        ]

    # TODO: Access chunksize, size, ext_chunks, etc.  Note that ``chunksize``
    #  would NOT be the same as `SChunk.chunksize <blosc2.schunk.SChunk.chunksize>`
    #  when :attr:`chunks` is not a multiple of :attr:`blocks` in each dimension
    #  (i.e. when :attr:`chunks` differs from :attr:`ext_chunks`).

    @property
    def blocksize(self) -> int:
        """Block size (in bytes) of the remote container."""
        return self.meta["schunk"]["blocksize"]
class URLPath:
    def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None):
        """
        Describe the location of a remote data file (aka :ref:`C2Array <C2Array>`).

        Instances are meant to be passed to :func:`blosc2.open`; the
        parameters are the same as for :meth:`C2Array.__init__`. The values
        are stored as given, without validation or network access.
        """
        self.path, self.urlbase, self.auth_token = path, urlbase, auth_token

File diff suppressed because it is too large Load Diff

@ -0,0 +1,547 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
import os
import shutil
import tempfile
import zipfile
from collections.abc import Iterator, Set
from typing import Any
import numpy as np
import blosc2
from blosc2.c2array import C2Array
from blosc2.embed_store import EmbedStore
from blosc2.schunk import SChunk
class DictStore:
"""
Directory-based storage for compressed data using Blosc2.
Manages arrays in a directory (.b2d) or zip (.b2z) format.
Supports the following types:
- blosc2.NDArray: n-dimensional arrays. When persisted externally they
are stored as .b2nd files.
- blosc2.SChunk: super-chunks. When persisted externally they are stored
as .b2f files.
- blosc2.C2Array: columnar containers. These are always kept inside the
embedded store (never externalized).
- numpy.ndarray: converted to blosc2.NDArray on assignment.
Parameters
----------
localpath : str
Local path for the directory (".b2d") or file (".b2z"); other extensions
are not supported. If a directory is specified, it will be treated as
a Blosc2 directory format (B2DIR). If a file is specified, it
will be treated as a Blosc2 zip format (B2ZIP).
mode : str, optional
File mode ('r', 'w', 'a'). Default is 'a'.
mmap_mode : str or None, optional
Memory mapping mode for read access. For now, only ``"r"`` is supported,
and only when ``mode="r"``. Default is None.
tmpdir : str or None, optional
Temporary directory to use when working with ".b2z" files. If None,
a system temporary directory will be managed. Default is None.
cparams : dict or None, optional
Compression parameters for the internal embed store.
If None, the default Blosc2 parameters are used.
dparams : dict or None, optional
Decompression parameters for the internal embed store.
If None, the default Blosc2 parameters are used.
storage : blosc2.Storage or None, optional
Storage properties for the internal embed store.
If None, the default Blosc2 storage properties are used.
threshold : int or None, optional
Threshold (in bytes of uncompressed data) under which values are kept
in the embedded store. If None, in-memory arrays are stored in the
embedded store and on-disk arrays are stored as separate files.
C2Array objects will always be stored in the embedded store,
regardless of their size.
Examples
--------
>>> dstore = DictStore(localpath="my_dstore.b2z", mode="w")
>>> dstore["/node1"] = np.array([1, 2, 3]) # goes to embed store
>>> dstore["/node2"] = blosc2.ones(2) # goes to embed store
>>> arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w")
>>> dstore["/dir1/node3"] = arr_external # external file in dir1 (.b2nd)
>>> schunk = blosc2.SChunk(chunksize=32)
>>> schunk.append_data(b"abcd")
4
>>> dstore["/dir1/schunk1"] = schunk # externalized as .b2f if above threshold
>>> dstore.to_b2z() # persist to the zip file; external files are copied in
>>> print(sorted(dstore.keys()))
['/dir1/node3', '/dir1/schunk1', '/node1', '/node2']
>>> print(dstore["/node1"][:]))
array([1, 2, 3])
Notes
-----
- External persistence uses the following file extensions:
.b2nd for NDArray and .b2f for SChunk.
"""
def __init__(
self,
localpath: os.PathLike[Any] | str | bytes,
mode: str = "a",
tmpdir: str | None = None,
cparams: blosc2.CParams | None = None,
dparams: blosc2.DParams | None = None,
storage: blosc2.Storage | None = None,
threshold: int | None = 2**13,
*,
mmap_mode: str | None = None,
_storage_meta: dict | None = None,
):
"""
See :class:`DictStore` for full documentation of parameters.
"""
self.localpath = localpath if isinstance(localpath, (str, bytes)) else str(localpath)
if not self.localpath.endswith((".b2z", ".b2d")):
raise ValueError(f"localpath must have a .b2z or .b2d extension; you passed: {self.localpath}")
if mode not in ("r", "w", "a"):
raise ValueError("For DictStore containers, mode must be 'r', 'w', or 'a'")
if mmap_mode not in (None, "r"):
raise ValueError("For DictStore containers, mmap_mode must be None or 'r'")
if mmap_mode == "r" and mode != "r":
raise ValueError("For DictStore containers, mmap_mode='r' requires mode='r'")
self.mode = mode
self.mmap_mode = mmap_mode
self.threshold = threshold
self.cparams = cparams or blosc2.CParams()
self.dparams = dparams or blosc2.DParams()
self.storage = storage or blosc2.Storage()
if _storage_meta:
self.storage.meta = _storage_meta
else:
# Mark this storage as a b2dict object
self.storage.meta = {"b2dict": {"version": 1}}
self.offsets = {}
self.map_tree = {}
self._temp_dir_obj = None
self._setup_paths_and_dirs(tmpdir)
if self.mode == "r":
self._init_read_mode(self.dparams)
else:
self._init_write_append_mode(self.cparams, self.dparams, storage)
def _setup_paths_and_dirs(self, tmpdir: str | None):
"""Set up working directories and paths."""
self.is_zip_store = self.localpath.endswith(".b2z")
if self.is_zip_store:
if tmpdir is None:
self._temp_dir_obj = tempfile.TemporaryDirectory()
self.working_dir = self._temp_dir_obj.name
else:
self.working_dir = tmpdir
os.makedirs(tmpdir, exist_ok=True)
self.b2z_path = self.localpath
else: # .b2d
self.working_dir = self.localpath
if self.mode in ("w", "a"):
os.makedirs(self.working_dir, exist_ok=True)
self.b2z_path = self.localpath[:-4] + ".b2z"
self.estore_path = os.path.join(self.working_dir, "embed.b2e")
def _init_read_mode(self, dparams: blosc2.DParams | None = None):
"""Initialize store in read mode."""
if not os.path.exists(self.localpath):
raise FileNotFoundError(f"dir/zip file {self.localpath} does not exist.")
if self.is_zip_store:
self.offsets = self._get_zip_offsets()
if "embed.b2e" not in self.offsets:
raise FileNotFoundError("Embed file embed.b2e not found in store.")
estore_offset = self.offsets["embed.b2e"]["offset"]
schunk = blosc2.blosc2_ext.open(
self.b2z_path,
mode="r",
offset=estore_offset,
mmap_mode=self.mmap_mode,
dparams=dparams,
)
for filepath in self.offsets:
if filepath.endswith((".b2nd", ".b2f")):
key = "/" + filepath[: -5 if filepath.endswith(".b2nd") else -4]
self.map_tree[key] = filepath
else: # .b2d
if not os.path.isdir(self.localpath):
raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.")
schunk = blosc2.blosc2_ext.open(
self.estore_path,
mode="r",
offset=0,
mmap_mode=self.mmap_mode,
dparams=dparams,
)
self._update_map_tree()
self._estore = EmbedStore(_from_schunk=schunk)
self.storage.meta = self._estore.storage.meta
def _init_write_append_mode(
self,
cparams: blosc2.CParams | None,
dparams: blosc2.DParams | None,
storage: blosc2.Storage | None,
):
"""Initialize store in write/append mode."""
if self.mode == "a" and os.path.exists(self.localpath):
if self.is_zip_store:
with zipfile.ZipFile(self.localpath, "r") as zf:
zf.extractall(self.working_dir)
elif not os.path.isdir(self.working_dir):
raise FileNotFoundError(f"Directory {self.working_dir} does not exist for reading.")
self._estore = EmbedStore(
urlpath=self.estore_path,
mode=self.mode,
cparams=cparams,
dparams=dparams,
storage=storage,
meta=self.storage.meta,
)
self._update_map_tree()
def _update_map_tree(self):
# Build map_tree from .b2nd and .b2f files in working dir
for root, _, files in os.walk(self.working_dir):
for file in files:
filepath = os.path.join(root, file)
if filepath.endswith((".b2nd", ".b2f")):
# Convert filename to key: remove extension and ensure starts with /
rel_path = os.path.relpath(filepath, self.working_dir)
# Normalize path separators to forward slashes for cross-platform consistency
rel_path = rel_path.replace(os.sep, "/")
if rel_path.endswith(".b2nd"):
key = rel_path[:-5]
elif rel_path.endswith(".b2f"):
key = rel_path[:-4]
else:
continue
if not key.startswith("/"):
key = "/" + key
self.map_tree[key] = rel_path
@property
def estore(self) -> EmbedStore:
"""Access the underlying EmbedStore."""
return self._estore
def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
"""Add a node to the DictStore."""
if isinstance(value, np.ndarray):
value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams)
# C2Array should always go to embed store; let estore handle it directly
if isinstance(value, C2Array):
self._estore[key] = value
return
exceeds_threshold = self.threshold is not None and value.nbytes >= self.threshold
# Consider both NDArray and SChunk external files (have urlpath)
external_file = isinstance(value, (blosc2.NDArray, SChunk)) and getattr(value, "urlpath", None)
if exceeds_threshold or (external_file and self.threshold is None):
# Choose extension based on type
ext = ".b2f" if isinstance(value, SChunk) else ".b2nd"
# Convert key to a proper file path within the tree directory
rel_key = key.lstrip("/")
dest_path = os.path.join(self.working_dir, rel_key + ext)
# Ensure the parent directory exists
parent_dir = os.path.dirname(dest_path)
if parent_dir and not os.path.exists(parent_dir):
os.makedirs(parent_dir, exist_ok=True)
# Save the value to the destination path
if not external_file:
if hasattr(value, "save"):
value.save(urlpath=dest_path)
else:
# An SChunk does not have a save() method
with open(dest_path, "wb") as f:
f.write(value.to_cframe())
else:
# This should be faster than using value.save() ?
shutil.copy2(value.urlpath, dest_path)
# Store relative path from tree directory
rel_path = os.path.relpath(dest_path, self.working_dir)
# Normalize to forward slashes
rel_path = rel_path.replace(os.sep, "/")
self.map_tree[key] = rel_path
else:
if external_file:
# Embed a copy by using cframe
value = blosc2.from_cframe(value.to_cframe())
self._estore[key] = value
def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array:
"""Retrieve a node from the DictStore."""
# Check map_tree first
if key in self.map_tree:
filepath = self.map_tree[key]
if filepath in self.offsets:
offset = self.offsets[filepath]["offset"]
return blosc2.blosc2_ext.open(
self.b2z_path,
mode="r",
offset=offset,
mmap_mode=self.mmap_mode,
dparams=self.dparams,
)
else:
urlpath = os.path.join(self.working_dir, filepath)
if os.path.exists(urlpath):
return blosc2.open(
urlpath,
mode="r" if self.mode == "r" else "a",
mmap_mode=self.mmap_mode if self.mode == "r" else None,
dparams=self.dparams,
)
else:
raise KeyError(f"File for key '{key}' not found in offsets or temporary directory.")
# Fall back to EmbedStore
return self._estore[key]
def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | C2Array | Any:
"""Retrieve a node, or default if not found."""
try:
return self[key]
except KeyError:
return default
def __delitem__(self, key: str) -> None:
"""Remove a node from the DictStore."""
if key in self.map_tree:
# Remove from map_tree and delete the external file
filepath = self.map_tree[key]
del self.map_tree[key]
# Delete the physical file if it exists
full_path = os.path.join(self.working_dir, filepath)
if os.path.exists(full_path):
os.remove(full_path)
elif key in self._estore:
del self._estore[key]
else:
raise KeyError(f"Key '{key}' not found")
def __contains__(self, key: str) -> bool:
"""Check if a key exists."""
return key in self.map_tree or key in self._estore
def __len__(self) -> int:
"""Return number of nodes."""
return len(self.map_tree) + len(self._estore)
def __iter__(self) -> Iterator[str]:
"""Iterate over keys."""
yield from self.map_tree.keys()
for key in self._estore:
if key not in self.map_tree:
yield key
def keys(self) -> Set[str]:
"""Return all keys."""
return self.map_tree.keys() | self._estore.keys()
def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
"""Iterate over all values."""
# Get all unique keys from both map_tree and _estore, with map_tree taking precedence
all_keys = set(self.map_tree.keys()) | set(self._estore.keys())
for key in all_keys:
if key in self.map_tree:
filepath = self.map_tree[key]
if self.is_zip_store:
if filepath in self.offsets:
offset = self.offsets[filepath]["offset"]
yield blosc2.blosc2_ext.open(
self.b2z_path,
mode="r",
offset=offset,
mmap_mode=self.mmap_mode,
dparams=self.dparams,
)
else:
urlpath = os.path.join(self.working_dir, filepath)
yield blosc2.open(
urlpath,
mode="r" if self.mode == "r" else "a",
mmap_mode=self.mmap_mode if self.mode == "r" else None,
dparams=self.dparams,
)
elif key in self._estore:
yield self._estore[key]
def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
"""Iterate over (key, value) pairs."""
# Get all unique keys from both map_tree and _estore, with map_tree taking precedence
all_keys = set(self.map_tree.keys()) | set(self._estore.keys())
for key in all_keys:
# Check map_tree first, then fall back to _estore
if key in self.map_tree:
filepath = self.map_tree[key]
if self.is_zip_store:
if filepath in self.offsets:
offset = self.offsets[filepath]["offset"]
yield (
key,
blosc2.blosc2_ext.open(
self.b2z_path,
mode="r",
offset=offset,
mmap_mode=self.mmap_mode,
dparams=self.dparams,
),
)
else:
urlpath = os.path.join(self.working_dir, filepath)
yield (
key,
blosc2.open(
urlpath,
mode="r" if self.mode == "r" else "a",
mmap_mode=self.mmap_mode if self.mode == "r" else None,
dparams=self.dparams,
),
)
elif key in self._estore:
yield key, self._estore[key]
def to_b2z(self, overwrite=False, filename=None) -> os.PathLike[Any] | str:
"""
Serialize zip store contents to the b2z file.
Parameters
----------
overwrite : bool, optional
If True, overwrite the existing b2z file if it exists. Default is False.
filename : str, optional
If provided, use this filename instead of the default b2z file path.
Returns
-------
filename : str
The absolute path to the created b2z file.
"""
if self.mode == "r":
raise ValueError("Cannot call to_b2z() on a DictStore opened in read mode.")
b2z_path = self.b2z_path if filename is None else filename
if not b2z_path.endswith(".b2z"):
raise ValueError("b2z_path must have a .b2z extension")
if os.path.exists(b2z_path) and not overwrite:
raise FileExistsError(f"'{b2z_path}' already exists. Use overwrite=True to overwrite.")
# Gather all files except estore_path
filepaths = []
for root, _, files in os.walk(self.working_dir):
for file in files:
filepath = os.path.join(root, file)
if os.path.abspath(filepath) != os.path.abspath(self.estore_path):
filepaths.append(filepath)
# Sort filepaths by file size from largest to smallest
filepaths.sort(key=os.path.getsize, reverse=True)
with zipfile.ZipFile(self.b2z_path, "w", zipfile.ZIP_STORED) as zf:
# Write all files (except estore_path) first (sorted by size)
for filepath in filepaths:
arcname = os.path.relpath(filepath, self.working_dir)
zf.write(filepath, arcname)
# Write estore last
if os.path.exists(self.estore_path):
arcname = os.path.relpath(self.estore_path, self.working_dir)
zf.write(self.estore_path, arcname)
return os.path.abspath(self.b2z_path)
def _get_zip_offsets(self) -> dict[str, dict[str, int]]:
"""Get offset and length of all files in the zip archive."""
self.offsets = {} # Reset offsets
with open(self.b2z_path, "rb") as f, zipfile.ZipFile(f) as zf:
for info in zf.infolist():
# info.header_offset points to the local file header
# The actual file data starts after the header
f.seek(info.header_offset)
local_header = f.read(30)
filename_len = int.from_bytes(local_header[26:28], "little")
extra_len = int.from_bytes(local_header[28:30], "little")
data_offset = info.header_offset + 30 + filename_len + extra_len
self.offsets[info.filename] = {"offset": data_offset, "length": info.file_size}
return self.offsets
def close(self) -> None:
"""Persist changes and cleanup."""
# Repack estore
# TODO: for some reason this is not working
# if self.mode != "r":
# cframe = self._estore.to_cframe()
# with open(self._estore.urlpath, "wb") as f:
# f.write(cframe)
if self.is_zip_store and self.mode in ("w", "a"):
# Serialize to b2z file
self.to_b2z(overwrite=True)
# Clean up temporary directory if we created it
if self._temp_dir_obj is not None:
self._temp_dir_obj.cleanup()
def __enter__(self):
"""Context manager enter."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the ``with`` block, closing the store.

    Exceptions raised inside the block are never suppressed: the method
    always returns ``False`` after closing.
    """
    self.close()
    suppress_exception = False
    return suppress_exception
if __name__ == "__main__":
    # Demo: build a zip-backed DictStore, then reopen it in read-only mode.
    localpath = "example_dstore.b2z"

    with DictStore(localpath, mode="w") as dstore:
        dstore["/node1"] = np.array([1, 2, 3])
        dstore["/node2"] = blosc2.ones(2)

        # The third node lives in its own external file.
        arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w")
        dstore["/dir1/node3"] = arr_external

        print("DictStore keys:", list(dstore.keys()))
        print("Node1 data:", dstore["/node1"][:])
        print("Node2 data:", dstore["/node2"][:])
        print("Node3 data (external):", dstore["/dir1/node3"][:])

        del dstore["/node1"]
        print("After deletion, keys:", list(dstore.keys()))

    # Reopen the zip file that was written when the block above exited.
    with DictStore(localpath, mode="r") as dstore_opened:
        print("Opened dstore keys:", list(dstore_opened.keys()))
        for key, value in dstore_opened.items():
            if not isinstance(value, blosc2.NDArray):
                continue
            print(
                f"Key: {key}, Shape: {value.shape}, Values: {value[:10] if len(value) > 3 else value[:]}"
            )

File diff suppressed because it is too large Load Diff

@ -0,0 +1,329 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
import copy
from collections.abc import Iterator, KeysView
from typing import Any
import numpy as np
import blosc2
from blosc2.c2array import C2Array
from blosc2.schunk import SChunk
PROFILE = False # Set to True to enable PROFILE prints in EmbedStore
class EmbedStore:
    """
    A dictionary-like container for storing NumPy/Blosc2 arrays (NDArray or SChunk) as nodes.

    For NumPy arrays, Blosc2 NDArrays (even if they live in external ``.b2nd`` files),
    and Blosc2 SChunk objects, the data is read and embedded into the store. For remote
    arrays (``C2Array``), only lightweight references (URL base and path) are stored.
    If you need a richer hierarchical container with optional external references, consider using
    `blosc2.TreeStore` or `blosc2.DictStore`.

    Parameters
    ----------
    urlpath : str or None, optional
        Path for persistent storage. Using a '.b2e' extension is recommended.
        If None, the embed store will be in memory only, which can be
        deserialized later using the :func:`blosc2.from_cframe` function.
    mode : str, optional
        File mode ('r', 'w', 'a'). Default is 'a'.
    mmap_mode : str or None, optional
        Memory mapping mode for read access. For now, only ``"r"`` is supported,
        and only when ``mode="r"``. Default is None.
    cparams : dict or None, optional
        Compression parameters for nodes and the embed store itself.
        Default is None, which uses the default Blosc2 parameters.
    dparams : dict or None, optional
        Decompression parameters for nodes and the embed store itself.
        Default is None, which uses the default Blosc2 parameters.
    storage : blosc2.Storage or None, optional
        Storage properties for the embed store. If passed, it will override
        the `urlpath` and `mode` parameters.
    chunksize : int, optional
        Size of chunks for the backing storage. Default is ``2**13`` (8 KiB).
    meta : dict or None, optional
        Metalayers attached to a newly created backing storage. Default is None,
        which uses ``{"b2embed": {"version": 1}}``.

    Examples
    --------
    >>> estore = EmbedStore(urlpath="example_estore.b2e", mode="w")
    >>> estore["/node1"] = np.array([1, 2, 3])
    >>> estore["/node2"] = blosc2.ones(2)
    >>> estore["/node3"] = blosc2.arange(3, dtype="i4", urlpath="external_node3.b2nd", mode="w")
    >>> urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo")
    >>> estore["/node4"] = blosc2.open(urlpath, mode="r")
    >>> print(list(estore.keys()))
    ['/node1', '/node2', '/node3', '/node4']
    >>> print(estore["/node1"][:])
    [1 2 3]
    """

    def __init__(
        self,
        urlpath: str | None = None,
        mode: str = "a",
        cparams: blosc2.CParams | None = None,
        dparams: blosc2.DParams | None = None,
        storage: blosc2.Storage | None = None,
        chunksize: int | None = 2**13,
        _from_schunk: SChunk | None = None,
        *,
        mmap_mode: str | None = None,
        meta: dict | None = None,
    ):
        """Initialize EmbedStore.

        Raises
        ------
        ValueError
            If `mmap_mode` is neither None nor ``"r"``, or if ``mmap_mode="r"``
            is combined with a `mode` other than ``"r"``.
        """
        # For some reason, the SChunk store cannot achieve the same compression ratio as the NDArray store,
        # although it is more efficient in terms of CPU usage.
        # Let's use the SChunk store by default and continue experimenting.
        self._schunk_store = True  # put this to False to use an NDArray instead of a SChunk
        self.urlpath = urlpath

        if mmap_mode not in (None, "r"):
            raise ValueError("For EmbedStore containers, mmap_mode must be None or 'r'")
        if mmap_mode == "r" and mode != "r":
            raise ValueError("For EmbedStore containers, mmap_mode='r' requires mode='r'")
        self.mmap_mode = mmap_mode

        if _from_schunk is not None:
            # Wrap an already existing super-chunk (e.g. one rebuilt from a cframe).
            self.cparams = _from_schunk.cparams
            self.dparams = _from_schunk.dparams
            self.mode = mode
            self._store = _from_schunk
            self.storage = blosc2.Storage()
            self.storage.meta = _from_schunk.meta
            self._load_metadata()
            return

        self.mode = mode
        self.cparams = cparams or blosc2.CParams()
        self.dparams = dparams or blosc2.DParams()

        if storage is None:
            self.storage = blosc2.Storage(
                contiguous=True,
                urlpath=urlpath,
                mode=mode,
            )
        else:
            self.storage = storage

        if mode in ("r", "a") and urlpath:
            # Open an existing store and restore its node map.
            # NOTE(review): with mode="a" this presumably requires the file to
            # exist already -- confirm against blosc2_ext.open.
            self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0, mmap_mode=mmap_mode)
            self.storage.meta = self._store.meta
            self._load_metadata()
            return

        # Create a brand new backing store.
        _cparams = copy.deepcopy(self.cparams)
        _cparams.typesize = 1  # ensure typesize is set to 1 for byte storage
        _storage = self.storage
        _storage.meta = meta if meta is not None else {"b2embed": {"version": 1}}
        if self._schunk_store:
            self._store = blosc2.SChunk(
                chunksize=chunksize,
                data=None,
                cparams=_cparams,
                dparams=self.dparams,
                storage=_storage,
            )
        else:
            self._store = blosc2.zeros(
                chunksize,
                dtype=np.uint8,
                cparams=_cparams,
                dparams=self.dparams,
                storage=_storage,
            )
        self._embed_map: dict = {}
        self._current_offset = 0

    def _validate_key(self, key: str) -> None:
        """Validate a node key that is about to be added.

        Raises
        ------
        TypeError
            If `key` is not a string.
        ValueError
            If `key` does not start with '/', ends with '/' (unless it is the
            root key '/'), contains '//', contains one of the forbidden
            characters, or already exists in the store.
        """
        if not isinstance(key, str):
            raise TypeError("Key must be a string.")
        if not key.startswith("/"):
            raise ValueError("Key must start with '/'.")
        if len(key) > 1 and key.endswith("/"):
            raise ValueError("Key cannot end with '/' unless it is the root key '/'.")
        if "//" in key:
            raise ValueError("Key cannot contain consecutive slashes '//'.")
        for char in (":", "\0", "\n", "\r", "\t"):
            if char in key:
                raise ValueError(f"Key cannot contain character: {char!r}")
        if key in self._embed_map:
            raise ValueError(f"Key '{key}' already exists in store.")

    def _ensure_capacity(self, needed_bytes: int) -> None:
        """Grow the backing storage so that `needed_bytes` more bytes fit.

        Growth is to at least 1.5 times the current size. Only used when the
        backing store is an NDArray.
        """
        required_size = self._current_offset + needed_bytes
        if required_size > self._store.shape[0]:
            new_size = max(required_size, int(self._store.shape[0] * 1.5))
            self._store.resize((new_size,))

    def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
        """Add a node to the embed store.

        Raises
        ------
        ValueError
            If the store is read-only or `key` is invalid or already present.
        TypeError
            If `key` is not a string.
        """
        if self.mode == "r":
            raise ValueError("Cannot set items in read-only mode.")
        self._validate_key(key)
        if isinstance(value, C2Array):
            # Remote arrays are stored by reference only.
            self._embed_map[key] = {"urlbase": value.urlbase, "path": value.path}
        else:
            if isinstance(value, np.ndarray):
                value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams)
            serialized_data = value.to_cframe()
            data_len = len(serialized_data)
            if not self._schunk_store:
                self._ensure_capacity(data_len)
            offset = self._current_offset
            if self._schunk_store:
                self._store[offset : offset + data_len] = serialized_data
            else:
                self._store[offset : offset + data_len] = np.frombuffer(serialized_data, dtype=np.uint8)
            self._current_offset += data_len
            self._embed_map[key] = {"offset": offset, "length": data_len}
        self._save_metadata()

    def __getitem__(self, key: str) -> blosc2.NDArray | SChunk:
        """Retrieve a node from the embed store.

        Raises
        ------
        KeyError
            If `key` is not in the store.
        """
        if key not in self._embed_map:
            raise KeyError(f"Key '{key}' not found in the embed store.")
        node_info = self._embed_map[key]
        urlbase = node_info.get("urlbase", None)
        if urlbase:
            # Remote node: reopen it from its stored reference.
            urlpath = blosc2.URLPath(node_info["path"], urlbase=urlbase)
            return blosc2.open(urlpath, mode="r")
        offset = node_info["offset"]
        length = node_info["length"]
        serialized_data = bytes(self._store[offset : offset + length])
        # It is safer to copy data here, as the reference to the SChunk may disappear
        # Use from_cframe so we can deserialize either an NDArray or an SChunk
        return blosc2.from_cframe(serialized_data, copy=True)

    def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | Any:
        """Retrieve a node, or default if not found."""
        return self[key] if key in self._embed_map else default

    def __delitem__(self, key: str) -> None:
        """Remove a node from the embed store.

        Only the entry in the node map is dropped; the bytes already written
        to the backing storage are left in place.

        Raises
        ------
        ValueError
            If the store is read-only.
        KeyError
            If `key` is not in the store.
        """
        # Mirror __setitem__: a read-only store must not be mutated.
        if self.mode == "r":
            raise ValueError("Cannot delete items in read-only mode.")
        if key not in self._embed_map:
            raise KeyError(f"Key '{key}' not found in the embed store.")
        del self._embed_map[key]
        self._save_metadata()

    def __contains__(self, key: str) -> bool:
        """Check if a key exists."""
        return key in self._embed_map

    def __len__(self) -> int:
        """Return number of nodes."""
        return len(self._embed_map)

    def __iter__(self) -> Iterator[str]:
        """Iterate over keys."""
        return iter(self._embed_map)

    def keys(self) -> KeysView[str]:
        """Return all keys."""
        return self._embed_map.keys()

    def values(self) -> Iterator[blosc2.NDArray | SChunk]:
        """Iterate over all values."""
        for key in self._embed_map:
            yield self[key]

    def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk]]:
        """Iterate over (key, value) pairs."""
        for key in self._embed_map:
            yield key, self[key]

    def _save_metadata(self) -> None:
        """Save the node map and the current write offset to vlmeta."""
        metadata = {"embed_map": self._embed_map, "current_offset": self._current_offset}
        self._store.vlmeta["estore_metadata"] = metadata

    def _load_metadata(self) -> None:
        """Load the node map and write offset from vlmeta, or start empty."""
        if "estore_metadata" in self._store.vlmeta:
            metadata = self._store.vlmeta["estore_metadata"]
            self._embed_map = metadata["embed_map"]
            self._current_offset = metadata["current_offset"]
        else:
            self._embed_map = {}
            self._current_offset = 0

    def to_cframe(self) -> bytes:
        """Serialize embed store to CFrame format."""
        return self._store.to_cframe()

    def __enter__(self):
        """Context manager enter."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        # No need to close anything as SChunk/NDArray handles persistence automatically
        return False
def estore_from_cframe(cframe: bytes, copy: bool = False) -> EmbedStore:
    """
    Rebuild an EmbedStore from its serialized CFrame.

    Parameters
    ----------
    cframe : bytes
        The CFrame buffer holding the serialized embed store.
    copy : bool, optional
        Whether the data should be copied when deserializing. Default is False.

    Returns
    -------
    estore : EmbedStore
        An EmbedStore wrapping the super-chunk recovered from `cframe`.
    """
    return EmbedStore(_from_schunk=blosc2.schunk_from_cframe(cframe, copy=copy))
if __name__ == "__main__":
    # Demo: populate an EmbedStore, delete a node and read everything back.
    persistent = False
    # Pass e.g. cparams=blosc2.CParams(clevel=0) to either constructor to
    # experiment with other compression parameters.
    estore = EmbedStore(urlpath="example_estore.b2e", mode="w") if persistent else EmbedStore()

    estore["/node1"] = np.array([1, 2, 3])
    estore["/node2"] = blosc2.ones(2)

    # The third node is a remote array.
    urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo")
    arr_remote = blosc2.open(urlpath, mode="r")
    estore["/dir1/node3"] = arr_remote

    print("EmbedStore keys:", list(estore.keys()))
    print("Node1 data:", estore["/node1"][:])
    print("Node2 data:", estore["/node2"][:])
    print("Node3 data (remote):", estore["/dir1/node3"][:3])

    del estore["/node1"]
    print("After deletion, keys:", list(estore.keys()))

    # Read the store back, either from disk or from its in-memory cframe.
    if persistent:
        estore_read = EmbedStore(urlpath="example_estore.b2e", mode="r")
    else:
        estore_read = blosc2.from_cframe(estore.to_cframe())

    print("Read keys:", list(estore_read.keys()))
    for key, value in estore_read.items():
        print(
            f"shape of {key}: {value.shape}, dtype: {value.dtype}, map: {estore_read._embed_map[key]}, "
            f"values: {value[:10] if len(value) > 3 else value[:]}"
        )

@ -0,0 +1,15 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
class MissingOperands(ValueError):
    """Error describing a lazy expression that has missing operands.

    Attributes
    ----------
    expr
        The lazy expression, as passed to the constructor.
    missing_ops
        The missing operands, as passed to the constructor.
    """

    def __init__(self, expr, missing_ops):
        super().__init__(f'Lazy expression "{expr}" with missing operands: {missing_ops}')
        # Keep the raw values so handlers can inspect them programmatically.
        self.expr = expr
        self.missing_ops = missing_ops

@ -0,0 +1,62 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
def fft(*args, **kwargs):
    """Placeholder for ``fft``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fft is not implemented")
def ifft(*args, **kwargs):
    """Placeholder for ``ifft``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ifft is not implemented")
def fftn(*args, **kwargs):
    """Placeholder for ``fftn``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fftn is not implemented")
def ifftn(*args, **kwargs):
    """Placeholder for ``ifftn``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ifftn is not implemented")
def rfft(*args, **kwargs):
    """Placeholder for ``rfft``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("rfft is not implemented")
def irfft(*args, **kwargs):
    """Placeholder for ``irfft``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("irfft is not implemented")
def rfftn(*args, **kwargs):
    """Placeholder for ``rfftn``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("rfftn is not implemented")
def irfftn(*args, **kwargs):
    """Placeholder for ``irfftn``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("irfftn is not implemented")
def hfft(*args, **kwargs):
    """Placeholder for ``hfft``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("hfft is not implemented")
def ihfft(*args, **kwargs):
    """Placeholder for ``ihfft``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ihfft is not implemented")
def fftfreq(*args, **kwargs):
    """Placeholder for ``fftfreq``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fftfreq is not implemented")
def rfftfreq(*args, **kwargs):
    """Placeholder for ``rfftfreq``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("rfftfreq is not implemented")
def fftshift(*args, **kwargs):
    """Placeholder for ``fftshift``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("fftshift is not implemented")
def ifftshift(*args, **kwargs):
    """Placeholder for ``ifftshift``; not implemented.

    Any arguments are accepted and ignored, so callers get
    ``NotImplementedError`` rather than a ``TypeError`` about the signature.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("ifftshift is not implemented")

@ -0,0 +1,675 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
/** @file b2nd.h
* @brief Blosc2 NDim header file.
*
* This file contains Blosc2 NDim public API and the structures needed to use it.
* @author Blosc Development Team <blosc@blosc.org>
*/
#ifndef BLOSC_B2ND_H
#define BLOSC_B2ND_H
#ifdef __cplusplus
extern "C" {
#endif
#include "blosc2/blosc2-export.h"
#ifdef __cplusplus
}
#endif
#include "blosc2.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_MSC_VER)
#define B2ND_DEPRECATED(msg) __declspec(deprecated(msg))
#elif defined(__GNUC__) || defined(__clang__)
#define B2ND_DEPRECATED(msg) __attribute__((deprecated(msg)))
#else
#define B2ND_DEPRECATED(msg)
#endif
/* The version for metalayer format; starts from 0 and it must not exceed 127 */
#define B2ND_METALAYER_VERSION 0
/* The maximum number of dimensions for b2nd arrays */
#define B2ND_MAX_DIM 16
/* The maximum number of metalayers for b2nd arrays */
#define B2ND_MAX_METALAYERS (BLOSC2_MAX_METALAYERS - 1)
/* NumPy dtype format
* https://numpy.org/doc/stable/reference/arrays.dtypes.html#arrays-dtypes-constructing
*/
#define DTYPE_NUMPY_FORMAT 0
/* The default data type */
#define B2ND_DEFAULT_DTYPE "|u1"
/* The default data format */
#define B2ND_DEFAULT_DTYPE_FORMAT DTYPE_NUMPY_FORMAT
/**
* @brief An *optional* cache for a single chunk.
*
* When a chunk is needed, it is copied into this cache. In this way, if the same chunk is needed
* again afterwards, it is not necessary to recover it because it is already in the cache.
*/
struct chunk_cache_s {
uint8_t *data;
//!< The chunk data.
int64_t nchunk;
//!< The number of the chunk held in the cache. If @p nchunk equals -1, the cache is empty.
};
/**
* @brief General parameters needed for the creation of a b2nd array.
*/
typedef struct b2nd_context_s b2nd_context_t; /* opaque type */
/**
* @brief A multidimensional array of data that can be compressed.
*
* All per-dimension members are sized for #B2ND_MAX_DIM entries.
*/
typedef struct {
blosc2_schunk *sc;
//!< Pointer to a Blosc super-chunk
int64_t shape[B2ND_MAX_DIM];
//!< Shape of original data.
int32_t chunkshape[B2ND_MAX_DIM];
//!< Shape of each chunk.
int64_t extshape[B2ND_MAX_DIM];
//!< Shape of padded data.
int32_t blockshape[B2ND_MAX_DIM];
//!< Shape of each block.
int64_t extchunkshape[B2ND_MAX_DIM];
//!< Shape of padded chunk.
int64_t nitems;
//!< Number of items in original data.
int32_t chunknitems;
//!< Number of items in each chunk.
int64_t extnitems;
//!< Number of items in padded data.
int32_t blocknitems;
//!< Number of items in each block.
int64_t extchunknitems;
//!< Number of items in a padded chunk.
int8_t ndim;
//!< Number of dimensions of the data.
struct chunk_cache_s chunk_cache;
//!< A single-chunk cache (see struct chunk_cache_s).
int64_t item_array_strides[B2ND_MAX_DIM];
//!< Per-dimension strides; judging by the name, of items within the array (TODO confirm).
int64_t item_chunk_strides[B2ND_MAX_DIM];
//!< Per-dimension strides; judging by the name, of items within a chunk (TODO confirm).
int64_t item_extchunk_strides[B2ND_MAX_DIM];
//!< Per-dimension strides; judging by the name, of items within a padded chunk (TODO confirm).
int64_t item_block_strides[B2ND_MAX_DIM];
//!< Per-dimension strides; judging by the name, of items within a block (TODO confirm).
int64_t block_chunk_strides[B2ND_MAX_DIM];
//!< Per-dimension strides; judging by the name, of blocks within a chunk (TODO confirm).
int64_t chunk_array_strides[B2ND_MAX_DIM];
//!< Per-dimension strides; judging by the name, of chunks within the array (TODO confirm).
char *dtype;
//!< Data type. Different formats can be supported (see dtype_format).
int8_t dtype_format;
//!< The format of the data type. Default is DTYPE_NUMPY_FORMAT.
} b2nd_array_t;
/**
* @brief Create b2nd params.
*
* @param b2_storage The Blosc2 storage params.
* @param ndim The dimensions.
* @param shape The shape.
* @param chunkshape The chunk shape.
* @param blockshape The block shape.
* @param dtype The data type expressed as a string version.
* @param dtype_format The data type format; DTYPE_NUMPY_FORMAT should be chosen for NumPy compatibility.
* @param metalayers The memory pointer to the list of the metalayers desired.
* @param nmetalayers The number of metalayers.
*
* @return A pointer to the new b2nd params. NULL is returned if this fails.
*
* @note The pointer returned must be freed when not used anymore with #b2nd_free_ctx.
*
*/
BLOSC_EXPORT b2nd_context_t *
b2nd_create_ctx(const blosc2_storage *b2_storage, int8_t ndim, const int64_t *shape, const int32_t *chunkshape,
const int32_t *blockshape, const char *dtype, int8_t dtype_format, const blosc2_metalayer *metalayers,
int32_t nmetalayers);
/**
* @brief Free the resources associated with b2nd_context_t.
*
* @param ctx The b2nd context to free.
*
* @return An error code.
*
* @note This is safe in the sense that it will not free the schunk pointer in internal cparams.
*
*/
BLOSC_EXPORT int b2nd_free_ctx(b2nd_context_t *ctx);
/**
* @brief Create an uninitialized array.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_uninit(b2nd_context_t *ctx, b2nd_array_t **array);
/**
* @brief Create an empty array.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_empty(b2nd_context_t *ctx, b2nd_array_t **array);
/**
* Create an array, with zero being used as the default value for
* uninitialized portions of the array.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_zeros(b2nd_context_t *ctx, b2nd_array_t **array);
/**
* Create an array, with NaN being used as the default value for
* uninitialized portions of the array. Should only be used with type sizes
* of either 4 or 8. Other sizes generate an error.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_nans(b2nd_context_t *ctx, b2nd_array_t **array);
/**
* Create an array, with @p fill_value being used as the default value for
* uninitialized portions of the array.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
* @param fill_value Default value for uninitialized portions of the array.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_full(b2nd_context_t *ctx, b2nd_array_t **array, const void *fill_value);
/**
* @brief Free an array.
*
* @param array The array.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_free(b2nd_array_t *array);
/**
* @brief Create a b2nd array from a super-chunk. It can only be used if the array
* is backed by a blosc super-chunk.
*
* @param schunk The blosc super-chunk where the b2nd array is stored.
* @param array The memory pointer where the array will be created.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array);
/**
* Create a serialized super-chunk from a b2nd array.
*
* @param array The b2nd array to be serialized.
* @param cframe The pointer of the buffer where the in-memory array will be copied.
* @param cframe_len The length of the in-memory array buffer.
* @param needs_free Whether the buffer should be freed or not.
*
* @return An error code
*/
BLOSC_EXPORT int b2nd_to_cframe(const b2nd_array_t *array, uint8_t **cframe,
int64_t *cframe_len, bool *needs_free);
/**
* @brief Create a b2nd array from a serialized super-chunk.
*
* @param cframe The buffer of the in-memory array.
* @param cframe_len The size (in bytes) of the in-memory array.
* @param copy Whether b2nd should make a copy of the cframe data or not. The copy will be made to an internal sparse frame.
* @param array The memory pointer where the array will be created.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_from_cframe(uint8_t *cframe, int64_t cframe_len, bool copy, b2nd_array_t **array);
/**
* @brief Open a b2nd array from a file.
*
* @param urlpath The path of the b2nd array on disk.
* @param array The memory pointer where the array info will be stored.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_open(const char *urlpath, b2nd_array_t **array);
/**
* @brief Open a b2nd array from a file using an offset.
*
* @param urlpath The path of the b2nd array on disk.
* @param array The memory pointer where the array info will be stored.
* @param offset The offset in the file where the b2nd array frame starts.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_open_offset(const char *urlpath, b2nd_array_t **array, int64_t offset);
/**
* @brief Save b2nd array into a specific urlpath.
*
* @param array The array to be saved.
* @param urlpath The urlpath where the array will be stored.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_save(const b2nd_array_t *array, char *urlpath);
/**
* @brief Append a b2nd array into a file.
*
* @param array The array to write.
* @param urlpath The path for persistent storage.
*
* @return If successful, return the offset where @p array has been appended in @p urlpath.
* Else, a negative value.
*/
BLOSC_EXPORT int64_t b2nd_save_append(const b2nd_array_t *array, const char *urlpath);
/**
* @brief Create a b2nd array from a C buffer.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
* @param buffer The buffer where source data is stored.
* @param buffersize The size (in bytes) of the buffer.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_from_cbuffer(b2nd_context_t *ctx, b2nd_array_t **array, const void *buffer, int64_t buffersize);
/**
* @brief Extract the data from a b2nd array into a C buffer.
*
* @param array The b2nd array.
* @param buffer The buffer where the data will be stored.
* @param buffersize Size (in bytes) of the buffer.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_to_cbuffer(const b2nd_array_t *array, void *buffer, int64_t buffersize);
/**
* @brief Get a slice from an array and store it into a new array.
*
* @param ctx The b2nd context for the new array.
* @param array The memory pointer where the array will be created.
* @param src The array from which the slice will be extracted
* @param start The coordinates where the slice will begin.
* @param stop The coordinates where the slice will end.
*
* @return An error code.
*
* @note The ndim and shape from ctx will be overwritten by the src and stop-start respectively.
*
*/
BLOSC_EXPORT int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, const b2nd_array_t *src,
const int64_t *start, const int64_t *stop);
/**
* @brief Squeeze a b2nd array
*
* This function removes selected single-dimensional entries from the shape of a
* b2nd array.
*
* @param array The b2nd array.
* @param view The memory pointer where the new view will be created.
* @param index Indexes of the single-dimensional entries to remove.
*
* @return An error code
*/
BLOSC_EXPORT int b2nd_squeeze_index(b2nd_array_t *array, b2nd_array_t **view, const bool *index);
/**
* @brief Squeeze a b2nd array
*
* This function removes single-dimensional entries from the shape of a b2nd array.
*
* @param array The b2nd array.
* @param view The memory pointer where the new view will be created.
*
* @return An error code
*/
BLOSC_EXPORT int b2nd_squeeze(b2nd_array_t *array, b2nd_array_t **view);
/**
* @brief Add a newaxis to a b2nd array at location @p axis.
*
* @param array The b2nd array to be expanded.
* @param axis The axes where the new dimensions will be added.
* @param view The memory pointer where the new view will be created.
* @param final_dims The final number of dimensions. Should be same as the number of elements in @p axis.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t **view, const bool *axis,
const uint8_t final_dims);
/**
* @brief Get a slice from an array and store it into a C buffer.
*
* @param array The array from which the slice will be extracted.
* @param start The coordinates where the slice will begin.
* @param stop The coordinates where the slice will end.
* @param buffershape The shape of the buffer.
* @param buffer The buffer where the data will be stored.
* @param buffersize The size (in bytes) of the buffer.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_get_slice_cbuffer(const b2nd_array_t *array, const int64_t *start, const int64_t *stop,
void *buffer, const int64_t *buffershape, int64_t buffersize);
/**
* @brief Set a slice in a b2nd array using a C buffer.
*
* @param buffer The buffer where the slice data is.
* @param buffershape The shape of the buffer.
* @param buffersize The size (in bytes) of the buffer.
* @param start The coordinates where the slice will begin.
* @param stop The coordinates where the slice will end.
* @param array The b2nd array where the slice will be set
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_set_slice_cbuffer(const void *buffer, const int64_t *buffershape, int64_t buffersize,
const int64_t *start, const int64_t *stop, b2nd_array_t *array);
/**
* @brief Make a copy of the array data. The copy is done into a new b2nd array.
*
* @param ctx The b2nd context for the new array.
* @param src The array from which data is copied.
* @param array The memory pointer where the array will be created.
*
* @return An error code
*
* @note The ndim and shape in ctx will be overwritten by the src ctx.
*
*/
BLOSC_EXPORT int b2nd_copy(b2nd_context_t *ctx, const b2nd_array_t *src, b2nd_array_t **array);
/**
* @brief Concatenate arrays. The result is stored in a new b2nd array, or an enlarged one.
*
* @param ctx The b2nd context for the new array.
* @param src1 The first array from which data is copied.
* @param src2 The second array from which data is copied.
* @param axis The axis along which the arrays will be concatenated.
* @param copy Whether the data should be copied or not. If false, the @p src1 array
* will be expanded as needed to keep the result.
* @param array The memory pointer where the array will be created. It will have the same
* metalayers of @p src1, except for the b2nd metalayer, which will be updated with the
* new shape.
*
* @note The two arrays must have the same shape in all dimensions except the concatenation axis.
* Also, the typesize of the two arrays must be the same.
*
* @return An error code
*
* @note The ndim and shape in ctx will be overwritten by the src1 ctx.
*
*/
BLOSC_EXPORT int b2nd_concatenate(b2nd_context_t *ctx, const b2nd_array_t *src1, const b2nd_array_t *src2,
int8_t axis, bool copy, b2nd_array_t **array);
/**
* @brief Print metalayer parameters.
*
* @param array The array where the metalayer is stored.
*
* @return An error code
*/
BLOSC_EXPORT int b2nd_print_meta(const b2nd_array_t *array);
/**
* @brief Resize the shape of an array
*
* @param array The array to be resized.
* @param new_shape The new shape of the array.
* @param start The position in which the array will be extended or shrunk.
*
* @return An error code
*/
BLOSC_EXPORT int b2nd_resize(b2nd_array_t *array, const int64_t *new_shape, const int64_t *start);
/**
* @brief Insert given buffer in an array extending the given axis.
*
* @param array The array to insert the data in.
* @param buffer The buffer data to be inserted.
* @param buffersize The size (in bytes) of the buffer.
* @param axis The axis that will be extended.
* @param insert_start The position inside the axis to start inserting the data.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_insert(b2nd_array_t *array, const void *buffer, int64_t buffersize,
int8_t axis, int64_t insert_start);
/**
* Append a buffer at the end of a b2nd array.
*
* @param array The array to append the data in.
* @param buffer The buffer data to be appended.
* @param buffersize Size (in bytes) of the buffer.
* @param axis The axis that will be extended to append the data.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_append(b2nd_array_t *array, const void *buffer, int64_t buffersize,
int8_t axis);
/**
* @brief Shrink the given axis by deleting @p delete_len items.
*
* @param array The array to shrink.
* @param axis The axis to shrink.
* @param delete_start The start position from the axis to start deleting chunks.
* @param delete_len The number of items to delete from array->shape[axis].
* The newshape[axis] will be the old array->shape[axis] - delete_len
*
* @return An error code.
*
* @note See also b2nd_resize
*/
BLOSC_EXPORT int b2nd_delete(b2nd_array_t *array, int8_t axis,
int64_t delete_start, int64_t delete_len);
// Indexing section
/**
* @brief Get an element selection along each dimension of an array independently.
*
* @param array The array to get the data from.
* @param selection The elements along each dimension.
* @param selection_size The size of the selection along each dimension.
* @param buffer The buffer for getting the data.
* @param buffershape The shape of the buffer.
* @param buffersize The buffer size (in bytes).
*
* @return An error code.
*
* @note See also b2nd_set_orthogonal_selection.
*/
BLOSC_EXPORT int b2nd_get_orthogonal_selection(const b2nd_array_t *array, int64_t **selection,
int64_t *selection_size, void *buffer,
int64_t *buffershape, int64_t buffersize);
/**
* @brief Set an element selection along each dimension of an array independently.
*
* @param array The array to set the data to.
* @param selection The elements along each dimension.
* @param selection_size The size of the selection along each dimension.
* @param buffer The buffer with the data for setting.
* @param buffershape The shape of the buffer.
* @param buffersize The buffer size (in bytes).
*
* @return An error code.
*
* @note See also b2nd_get_orthogonal_selection.
*/
BLOSC_EXPORT int b2nd_set_orthogonal_selection(b2nd_array_t *array, int64_t **selection,
int64_t *selection_size, const void *buffer,
int64_t *buffershape, int64_t buffersize);
/**
* @brief Create the metainfo for the b2nd metalayer.
*
* @param ndim The number of dimensions in the array.
* @param shape The shape of the array.
* @param chunkshape The shape of the chunks in the array.
* @param blockshape The shape of the blocks in the array.
* @param dtype A string representation of the data type of the array.
* @param dtype_format The format of the dtype representation. 0 means NumPy.
* @param smeta The msgpack buffer (output). It is passed as a pointer to a pointer;
* NOTE(review): presumably the function allocates the buffer and stores its address
* in *smeta -- confirm who owns (and must free) it.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_serialize_meta(int8_t ndim, const int64_t *shape, const int32_t *chunkshape,
const int32_t *blockshape, const char *dtype,
int8_t dtype_format, uint8_t **smeta);
/**
* @brief Read the metainfo in the b2nd metalayer.
*
* @param smeta The msgpack buffer (input).
* @param smeta_len The length of the smeta buffer (input).
* @param ndim The number of dimensions in the array (output).
* @param shape The shape of the array (output).
* @param chunkshape The shape of the chunks in the array (output).
* @param blockshape The shape of the blocks in the array (output).
* @param dtype A string representation of the data type of the array (output).
* NOTE(review): passed as char**, so the string is presumably allocated by the
* function -- confirm who must free it.
* @param dtype_format The format of the dtype representation (output). 0 means NumPy (the default).
*
* @note This function is inlined and available even when not linking with libblosc2.
* NOTE(review): the declaration below is a plain BLOSC_EXPORT prototype, which looks
* at odds with the "inlined" statement above -- confirm which one is current.
*
* @return An error code.
*/
BLOSC_EXPORT int b2nd_deserialize_meta(const uint8_t *smeta, int32_t smeta_len, int8_t *ndim, int64_t *shape,
int32_t *chunkshape, int32_t *blockshape, char **dtype, int8_t *dtype_format);
// Utilities for C buffers representing multidimensional arrays
/**
* @brief Copy a slice of a source array into another array. The arrays have
* the same number of dimensions (though their shapes may differ), the same
* item size, and they are stored as C buffers with contiguous data (any
* padding is considered part of the array).
*
* @param ndim The number of dimensions in both arrays.
* @param itemsize The size of the individual data item in both arrays.
* It is an unsigned 8-bit value here, so item sizes above 255 bytes cannot be expressed.
* @param src The buffer for getting the data from the source array.
* @param src_pad_shape The shape of the source array, including padding.
* @param src_start The source coordinates where the slice will begin.
* @param src_stop The source coordinates where the slice will end.
* @param dst The buffer for setting the data into the destination array.
* @param dst_pad_shape The shape of the destination array, including padding.
* @param dst_start The destination coordinates where the slice will be placed.
*
* @return An error code.
*
* @note This function is deprecated and kept only for backward compatibility with
* existing code. New code should use b2nd_copy_buffer2 instead.
*
* @note Please make sure that slice boundaries fit within the source and
* destination arrays before using this function, as it does not perform these
* checks itself.
*/
B2ND_DEPRECATED("Use b2nd_copy_buffer2 instead.")
BLOSC_EXPORT int b2nd_copy_buffer(int8_t ndim,
uint8_t itemsize,
const void *src, const int64_t *src_pad_shape,
const int64_t *src_start, const int64_t *src_stop,
void *dst, const int64_t *dst_pad_shape,
const int64_t *dst_start);
/**
* @brief Copy a slice of a source array into another array. The arrays have
* the same number of dimensions (though their shapes may differ), the same
* item size, and they are stored as C buffers with contiguous data (any
* padding is considered part of the array).
*
* @param ndim The number of dimensions in both arrays.
* @param itemsize The size of the individual data item in both arrays (signed 32-bit).
* @param src The buffer for getting the data from the source array.
* @param src_pad_shape The shape of the source array, including padding.
* @param src_start The source coordinates where the slice will begin.
* @param src_stop The source coordinates where the slice will end.
* @param dst The buffer for setting the data into the destination array.
* @param dst_pad_shape The shape of the destination array, including padding.
* @param dst_start The destination coordinates where the slice will be placed.
*
* @return An error code.
*
* @note This is a version of the (now deprecated) b2nd_copy_buffer() that takes
* the item size as a signed 32-bit integer instead of an unsigned 8-bit one.
* This is needed when the data uses item sizes larger than 255 bytes.
*
* @note Please make sure that slice boundaries fit within the source and
* destination arrays before using this function, as it does not perform these
* checks itself.
*/
BLOSC_EXPORT int b2nd_copy_buffer2(int8_t ndim,
int32_t itemsize,
const void *src, const int64_t *src_pad_shape,
const int64_t *src_start, const int64_t *src_stop,
void *dst, const int64_t *dst_pad_shape,
const int64_t *dst_start);
#ifdef __cplusplus
}
#endif
#endif /* BLOSC_B2ND_H */

File diff suppressed because it is too large Load Diff

@ -0,0 +1,80 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
#ifndef BLOSC_BLOSC2_BLOSC2_COMMON_H
#define BLOSC_BLOSC2_BLOSC2_COMMON_H
#include "blosc2-export.h"
#include <stdint.h>
#include <string.h>
// Silence GCC warnings about variables that are intentionally left unused.
// BLOSC_GCC_VERSION encodes the compiler version as major * 100 + minor
// (e.g. GCC 4.8 -> 408) and is only defined when __GNUC__ is.
#ifdef __GNUC__
#define BLOSC_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#define BLOSC_UNUSED_VAR __attribute__ ((unused))
#else
// Other compilers: the marker expands to nothing.
#define BLOSC_UNUSED_VAR
#endif // __GNUC__
// Silence compiler warnings about unused parameters by evaluating them as void.
#define BLOSC_UNUSED_PARAM(x) ((void)(x))
/* Use inlined functions for supported systems */
#if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */
#define inline __inline /* Visual C is not C99, but supports some kind of inline */
#endif
/* Define the __SSE2__ symbol if compiling with Visual C++ and
targeting the minimum architecture level supporting SSE2.
Other compilers define this as expected and emit warnings
when it is re-defined. */
#if !defined(__SSE2__) && defined(_MSC_VER) && \
(defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))
#define __SSE2__
#endif
/*
* Detect if the architecture is fine with unaligned access.
*
* BLOSC_STRICT_ALIGN is first defined unconditionally and then undefined again
* for every architecture listed below. If the macro is already defined before
* this header is included, the whole detection is skipped and it stays defined.
*/
#if !defined(BLOSC_STRICT_ALIGN)
#define BLOSC_STRICT_ALIGN
#if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */
#undef BLOSC_STRICT_ALIGN
#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
#undef BLOSC_STRICT_ALIGN
#elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */
#undef BLOSC_STRICT_ALIGN
/* NOTE(review): __386 is already tested in the first branch, so the next branch can never be taken. */
#elif defined(__386)
#undef BLOSC_STRICT_ALIGN
#elif defined(_X86_) /* MinGW */
#undef BLOSC_STRICT_ALIGN
#elif defined(__I86__) /* Digital Mars */
#undef BLOSC_STRICT_ALIGN
/* Modern ARM systems (like ARM64) should support unaligned access
quite efficiently. */
#elif defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM64_ARCH_8__)
#undef BLOSC_STRICT_ALIGN
#elif defined(_ARCH_PPC) || defined(__PPC__)
/* Modern PowerPC systems (like POWER8) should support unaligned access
quite efficiently. */
#undef BLOSC_STRICT_ALIGN
#endif
#endif
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#if defined(__AVX2__) || defined(__AVX512F__) || defined (__AVX512BW__)
#include <immintrin.h>
#endif
#endif /* BLOSC_BLOSC2_BLOSC2_COMMON_H */

@ -0,0 +1,48 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
#ifndef BLOSC_BLOSC2_BLOSC2_EXPORT_H
#define BLOSC_BLOSC2_BLOSC2_EXPORT_H
/* Macros for specifying exported symbols.
BLOSC_EXPORT is used to decorate symbols that should be
exported by the blosc shared library.
BLOSC_NO_EXPORT is used to decorate symbols that should NOT
be exported by the blosc shared library.
BLOSC_EXPORT only expands to an export attribute when BLOSC_SHARED_LIBRARY
is defined; otherwise it expands to nothing.
*/
#if defined(BLOSC_SHARED_LIBRARY)
#if defined(_MSC_VER)
#define BLOSC_EXPORT __declspec(dllexport)
#elif (defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__)
#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
#define BLOSC_EXPORT __attribute__((dllexport))
#else
#define BLOSC_EXPORT __attribute__((visibility("default")))
#endif /* defined(_WIN32) || defined(__CYGWIN__) */
#else
/* Unknown compiler while building the shared library: fail the build early. */
#error Cannot determine how to define BLOSC_EXPORT for this compiler.
#endif
#else
#define BLOSC_EXPORT
#endif /* defined(BLOSC_SHARED_LIBRARY) */
/* Hidden visibility is only requested for GCC/Clang outside MinGW; elsewhere the macro is empty. */
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__)
#define BLOSC_NO_EXPORT __attribute__((visibility("hidden")))
#else
#define BLOSC_NO_EXPORT
#endif /* (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) */
/* When testing, export everything to make it easier to implement tests. */
#if defined(BLOSC_TESTING)
#undef BLOSC_NO_EXPORT
#define BLOSC_NO_EXPORT BLOSC_EXPORT
#endif /* defined(BLOSC_TESTING) */
#endif /* BLOSC_BLOSC2_BLOSC2_EXPORT_H */

@ -0,0 +1,117 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
#ifndef BLOSC_BLOSC2_BLOSC2_STDIO_H
#define BLOSC_BLOSC2_BLOSC2_STDIO_H
#include "blosc2-export.h"
#if defined(_MSC_VER)
#include <io.h>
#else
#include <unistd.h>
#endif
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#if defined(_WIN32)
#include <windows.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Holds the C FILE pointer used by the stdio backend. */
typedef struct {
FILE *file;
} blosc2_stdio_file;
/*
 * I/O callbacks of the stdio backend (open, close, size, write, read, truncate, destroy).
 * Sizes, item counts and positions are all signed 64-bit integers.
 * The write callback takes the data as `const void *ptr`, whereas the read
 * callback takes `void **ptr`.
 * NOTE(review): `stream` is an opaque void*; presumably it is the
 * blosc2_stdio_file returned by blosc2_stdio_open -- confirm against the implementation.
 */
BLOSC_EXPORT void *blosc2_stdio_open(const char *urlpath, const char *mode, void* params);
BLOSC_EXPORT int blosc2_stdio_close(void *stream);
BLOSC_EXPORT int64_t blosc2_stdio_size(void *stream);
BLOSC_EXPORT int64_t blosc2_stdio_write(const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
BLOSC_EXPORT int64_t blosc2_stdio_read(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
BLOSC_EXPORT int blosc2_stdio_truncate(void *stream, int64_t size);
BLOSC_EXPORT int blosc2_stdio_destroy(void* params);
/**
* @brief Parameters for memory-mapped I/O. You can use the blosc2_schunk_open*_udio functions to memory-map existing
* schunk files from disk. To create a new schunk which is backed by a memory-mapped file on disk, set the io member
* of the #blosc2_storage struct (see test_mmap for examples). Please note that memory-mapped I/O is only available for
* cframes and not sframes.
*/
typedef struct {
/* Arguments of the mapping */
const char* mode;
//!< The opening mode of the memory-mapped file (r, r+, w+ or c) similar to NumPy's np.memmap
//!< (https://numpy.org/doc/stable/reference/generated/numpy.memmap.html). Set to r if the file should only be read,
//!< r+ if you want to extend data to an existing file, w+ to create a new file and c to use an existing file as basis
//!< but keep all modifications in-memory. On Windows, the size of the mapping cannot change in the c mode.
int64_t initial_mapping_size;
//!< The initial size of the memory mapping used as a large enough write buffer for the r+, w+ and c modes (for
//!< Windows, only the r+ and w+ modes). On Windows, this will also be the size of the file while the file is opened.
//!< It will be truncated to the target size when the file is closed (e.g., when the schunk is destroyed).
bool needs_free;
//!< Indicates whether this object should be freed in the blosc2_destroy_cb callback (set to true if the
//!< blosc2_stdio_mmap struct was created on the heap).
/* Internal attributes of the mapping */
char* addr;
//!< The starting address of the mapping.
char* urlpath;
//!< The path to the file which is associated with this object.
int64_t file_size;
//!< The size of the file.
int64_t mapping_size;
//!< The size of the mapping (mapping_size >= file_size).
bool is_memory_only;
//!< Whether the mapping is only in-memory and changes are not reflected to the file on disk (c mode).
FILE* file;
//!< The underlying file handle.
int fd;
//!< The underlying file descriptor.
int64_t access_flags;
//!< The access attributes for the memory pages.
int64_t map_flags;
//!< The attributes of the mapping.
#if defined(_WIN32)
HANDLE mmap_handle;
//!< The Windows handle to the memory mapping.
#endif
} blosc2_stdio_mmap;
/**
* @brief Default struct for memory-mapped I/O for user initialization.
*
* The initializer is positional and follows the member order of blosc2_stdio_mmap:
* mode = "r", initial_mapping_size = 1 << 30 (1 GiB), needs_free = false,
* addr = NULL, urlpath = NULL, file_size = -1, mapping_size = -1,
* is_memory_only = false, file = NULL, fd = -1, access_flags = -1, map_flags = -1
* and, on Windows only, mmap_handle = INVALID_HANDLE_VALUE.
* Keep it in sync with the struct whenever members are added or reordered.
*/
static const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS = {
"r", (1 << 30), false, NULL, NULL, -1, -1, false, NULL, -1, -1, -1
#if defined(_WIN32)
, INVALID_HANDLE_VALUE
#endif
};
/*
 * I/O callbacks of the memory-mapped backend. Their signatures mirror the
 * blosc2_stdio_* callbacks declared above one by one.
 * NOTE(review): `params` / `stream` are opaque void*; presumably they point to
 * a blosc2_stdio_mmap -- confirm against the implementation.
 */
BLOSC_EXPORT void *blosc2_stdio_mmap_open(const char *urlpath, const char *mode, void* params);
BLOSC_EXPORT int blosc2_stdio_mmap_close(void *stream);
BLOSC_EXPORT int64_t blosc2_stdio_mmap_size(void *stream);
BLOSC_EXPORT int64_t blosc2_stdio_mmap_write(
const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
BLOSC_EXPORT int64_t blosc2_stdio_mmap_read(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream);
BLOSC_EXPORT int blosc2_stdio_mmap_truncate(void *stream, int64_t size);
BLOSC_EXPORT int blosc2_stdio_mmap_destroy(void* params);
#ifdef __cplusplus
}
#endif
#endif /* BLOSC_BLOSC2_BLOSC2_STDIO_H */

@ -0,0 +1,58 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
#ifndef BLOSC_BLOSC2_CODECS_REGISTRY_H
#define BLOSC_BLOSC2_CODECS_REGISTRY_H
#ifdef __cplusplus
extern "C" {
#endif
/* Numeric ids of the codecs listed in this registry header (32 to 38). */
enum {
BLOSC_CODEC_NDLZ = 32,
//!< Simple Lempel-Ziv compressor for NDim data. Experimental, mainly for teaching purposes.
BLOSC_CODEC_ZFP_FIXED_ACCURACY = 33,
//!< ZFP compressor for fixed accuracy mode. The desired accuracy is set in `compcode_meta`.
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md
BLOSC_CODEC_ZFP_FIXED_PRECISION = 34,
//!< ZFP compressor for fixed precision. The desired precision is set in `compcode_meta`.
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md
BLOSC_CODEC_ZFP_FIXED_RATE = 35,
//!< ZFP compressor for fixed rate. The desired rate is set in `compcode_meta`.
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md
BLOSC_CODEC_OPENHTJ2K = 36,
//!< OpenHTJ2K compressor for JPEG 2000 HT.
//!< See https://github.com/Blosc/blosc2_openhtj2k
BLOSC_CODEC_GROK = 37,
//!< Grok compressor for JPEG 2000.
//!< See https://github.com/Blosc/blosc2_grok
BLOSC_CODEC_OPENZL = 38,
//!< OpenZL metacompressor.
//!< See https://github.com/Blosc/blosc2_openzl
};
/* Registers the codecs of this registry (takes no arguments, returns nothing). */
void register_codecs(void);
// For dynamically loaded codecs
// NOTE(review): both members are strings; presumably the names of the encoder and
// decoder entry points to look up in the loaded library -- confirm.
typedef struct {
char *encoder;
char *decoder;
} codec_info;
// If ever add .free func for codecs, may be needed
// typedef struct {
// char *free;
// } codecparams_info;
#ifdef __cplusplus
}
#endif
#endif /* BLOSC_BLOSC2_CODECS_REGISTRY_H */

@ -0,0 +1,49 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
#ifndef BLOSC_BLOSC2_FILTERS_REGISTRY_H
#define BLOSC_BLOSC2_FILTERS_REGISTRY_H
#ifdef __cplusplus
extern "C" {
#endif
/* Numeric ids of the filters listed in this registry header (32 to 36). */
enum {
BLOSC_FILTER_NDCELL = 32,
//!< Simple filter for grouping NDim cell data together.
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndcell/README.md
BLOSC_FILTER_NDMEAN = 33,
//!< Simple filter for replacing content of a NDim cell with its mean value.
//!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndmean/README.md
BLOSC_FILTER_BYTEDELTA_BUGGY = 34,
//!< Buggy version of the byte-wise delta filter. See #524.
//!< NOTE(review): presumably kept only so that data already written with id 34
//!< stays readable; new code should use @ref BLOSC_FILTER_BYTEDELTA -- confirm.
BLOSC_FILTER_BYTEDELTA = 35,
//!< Byte-wise delta. Assumes M streams of bytes of length N, where M is the typesize (specified by `filters_meta`).
//!< Should be used in combination with @ref BLOSC_SHUFFLE or @ref BLOSC_BITSHUFFLE.
//!< See https://www.blosc.org/posts/bytedelta-enhance-compression-toolset/
BLOSC_FILTER_INT_TRUNC = 36,
//!< Truncate int precision; positive values in `filters_meta` slot will keep bits;
//!< negative values will remove (set to zero) bits.
//!< This is similar to @ref BLOSC_TRUNC_PREC, but for integers instead of floating point data.
};
/* Registers the filters of this registry (takes no arguments, returns nothing). */
void register_filters(void);
// For dynamically loaded filters
// NOTE(review): both members are strings; presumably the names of the forward and
// backward entry points to look up in the loaded library -- confirm.
typedef struct {
char *forward;
char *backward;
} filter_info;
#ifdef __cplusplus
}
#endif
#endif /* BLOSC_BLOSC2_FILTERS_REGISTRY_H */

@ -0,0 +1,37 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (C) 2021 The Blosc Developers <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/
#ifndef BLOSC_BLOSC2_TUNERS_REGISTRY_H
#define BLOSC_BLOSC2_TUNERS_REGISTRY_H
#ifdef __cplusplus
extern "C" {
#endif
/* Numeric ids of the tuners listed in this registry header. */
enum {
BLOSC_BTUNE = 32,
//!< NOTE(review): presumably the id of the BTune tuner (inferred from the name) -- confirm.
};
/* Registers the tuners of this registry (takes no arguments, returns nothing). */
void register_tuners(void);
// For dynamically loaded tuners
// NOTE(review): every member is a string; presumably the name of the matching
// tuner entry point (init, next_blocksize, next_cparams, update, free) to look
// up in the loaded library -- confirm.
typedef struct {
char *init;
char *next_blocksize;
char *next_cparams;
char *update;
char *free;
} tuner_info;
#ifdef __cplusplus
}
#endif
#endif /* BLOSC_BLOSC2_TUNERS_REGISTRY_H */

@ -0,0 +1,64 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
import io
import pprint
from textwrap import TextWrapper
def info_text_report_(items: list) -> str:
    """Return *items* exactly as ``print`` would render it, newline included."""
    # ``print(obj)`` emits ``str(obj)`` followed by a single "\n".
    return f"{items!s}\n"
def info_text_report(items: list) -> str:
    """Render ``(key, value)`` pairs as an aligned plain-text report.

    Every pair produces one entry of the form ``"<key> : <value>"`` terminated
    by a newline, with keys left-justified to the longest key.

    * ``dict`` values are formatted with :func:`pprint.pformat` and appended
      after the key label as-is.
    * Any other value is converted with ``str`` and wrapped at 96 columns;
      continuation lines are indented so that they line up under the value.

    Args:
        items: Iterable of ``(key, value)`` pairs; keys must be strings.

    Returns:
        The report text, or an empty string when *items* is empty.
    """
    # Materialize once: the pairs are traversed twice (key width, then rendering),
    # which would silently lose the data of a one-shot iterable.
    items = list(items)
    if not items:
        # ``max()`` over an empty sequence raises ValueError; nothing to report.
        return ""

    max_key_len = max(len(k) for k, _ in items)
    continuation = " " * max_key_len + " : "

    lines = []
    for k, v in items:
        label = k.ljust(max_key_len) + " : "
        if isinstance(v, dict):
            # pprint is used on purpose: the rich-based rendering did not work
            # well in notebooks.
            text = label + pprint.pformat(v)
        else:
            wrapper = TextWrapper(
                width=96,
                initial_indent=label,
                subsequent_indent=continuation,
            )
            text = wrapper.fill(str(v))
        lines.append(text + "\n")
    return "".join(lines)
def info_html_report(items: list) -> str:
    """Render ``(key, value)`` pairs as an HTML table with one row per pair.

    Keys and values are converted with ``str`` and HTML-escaped before being
    inserted, so text such as ``<f8`` or ``<class 'int'>`` is displayed
    literally instead of being parsed as markup.

    Args:
        items: Iterable of ``(key, value)`` pairs.

    Returns:
        The markup of a ``<table class="NDArray-info">`` element; the table
        body is empty when *items* is empty.
    """
    import html  # local import: only this function needs it

    rows = "".join(
        f'<tr><th style="text-align: left">{html.escape(str(k))}</th>'
        f'<td style="text-align: left">{html.escape(str(v))}</td></tr>'
        for k, v in items
    )
    return f'<table class="NDArray-info"><tbody>{rows}</tbody></table>'
class InfoReporter:
    """Expose an object's ``info_items`` as text and HTML representations.

    The wrapped object must provide an ``info_items`` attribute holding the
    ``(key, value)`` pairs to display; it is read on every call.
    """

    def __init__(self, obj):
        self.obj = obj

    def __repr__(self):
        """Return the plain-text report of the wrapped object's info items."""
        return info_text_report(self.obj.info_items)

    def _repr_html_(self):
        """Return the HTML table report of the wrapped object's info items."""
        return info_html_report(self.obj.info_items)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,133 @@
# only add PUBLIC dependencies as well
# https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html#creating-a-package-configuration-file
include(CMakeFindDependencyMacro)
# Search in <PackageName>_ROOT:
# https://cmake.org/cmake/help/v3.12/policy/CMP0074.html
if(POLICY CMP0074)
cmake_policy(SET CMP0074 NEW)
endif()
# locate the installed FindABC.cmake modules
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules")
# this section stores which configuration options were set
# (presumably substituted when the library was built; set(VAR ) with no value
# leaves VAR unset, so it evaluates to false in the if() tests below)
set(HAVE_THREADS ON)
set(HAVE_IPP )
set(HAVE_LZ4_CONFIG )
set(HAVE_ZLIB_NG TRUE)
set(HAVE_ZLIB_NG_CONFIG )
set(HAVE_ZSTD_CONFIG )
set(DEACTIVATE_IPP ON)
set(DEACTIVATE_ZLIB OFF)
set(DEACTIVATE_ZSTD OFF)
set(PREFER_EXTERNAL_LZ4 OFF)
set(PREFER_EXTERNAL_ZLIB OFF)
set(PREFER_EXTERNAL_ZSTD OFF)
# find dependencies and their targets, which are used in our Blosc2Targets.cmake
# additionally, the Blosc2_..._FOUND variables are used to support
# find_package(Blosc2 ... COMPONENTS ... ...)
# this enables downstream projects to express the need for specific features.
set(CMAKE_THREAD_PREFER_PTHREAD TRUE) # pre 3.1
set(THREADS_PREFER_PTHREAD_FLAG TRUE) # CMake 3.1+
# The THREADS component is available only when the library was built with thread support.
if(HAVE_THREADS)
find_dependency(Threads)
set(Blosc2_THREADS_FOUND TRUE)
else()
set(Blosc2_THREADS_FOUND FALSE)
endif()
# The IPP component is available only when IPP was both enabled and detected at
# build time. (The two values used to be swapped, which reported IPP as found
# precisely when it was deactivated or missing.)
if(NOT DEACTIVATE_IPP AND HAVE_IPP)
    find_dependency(IPP)
    set(Blosc2_IPP_FOUND TRUE)
else()
    set(Blosc2_IPP_FOUND FALSE)
endif()
# An external LZ4 is only looked up when it was preferred and has a config package.
if(PREFER_EXTERNAL_LZ4 AND HAVE_LZ4_CONFIG)
find_dependency(lz4 CONFIG)
endif()
# The LZ4 component is reported as found unconditionally.
set(Blosc2_LZ4_FOUND TRUE)
# The ZLIB component is available whenever zlib support was not deactivated.
# An external zlib / zlib-ng is resolved here only when it was preferred at
# build time; otherwise no extra dependency has to be found, but the component
# must still be reported as found (it used to be left unset in that case, which
# made find_package(Blosc2 COMPONENTS ZLIB) fail).
if(DEACTIVATE_ZLIB)
    set(Blosc2_ZLIB_FOUND FALSE)
else()
    if(PREFER_EXTERNAL_ZLIB)
        if(HAVE_ZLIB_NG)
            if(HAVE_ZLIB_NG_CONFIG)
                find_dependency(zlib-ng CONFIG)
            endif()
        else()
            find_dependency(ZLIB)
        endif()
    endif()
    set(Blosc2_ZLIB_FOUND TRUE)
endif()
# The ZSTD component is available whenever zstd support was not deactivated.
# An external zstd is resolved here only when it was preferred at build time;
# otherwise no extra dependency has to be found, but the component must still
# be reported as found (it used to be left unset in that case, which made
# find_package(Blosc2 COMPONENTS ZSTD) fail).
if(DEACTIVATE_ZSTD)
    set(Blosc2_ZSTD_FOUND FALSE)
else()
    if(PREFER_EXTERNAL_ZSTD AND HAVE_ZSTD_CONFIG)
        find_dependency(zstd CONFIG)
    endif()
    set(Blosc2_ZSTD_FOUND TRUE)
endif()
# define central Blosc2::blosc2_shared/static targets
include("${CMAKE_CURRENT_LIST_DIR}/Blosc2Targets.cmake")
# check if components are fulfilled and set Blosc2_<COMPONENT>_FOUND vars
# Blosc2_FIND_COMPONENTS is a list set by find_package(... COMPONENTS ... ...)
# likewise Blosc2_FIND_REQUIRED_... per component specified
# Only a missing *required* component marks the package as not found;
# missing optional components are ignored here.
foreach(comp ${Blosc2_FIND_COMPONENTS})
if(NOT Blosc2_${comp}_FOUND)
if(Blosc2_FIND_REQUIRED_${comp})
set(Blosc2_FOUND FALSE)
endif()
endif()
endforeach()
# Defines imported targets for Blosc2 inside a Python wheel
# Each definition is guarded by if(NOT TARGET ...), so it only takes effect when
# the target has not been defined already (e.g. by the Blosc2Targets.cmake
# included earlier in this file).
# ------------------------------
# Shared library target
# ------------------------------
if(NOT TARGET Blosc2::blosc2_shared)
add_library(Blosc2::blosc2_shared SHARED IMPORTED GLOBAL)
if(WIN32)
# MSVC: import library (.lib) + runtime DLL (.dll)
# Paths are resolved relative to the directory holding this file.
set_target_properties(Blosc2::blosc2_shared PROPERTIES
IMPORTED_IMPLIB "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.lib"
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.dll"
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
)
else()
# Linux/macOS
# NOTE(review): the ".so" suffix is hard-coded; macOS shared libraries normally
# use ".dylib" -- confirm the file name actually shipped in macOS wheels.
set_target_properties(Blosc2::blosc2_shared PROPERTIES
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.so"
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
)
endif()
endif()
# ------------------------------
# Static library target
# ------------------------------
# Only defined when the target does not exist yet.
# NOTE(review): this branch tests MSVC whereas the shared target tests WIN32, so
# non-MSVC Windows toolchains (e.g. MinGW) take the ".a" path here -- confirm intended.
if(NOT TARGET Blosc2::blosc2_static)
add_library(Blosc2::blosc2_static STATIC IMPORTED GLOBAL)
if(MSVC)
# Windows static library uses .lib
set_target_properties(Blosc2::blosc2_static PROPERTIES
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.lib"
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
)
else()
# Linux/macOS static library uses .a
set_target_properties(Blosc2::blosc2_static PROPERTIES
IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.a"
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include"
)
endif()
endif()

@ -0,0 +1,65 @@
# This is a basic version file for the Config-mode of find_package().
# It is used by write_basic_package_version_file() as input file for configure_file()
# to create a version-file which can be installed along a config.cmake file.
#
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
# the requested version string are exactly the same and it sets
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
# but only if the requested major version is the same as the current one.
# The variable CVF_VERSION must be set before calling configure_file().
# Version of the installed package (baked in when this file was generated).
set(PACKAGE_VERSION "2.23.1")
# An installed version older than the requested one is never compatible.
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
# Extract the major version component (leading zeros stripped unless it is 0).
if("2.23.1" MATCHES "^([0-9]+)\\.")
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0)
string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}")
endif()
else()
set(CVF_VERSION_MAJOR "2.23.1")
endif()
if(PACKAGE_FIND_VERSION_RANGE)
# both endpoints of the range must have the expected major version
math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1")
if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT)))
set(PACKAGE_VERSION_COMPATIBLE FALSE)
elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX)))
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
else()
# Single requested version: compatible only when the major versions match.
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
# Exact match requires the full version strings to be identical.
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT TRUE)
endif()
endif()
endif()
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
# (the literal "8" is the pointer size baked in for the installed build, so the
# second test is always false here)
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "")
return()
endif()
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
# a consumer whose pointer size differs from 8 bytes gets the package flagged as unsuitable.
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8")
math(EXPR installedBits "8 * 8")
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()

@ -0,0 +1,29 @@
#----------------------------------------------------------------
# Generated CMake target import file for configuration "Release".
#----------------------------------------------------------------
# NOTE(review): every library path below is an absolute path inside a temporary
# build directory (C:/Users/RUNNER~1/AppData/Local/Temp/...). The paths are also
# appended to the _cmake_import_check_files_for_* lists, so they are expected to
# exist on the consuming machine -- confirm this file is usable once installed.
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Import target "Blosc2::blosc2_shared" for configuration "Release"
set_property(TARGET Blosc2::blosc2_shared APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(Blosc2::blosc2_shared PROPERTIES
IMPORTED_IMPLIB_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/blosc2.lib"
IMPORTED_LOCATION_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.dll"
)
list(APPEND _cmake_import_check_targets Blosc2::blosc2_shared )
list(APPEND _cmake_import_check_files_for_Blosc2::blosc2_shared "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/blosc2.lib" "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.dll" )
# Import target "Blosc2::blosc2_static" for configuration "Release"
set_property(TARGET Blosc2::blosc2_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(Blosc2::blosc2_static PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.lib"
)
list(APPEND _cmake_import_check_targets Blosc2::blosc2_static )
list(APPEND _cmake_import_check_files_for_Blosc2::blosc2_static "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.lib" )
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)

@ -0,0 +1,116 @@
# Generated by CMake
# Refuse to run on CMake versions that are too old for this file.
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8)
message(FATAL_ERROR "CMake >= 3.0.0 required")
endif()
if(CMAKE_VERSION VERSION_LESS "3.0.0")
message(FATAL_ERROR "CMake >= 3.0.0 required")
endif()
# Policy settings are scoped to this file: pushed here, popped on every exit path.
cmake_policy(PUSH)
cmake_policy(VERSION 3.0.0...3.29)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_cmake_targets_defined "")
set(_cmake_targets_not_defined "")
set(_cmake_expected_targets "")
# Sort the expected targets into "already defined" and "not yet defined".
foreach(_cmake_expected_target IN ITEMS Blosc2::blosc2_shared Blosc2::blosc2_static Blosc2::blosc2)
list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
if(TARGET "${_cmake_expected_target}")
list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
else()
list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
endif()
endforeach()
unset(_cmake_expected_target)
# All targets already exist: nothing to do, clean up and leave.
if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
unset(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
return()
endif()
# Only some of the targets exist: this is an inconsistent state, abort.
if(NOT _cmake_targets_defined STREQUAL "")
string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
endif()
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
# The installation prefix configured by this project.
# NOTE(review): the prefix and the include directories below are absolute paths
# inside a temporary build directory rather than paths derived from the location
# of this file -- confirm they resolve on the consuming machine.
set(_IMPORT_PREFIX "C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib")
# Create imported target Blosc2::blosc2_shared
add_library(Blosc2::blosc2_shared SHARED IMPORTED)
set_target_properties(Blosc2::blosc2_shared PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include"
)
# Create imported target Blosc2::blosc2_static
# (consumers linking the static library also get Threads::Threads, link-only)
add_library(Blosc2::blosc2_static STATIC IMPORTED)
set_target_properties(Blosc2::blosc2_static PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include"
INTERFACE_LINK_LIBRARIES "\$<LINK_ONLY:Threads::Threads>"
)
# Create imported target Blosc2::blosc2
# (an interface target that forwards to the static library)
add_library(Blosc2::blosc2 INTERFACE IMPORTED)
set_target_properties(Blosc2::blosc2 PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include"
INTERFACE_LINK_LIBRARIES "Blosc2::blosc2_static"
)
# Load information for each installed configuration.
# Every Blosc2Targets-<config>.cmake file sitting next to this file is included.
file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/Blosc2Targets-*.cmake")
foreach(_cmake_config_file IN LISTS _cmake_config_files)
include("${_cmake_config_file}")
endforeach()
unset(_cmake_config_file)
unset(_cmake_config_files)
# Cleanup temporary variables.
set(_IMPORT_PREFIX)
# Loop over all imported files and verify that they actually exist
# (a missing file is a fatal error; the per-file check is skipped on CMake >= 3.28
# when an existing xcframework directory is recorded for the target)
foreach(_cmake_target IN LISTS _cmake_import_check_targets)
if(CMAKE_VERSION VERSION_LESS "3.28"
OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target}
OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}")
foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
if(NOT EXISTS "${_cmake_file}")
message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
\"${_cmake_file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
endif()
endforeach()
endif()
unset(_cmake_file)
unset("_cmake_import_check_files_for_${_cmake_target}")
endforeach()
unset(_cmake_target)
unset(_cmake_import_check_targets)
# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)

@ -0,0 +1,74 @@
# Find the Intel IPP (Integrated Performance Primitives)
#
# IPP_FOUND - System has IPP
# IPP_INCLUDE_DIRS - IPP include files directories
# IPP_LIBRARIES - The IPP libraries
#
# The environment variable IPPROOT is used to find the installation location.
# If the environment variable is not set we'll look for it in the default installation locations.
#
# Usage:
#
# find_package(IPP)
# if(IPP_FOUND)
# target_link_libraries(TARGET ${IPP_LIBRARIES})
# endif()

# Step 1: locate the IPP root, i.e. a directory that contains include/ipp.h.
# $ENV{IPPROOT} is listed first, followed by common install locations.
find_path(IPP_ROOT_DIR
include/ipp.h
PATHS
$ENV{IPPROOT}
/opt/intel/compilers_and_libraries/linux/ipp
/opt/intel/compilers_and_libraries/mac/ipp
"C:/IntelSWTools/compilers_and_libraries/windows/ipp/"
"C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/ipp"
$ENV{HOME}/intel/ipp
$ENV{HOME}/miniconda3
$ENV{USERPROFILE}/miniconda3/Library
"C:/Miniconda37-x64/Library" # Making AppVeyor happy
)
# Step 2: the header directory is searched for below the root found above.
find_path(IPP_INCLUDE_DIR
ipp.h
PATHS
${IPP_ROOT_DIR}/include
)
# Step 3: choose platform-specific library names. IPP_SEARCH_LIB is the single
# file used to locate the library directory; IPP_LIBS is the full list of
# libraries that must all be found.
if(WIN32)
set(IPP_SEARCH_LIB ippcoremt.lib)
set(IPP_LIBS ippcoremt.lib ippsmt.lib ippdcmt.lib)
elseif(APPLE)
set(IPP_SEARCH_LIB libippcore.a)
set(IPP_LIBS libipps.a libippdc.a libippcore.a)
else() # Linux
set(IPP_SEARCH_LIB libippcore.so)
set(IPP_LIBS ipps ippdc ippcore)
endif()
# Step 4: find the directory that holds the probe library.
find_path(IPP_LIB_SEARCHPATH
${IPP_SEARCH_LIB}
PATHS
${IPP_ROOT_DIR}/lib/intel64
${IPP_ROOT_DIR}/lib
)
# Step 5: resolve every library. Each hit is appended to IPP_LIBRARIES; any miss
# sets IPP_NOTFOUND so that a partial set is rejected in the final check.
foreach(LIB ${IPP_LIBS})
find_library(${LIB}_PATH ${LIB} PATHS ${IPP_LIB_SEARCHPATH})
if(${LIB}_PATH)
set(IPP_LIBRARIES ${IPP_LIBRARIES} ${${LIB}_PATH})
set(IPP_FOUND TRUE)
else()
# message(STATUS "Could not find ${LIB}: disabling IPP")
set(IPP_NOTFOUND TRUE)
endif()
endforeach()
# Step 6: IPP counts as found only when at least one library was found and none
# was missing. Note that include_directories() adds the IPP headers to the
# include path of the directory that includes this module.
if(IPP_FOUND AND NOT IPP_NOTFOUND)
set(IPP_INCLUDE_DIRS ${IPP_INCLUDE_DIR})
include_directories(${IPP_INCLUDE_DIRS})
message(STATUS "Found IPP libraries in: ${IPP_LIBRARIES}")
else()
message(STATUS "No IPP libraries found.")
set(IPP_FOUND FALSE)
endif()

@ -0,0 +1,10 @@
# Locate an LZ4 installation.
#
#   LZ4_INCLUDE_DIR - directory containing lz4.h
#   LZ4_LIBRARY     - the LZ4 library
#   LZ4_FOUND       - set to TRUE only when both of the above were found
find_path(LZ4_INCLUDE_DIR NAMES lz4.h)
find_library(LZ4_LIBRARY NAMES lz4 liblz4)

if(NOT LZ4_INCLUDE_DIR OR NOT LZ4_LIBRARY)
  # Header or library missing: LZ4_FOUND is left untouched.
  message(STATUS "No LZ4 library found. Using internal sources.")
else()
  set(LZ4_FOUND TRUE)
  message(STATUS "Found LZ4 library: ${LZ4_LIBRARY}")
endif()

@ -0,0 +1,58 @@
# Check if SSE/AVX instructions are available on the machine where
# the project is compiled.
#
# Cache variables set by this module:
#   SSE2_FOUND - the build host reports SSE2 support
#   AVX2_FOUND - the build host reports AVX2 support
#
# The CPU feature list is obtained with execute_process(). The exec_program()
# command is deprecated and is rejected by CMake >= 3.28 when policy CMP0153
# is NEW, so it must not be used here.

# Defaults for platforms without a detection method (Windows and anything
# else): assume SSE2 is safe and AVX2 is not available.
set(_simd_has_sse2 true)
set(_simd_has_avx2 false)

if(CMAKE_SYSTEM_NAME MATCHES "Linux")
  # A single read of /proc/cpuinfo serves both feature checks.
  execute_process(COMMAND cat /proc/cpuinfo OUTPUT_VARIABLE CPUINFO ERROR_QUIET)

  if("${CPUINFO}" MATCHES "sse2")
    set(_simd_has_sse2 true)
  else()
    set(_simd_has_sse2 false)
  endif()

  if("${CPUINFO}" MATCHES "avx2")
    set(_simd_has_avx2 true)
  else()
    set(_simd_has_avx2 false)
  endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
  # Consecutive COMMAND arguments form a pipeline, equivalent to
  # "sysctl -a | grep <key>" without requiring a shell.
  execute_process(
    COMMAND /usr/sbin/sysctl -a
    COMMAND grep machdep.cpu.features
    OUTPUT_VARIABLE CPUINFO
    ERROR_QUIET)
  # A non-"S" character is required before SSE2, as in the original pattern.
  if("${CPUINFO}" MATCHES "[^S]SSE2")
    set(_simd_has_sse2 true)
  else()
    set(_simd_has_sse2 false)
  endif()

  execute_process(
    COMMAND /usr/sbin/sysctl -a
    COMMAND grep machdep.cpu.leaf7_features
    OUTPUT_VARIABLE CPUINFO
    ERROR_QUIET)
  if("${CPUINFO}" MATCHES "AVX2")
    set(_simd_has_avx2 true)
  else()
    set(_simd_has_avx2 false)
  endif()
endif()

# CACHE without FORCE: a value already present in the cache (for example one
# given with -D on the command line) is kept.
set(SSE2_FOUND ${_simd_has_sse2} CACHE BOOL "SSE2 available on host")
set(AVX2_FOUND ${_simd_has_avx2} CACHE BOOL "AVX2 available on host")
unset(_simd_has_sse2)
unset(_simd_has_avx2)

if(NOT SSE2_FOUND)
  message(STATUS "Could not find hardware support for SSE2 on this machine.")
endif()
if(NOT AVX2_FOUND)
  message(STATUS "Could not find hardware support for AVX2 on this machine.")
endif()

mark_as_advanced(SSE2_FOUND AVX2_FOUND)

@ -0,0 +1,54 @@
# Locate zlib-ng.
#
#   ZLIB_NG_INCLUDE_DIR  - directory containing zlib-ng.h
#   ZLIB_NG_LIBRARY      - the zlib-ng library
#   ZLIB_NG_LIBRARIES    - same as ZLIB_NG_LIBRARY
#   ZLIB_NG_LIBRARY_DIRS - library directory guessed from the header location
#   ZLIB_NG_FOUND        - TRUE when both the header and the library were found
find_path(ZLIB_NG_INCLUDE_DIR NAMES zlib-ng.h)

# Guess the library directory from the header directory: strip a trailing
# "/include" and use the sibling "lib" directory when it exists. The guard must
# test the zlib-ng header variable; it previously tested ZLIB_INCLUDE_DIRS,
# which belongs to a different package and is not set by this module.
if(ZLIB_NG_INCLUDE_DIR)
  set(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_INCLUDE_DIR})
  if("${ZLIB_NG_LIBRARY_DIRS}" MATCHES "/include$")
    # Strip off the trailing "/include" in the path.
    get_filename_component(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_LIBRARY_DIRS} PATH)
  endif()
  if(EXISTS "${ZLIB_NG_LIBRARY_DIRS}/lib")
    set(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_LIBRARY_DIRS}/lib)
  endif()
endif()

# The guessed directory is only a hint; the default search locations are still
# consulted, so anything that was found before is still found.
find_library(ZLIB_NG_LIBRARY
  NAMES z-ng libz-ng zlib-ng libz-ng.a
  HINTS ${ZLIB_NG_LIBRARY_DIRS})

set(ZLIB_NG_LIBRARIES ${ZLIB_NG_LIBRARY})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ZLIB_NG DEFAULT_MSG ZLIB_NG_LIBRARY ZLIB_NG_INCLUDE_DIR)

# Kept from the original module: make ZLIB_NG_FOUND an explicit TRUE/FALSE.
if(ZLIB_NG_INCLUDE_DIR AND ZLIB_NG_LIBRARIES)
  set(ZLIB_NG_FOUND TRUE)
else()
  set(ZLIB_NG_FOUND FALSE)
endif()

if(ZLIB_NG_FOUND)
  message(STATUS "Found zlib-ng: ${ZLIB_NG_LIBRARIES}, ${ZLIB_NG_INCLUDE_DIR}")
endif()
#[[
Copyright https://github.com/zlib-ng/minizip-ng, 2021
Condition of use and distribution are the same as zlib:
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgement in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
]]#

@ -0,0 +1,8 @@
# Locate a Zstandard installation.
#
#   ZSTD_INCLUDE_DIR - directory containing zstd.h
#   ZSTD_LIBRARY     - the zstd library
#   ZSTD_FOUND       - set to TRUE only when both of the above were found
find_path(ZSTD_INCLUDE_DIR NAMES zstd.h)
find_library(ZSTD_LIBRARY NAMES zstd)

# Nothing is reported (and ZSTD_FOUND is left untouched) when either is missing.
if(ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
  set(ZSTD_FOUND TRUE)
  message(STATUS "Found ZSTD library: ${ZSTD_LIBRARY}")
endif()

@ -0,0 +1,26 @@
# CMake toolchain file for cross-compiling to Linux on aarch64 with the
# aarch64-linux-gnu GCC toolchain; binaries are run through qemu-aarch64.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_SYSTEM_VERSION 1)
# CROSS_COMPILE_TOOLCHAIN is not set in this file; presumably supplied by the
# caller -- it is only echoed here.
message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}")
# Target triplet; also used below to build the compiler names and the
# emulator's library root.
set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set(CMAKE_CROSSCOMPILING TRUE)
# Emulator command line: qemu-aarch64 with -L pointing at /usr/<triplet>/.
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-aarch64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)
# Programs are never searched in the find root path; libraries and headers
# are searched only there.
# NOTE(review): CMAKE_FIND_ROOT_PATH_MODE_PACKAGE is not set in this file --
# confirm whether find_package() lookups should be restricted as well.
SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
# The C cross-compiler is mandatory: configuration stops if it is missing.
find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found")
endif()
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when
# one of the two candidate names is found.
find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

@ -0,0 +1,27 @@
# CMake toolchain file for cross-compiling to Linux on arm with the
# arm-linux-gnueabihf GCC toolchain; binaries are run through qemu-arm.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_SYSTEM_VERSION 1)
# CROSS_COMPILE_TOOLCHAIN is not set in this file; presumably supplied by the
# caller -- it is only echoed here.
message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}")
# Target triplet; also used below to build the compiler names and the
# emulator's library root.
set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf)
set(CMAKE_CXX_COMPILER_TARGET arm-linux-gnueabihf)
set(CMAKE_CROSSCOMPILING TRUE)
# Emulator command line: qemu-arm with -L pointing at /usr/<triplet>/.
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/)
# Programs are never searched in the find root path; libraries, headers and
# packages are searched only there.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# The C cross-compiler is mandatory: configuration stops if it is missing.
find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found")
endif()
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when
# one of the two candidate names is found.
find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

@ -0,0 +1,31 @@
# CMake toolchain file for cross-compiling to Linux on arm; the target triplet
# defaults to arm-linux-gnueabi and binaries are run through qemu-arm.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_SYSTEM_VERSION 1)
# CROSS_COMPILE_TOOLCHAIN is not set in this file; presumably supplied by the
# caller -- it is only echoed here.
message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}")
# The triplets are only defaults: a value defined before this file is read
# (for example on the command line) is kept.
if(NOT DEFINED CMAKE_C_COMPILER_TARGET)
set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabi)
endif()
if(NOT DEFINED CMAKE_CXX_COMPILER_TARGET)
set(CMAKE_CXX_COMPILER_TARGET arm-linux-gnueabi)
endif()
set(CMAKE_CROSSCOMPILING TRUE)
# Emulator command line: qemu-arm with -L pointing at /usr/<triplet>/.
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/)
# Programs are never searched in the find root path; libraries, headers and
# packages are searched only there.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# The C cross-compiler is mandatory: configuration stops if it is missing.
find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found")
endif()
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when
# one of the two candidate names is found.
find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

@ -0,0 +1,11 @@
# pkg-config metadata for the blosc2 library.
#
# libdir and includedir were previously the install prefix with a second
# absolute path appended to it ("<prefix>/C:/Users/..."), which is not a valid
# path. They are now derived from a single prefix.
# NOTE(review): the prefix is still the temporary directory of the machine that
# built the wheel; confirm whether it should be made relocatable (for example
# with ${pcfiledir}) before relying on this file.
prefix=C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib
libdir=${prefix}/blosc2/lib
includedir=${prefix}/blosc2/include
Name: blosc2
Description: A blocking, shuffling and lossless compression library
URL: https://blosc.org/
Version: 2.23.1
Requires:
Libs: -L${libdir} -lblosc2
Cflags: -I${includedir}

@ -0,0 +1,822 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
from __future__ import annotations
import builtins
import math
import warnings
from itertools import product
from typing import TYPE_CHECKING, Any
import numpy as np
import blosc2
from .utils import get_intersecting_chunks, nptranspose, npvecdot, slice_to_chunktuple
if TYPE_CHECKING:
from collections.abc import Sequence
def matmul(x1: blosc2.Array, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
    """
    Compute the matrix product of two arrays.

    The result is created as a zero-filled array and the product is
    accumulated into it block by block, so that only blocks of the operands
    are held in memory at a time.

    Parameters
    ----------
    x1: :ref:`NDArray` | np.ndarray
        Left operand.
    x2: :ref:`NDArray` | np.ndarray
        Right operand.
    kwargs: Any, optional
        Keyword arguments used to create the result array (forwarded to
        :func:`blosc2.zeros`).

    Returns
    -------
    out: :ref:`NDArray`
        The matrix product of the inputs. A 1-d ``x1`` is treated as a row
        vector and a 1-d ``x2`` as a column vector; the axis added for that
        purpose is removed from the result again.

    Raises
    ------
    ValueError
        If a scalar is passed in, or if the last dimension of ``x1`` does not
        have the same size as the second-to-last dimension of ``x2`` (its
        only dimension when ``x2`` is 1-d).

    References
    ----------
    `numpy.matmul <https://numpy.org/doc/stable/reference/generated/numpy.matmul.html>`_

    Examples
    --------
    For 2-D arrays it is the matrix product:

    >>> import numpy as np
    >>> import blosc2
    >>> a = np.array([[1, 2],
    ...               [3, 4]])
    >>> nd_a = blosc2.asarray(a)
    >>> b = np.array([[2, 3],
    ...               [2, 1]])
    >>> nd_b = blosc2.asarray(b)
    >>> blosc2.matmul(nd_a, nd_b)
    array([[ 6,  5],
           [14, 13]])

    For 2-D mixed with 1-D, the result is the usual.

    >>> a = np.array([[1, 3],
    ...               [0, 1]])
    >>> nd_a = blosc2.asarray(a)
    >>> v = np.array([1, 2])
    >>> nd_v = blosc2.asarray(v)
    >>> blosc2.matmul(nd_a, nd_v)
    array([7, 2])
    >>> blosc2.matmul(nd_v, nd_a)
    array([1, 5])
    """
    if np.isscalar(x1) or np.isscalar(x2):
        raise ValueError("Arguments can't be scalars.")

    # Non-blosc2 inputs are wrapped in a SimpleProxy
    x1, x2 = blosc2.as_simpleproxy(x1, x2)

    # The contracted axis of x2 is its only axis when it is a vector and its
    # second-to-last axis otherwise; for x1 it is always the last axis.
    x2_contracted_axis = builtins.max(-2, -len(x2.shape))
    if x1.shape[-1] != x2.shape[x2_contracted_axis]:
        raise ValueError("Shapes are not aligned for matrix multiplication.")

    # Vectors are promoted to matrices; the flags record which axes to drop later
    x1_is_vector = x1.ndim == 1
    if x1_is_vector:
        x1 = blosc2.expand_dims(x1, axis=0)  # (N,) -> (1, N)
    x2_is_vector = x2.ndim == 1
    if x2_is_vector:
        x2 = blosc2.expand_dims(x2, axis=1)  # (M,) -> (M, 1)

    n, k = x1.shape[-2:]
    m = x2.shape[-1]
    batch_shape = np.broadcast_shapes(x1.shape[:-2], x2.shape[:-2])
    result_shape = batch_shape + (n, m)

    # For matmul, we don't want to reduce the chunksize, as experiments show that
    # the larger, the better (as long as some limits are not exceeded).
    kwargs["_chunksize_reduc_factor"] = 1
    result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs)

    # When any of the arrays is empty the zero-filled result is already the answer
    any_empty = 0 in result.shape + x1.shape + x2.shape
    if not any_empty:
        row_step, col_step = result.chunks[-2:]
        inner_step = x2.chunks[-1]

        for batch_info in get_intersecting_chunks((), result.shape[:-2], result.chunks[:-2]):
            batch = batch_info.raw
            # Leading (batch) part of the operand indices; empty for plain matrices
            x1_lead = batch[-x1.ndim + 2 :] if x1.ndim > 2 else ()
            x2_lead = batch[-x2.ndim + 2 :] if x2.ndim > 2 else ()

            for row, col in product(range(0, n, row_step), range(0, m, col_step)):
                rows = slice(row, builtins.min(row + row_step, n))
                cols = slice(col, builtins.min(col + col_step, m))
                target = batch + (rows, cols)
                for aux in range(0, k, inner_step):
                    inner = slice(aux, builtins.min(aux + inner_step, k))
                    bx1 = x1[x1_lead + (rows, inner)]
                    bx2 = x2[x2_lead + (inner, cols)]
                    result[target] += np.matmul(bx1, bx2)

    if x1_is_vector:
        result = result.squeeze(axis=-2)
    if x2_is_vector:
        result = result.squeeze(axis=-1)

    return result
def tensordot(
    x1: blosc2.NDArray,
    x2: blosc2.NDArray,
    axes: int | tuple[Sequence[int], Sequence[int]] = 2,
    **kwargs: Any,
) -> blosc2.NDArray:
    """
    Returns a tensor contraction of x1 and x2 over specific axes.

    The tensordot function corresponds to the generalized matrix product.
    Neither argument is complex-conjugated or transposed; if conjugation
    and/or transposition is desired, perform it explicitly before calling
    this function.

    Parameters
    ----------
    x1: blosc2.NDArray
        First input array. Should have a numeric data type.
    x2: blosc2.NDArray
        Second input array. Should have a numeric data type. Corresponding
        contracted axes of x1 and x2 must be equal.
    axes: int | tuple[Sequence[int], Sequence[int]]
        Number of axes to contract, or explicit sequences of axis indices for
        x1 and x2, respectively.

        * If axes is an int equal to N, the contraction is performed over the
          last N axes of x1 and the first N axes of x2, in order. The sizes of
          corresponding axes must match. Must be nonnegative.

          * If N equals 0, the result is the tensor (outer) product.
          * If N equals 1, the result is the tensor dot product.
          * If N equals 2, the result is the tensor double contraction (default).

        * If axes is a tuple of two sequences (x1_axes, x2_axes), the first
          sequence applies to x1 and the second to x2. Both sequences must have
          the same length, and axis x1_axes[i] of x1 must have the same size as
          axis x2_axes[i] of x2. Each index in a sequence must be unique. If x1
          has rank N, a valid x1 axis lies in the half-open interval [-N, N);
          likewise [-M, M) for x2 of rank M.
    kwargs: Any, optional
        Keyword arguments used to create the result array (forwarded to
        :func:`blosc2.zeros`). The key ``fast_path`` is consumed by this
        function and exists for testing purposes.

    Returns
    -------
    out: blosc2.NDArray
        An array containing the tensor contraction, whose shape consists of
        the non-contracted axes of x1 followed by the non-contracted axes
        of x2.

    Raises
    ------
    ValueError
        If the two axis sequences differ in length, if an integer ``axes`` is
        negative, if ``axes`` is neither an int nor a tuple, or if the
        contracted axes of x1 and x2 differ in size.
    """
    fast_path = kwargs.pop("fast_path", None)  # for testing purposes
    # TODO: add fast path for when don't need to change chunkshapes

    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)

    if isinstance(axes, tuple):
        a_axes, b_axes = axes
        a_axes = list(a_axes)
        b_axes = list(b_axes)
        if len(a_axes) != len(b_axes):
            raise ValueError("Lengths of reduction axes for x1 and x2 must be equal!")
        # Normalise negative axes first: the argsort-based bookkeeping below
        # compares axis positions, which is only meaningful when every axis is
        # expressed as a non-negative index.
        a_axes = [x1.ndim + i if i < 0 else i for i in a_axes]
        b_axes = [x2.ndim + j if j < 0 else j for j in b_axes]
        # need to track order of b_axes; later we cycle through a_axes sorted for op_chunk
        # a_sorted[inv_sort][b_sort] matches b_sorted since b_axes matches a_axes
        inv_sort = np.argsort(np.argsort(a_axes))
        b_sort = np.argsort(b_axes)
        order = inv_sort[b_sort]
        a_keep, b_keep = [True] * x1.ndim, [True] * x2.ndim
        for i, j in zip(a_axes, b_axes, strict=True):
            a_keep[i] = False
            b_keep[j] = False
    elif isinstance(axes, int):
        if axes < 0:
            raise ValueError("Integer axes argument must be nonnegative!")
        order = np.arange(axes, dtype=int)  # no reordering required
        a_axes = list(range(x1.ndim - axes, x1.ndim))
        b_axes = list(range(0, axes))
        a_keep = [i + axes < x1.ndim for i in range(x1.ndim)]
        b_keep = [i >= axes for i in range(x2.ndim)]
    else:
        raise ValueError("Axes argument must be two element tuple of sequences or an integer.")

    x1shape = np.array(x1.shape)
    x2shape = np.array(x2.shape)

    # Chunk sizes and extents of the contracted axes of x1, in ascending axis order
    a_chunks_red = tuple(c for i, c in enumerate(x1.chunks) if not a_keep[i])
    a_shape_red = tuple(s for i, s in enumerate(x1.shape) if not a_keep[i])

    if np.any(x1shape[a_axes] != x2shape[b_axes]):
        raise ValueError("x1 and x2 must have same shapes along reduction dimensions")

    result_shape = tuple(x1shape[a_keep]) + tuple(x2shape[b_keep])
    result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs)

    # Chunk indices along every contracted axis. Extents and chunk sizes are both
    # taken in ascending axis order so that each pair refers to the same axis;
    # pairing the extents in the caller's axis order with the sorted chunk sizes
    # would mismatch them whenever the axes are not given in ascending order.
    op_chunks = [
        slice_to_chunktuple(slice(0, s, 1), c) for s, c in zip(a_shape_red, a_chunks_red, strict=True)
    ]
    res_chunks = [
        slice_to_chunktuple(s, c)
        for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True)
    ]

    # Estimated memory for one result chunk when the contracted axes are loaded whole
    chunk_memory = np.prod(result.chunks) * (
        np.prod(x1shape[a_axes]) * x1.dtype.itemsize + np.prod(x2shape[b_axes]) * x2.dtype.itemsize
    )
    if chunk_memory < blosc2.MAX_FAST_PATH_SIZE:
        fast_path = True if fast_path is None else fast_path
    fast_path = False if fast_path is None else fast_path  # fast_path set via kwargs for testing

    # adapted from numpy.tensordot
    a_keep_axes = [i for i, k in enumerate(a_keep) if k]
    b_keep_axes = [i for i, k in enumerate(b_keep) if k]
    newaxes_a = a_keep_axes + a_axes
    newaxes_b = b_axes + b_keep_axes

    for rchunk in product(*res_chunks):
        res_chunk = tuple(
            slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1)
            for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True)
        )
        # The kept axes of x1 take the leading slices of the result chunk and the
        # kept axes of x2 the remaining ones; contracted axes start out whole.
        rchunk_iter = iter(res_chunk)
        a_selection = tuple(next(rchunk_iter) if a else slice(None, None, 1) for a in a_keep)
        b_selection = tuple(next(rchunk_iter) if b else slice(None, None, 1) for b in b_keep)
        res_chunk_shape = tuple(s.stop - s.start for s in res_chunk)

        for ochunk in product(*op_chunks):
            if not fast_path:  # operands too big, have to go chunk-by-chunk
                op_chunk = tuple(
                    slice(rc * rcs, builtins.min((rc + 1) * rcs, x1s), 1)
                    for rc, rcs, x1s in zip(ochunk, a_chunks_red, a_shape_red, strict=True)
                )  # use x1 chunk shape to iterate over reduction axes
                ochunk_iter = iter(op_chunk)
                a_selection = tuple(
                    next(ochunk_iter) if not a else as_ for as_, a in zip(a_selection, a_keep, strict=True)
                )
                # have to permute to match order of a_axes
                order_iter = iter(order)
                b_selection = tuple(
                    op_chunk[next(order_iter)] if not b else bs_
                    for bs_, b in zip(b_selection, b_keep, strict=True)
                )
            bx1 = x1[a_selection]
            bx2 = x2[b_selection]
            # adapted from numpy tensordot: flatten both blocks to 2-d and use dot
            newshape_a = (
                math.prod([bx1.shape[i] for i in a_keep_axes]),
                math.prod([bx1.shape[a] for a in a_axes]),
            )
            newshape_b = (
                math.prod([bx2.shape[b] for b in b_axes]),
                math.prod([bx2.shape[i] for i in b_keep_axes]),
            )
            at = nptranspose(bx1, newaxes_a).reshape(newshape_a)
            bt = nptranspose(bx2, newaxes_b).reshape(newshape_b)
            res = np.dot(at, bt)
            result[res_chunk] += res.reshape(res_chunk_shape)
            if fast_path:  # already done everything
                break
    return result
def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) -> blosc2.NDArray:
    """
    Computes the (vector) dot product of two arrays. Complex conjugates x1.

    Parameters
    ----------
    x1: blosc2.NDArray
        First input array. Must have floating-point data type.
    x2: blosc2.NDArray
        Second input array. Must be compatible with x1 for all non-contracted
        axes (via broadcasting). The axis over which the dot product is
        computed must have the same size as the respective axis in x1.
        Must have a floating-point data type.
    axis: int
        The axis of x1 and x2 containing the vectors for which to compute the
        dot product. Should be an integer on the interval [-N, -1], where N is
        min(x1.ndim, x2.ndim). Default: -1.
    kwargs: Any, optional
        Keyword arguments used to create the result array (forwarded to
        :func:`blosc2.zeros`). The key ``fast_path`` is consumed by this
        function and exists for testing purposes.

    Returns
    -------
    out: blosc2.NDArray
        If x1 and x2 are both one-dimensional arrays, a zero-dimensional array
        containing the dot product; otherwise, a non-zero-dimensional array
        containing the dot products and having rank N-1, where N is the rank
        of the shape determined according to broadcasting along the
        non-contracted axes.

    Raises
    ------
    ValueError
        If ``axis`` lies outside [-N, -1], or if the contracted axes of x1 and
        x2 differ in size.
    """
    fast_path = kwargs.pop("fast_path", None)  # for testing purposes

    # Added this to pass array-api tests (which use internal getitem to check results)
    if isinstance(x1, np.ndarray) and isinstance(x2, np.ndarray):
        return npvecdot(x1, x2, axis=axis)

    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)

    N = builtins.min(x1.ndim, x2.ndim)
    if not (-N <= axis <= -1):
        raise ValueError("axis must be on interval [-N,-1].")

    # Position of the contracted axis in each operand, and masks of the other axes
    x1_axis = axis + x1.ndim
    x2_axis = axis + x2.ndim
    x1_keep = [i != x1_axis for i in range(x1.ndim)]
    x2_keep = [i != x2_axis for i in range(x2.ndim)]

    x1shape = np.array(x1.shape)
    x2shape = np.array(x2.shape)
    red_chunk = x1.chunks[x1_axis]
    red_len = x1.shape[x1_axis]

    if np.any(x1shape[x1_axis] != x2shape[x2_axis]):
        raise ValueError("x1 and x2 must have same shapes along reduction dimensions")

    result_shape = np.broadcast_shapes(x1shape[x1_keep], x2shape[x2_keep])
    result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs)

    res_chunks = [
        slice_to_chunktuple(s, c)
        for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True)
    ]

    # Estimated memory for one result chunk when the contracted axis is loaded whole
    chunk_memory = np.prod(result.chunks) * (
        x1shape[x1_axis] * x1.dtype.itemsize + x2shape[x2_axis] * x2.dtype.itemsize
    )
    fits_in_memory = bool(chunk_memory < blosc2.MAX_FAST_PATH_SIZE)
    if fast_path is None:  # otherwise fast_path was set via kwargs for testing
        fast_path = fits_in_memory

    whole_axis = slice(None, None, 1)
    for rchunk in product(*res_chunks):
        res_chunk = tuple(
            slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1)
            for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True)
        )
        # handle broadcasting - if x1, x2 different ndim, could have to prepend 1s
        x1_slices = (
            slice(0, 1, 1) if s == 1 else r
            for r, s in zip(res_chunk[-x1.ndim + 1 :], x1shape[x1_keep], strict=True)
        )
        a_selection = tuple(next(x1_slices) if keep else whole_axis for keep in x1_keep)
        x2_slices = (
            slice(0, 1, 1) if s == 1 else r
            for r, s in zip(res_chunk[-x2.ndim + 1 :], x2shape[x2_keep], strict=True)
        )
        b_selection = tuple(next(x2_slices) if keep else whole_axis for keep in x2_keep)

        for offset in range(0, red_len, red_chunk):
            if not fast_path:  # operands too big, go chunk-by-chunk
                stop = builtins.min(offset + red_chunk, x1.shape[x1_axis])
                op_chunk = (slice(offset, stop, 1),)
                a_selection = a_selection[:x1_axis] + op_chunk + a_selection[x1_axis + 1 :]
                b_selection = b_selection[:x2_axis] + op_chunk + b_selection[x2_axis + 1 :]
            bx1 = x1[a_selection]
            bx2 = x2[b_selection]
            res = npvecdot(bx1, bx2, axis=axis)  # handles conjugation of bx1
            result[res_chunk] += res
            if fast_path:  # already done everything
                break
    return result
def permute_dims(
    arr: blosc2.Array, axes: tuple[int] | list[int] | None = None, **kwargs: Any
) -> blosc2.NDArray:
    """
    Permutes the axes (dimensions) of an array.

    Parameters
    ----------
    arr: :ref:`blosc2.NDArray` | np.ndarray
        The input array.
    axes: tuple[int], list[int], optional
        The desired permutation of axes. If None, the axes are reversed by default.
        If specified, axes must be a tuple or list representing a permutation of
        ``[0, 1, ..., N-1]``, where ``N`` is the number of dimensions of the input array.
        Negative indices are also supported. The *i*-th axis of the result will correspond
        to the axis numbered ``axes[i]`` of the input.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.
        When ``chunks`` is missing or None, the chunk shape of the input is
        permuted in the same way as its axes.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        A Blosc2 :ref:`blosc2.NDArray` with axes transposed. Scalars and arrays
        with fewer than two dimensions are returned unchanged.

    Raises
    ------
    ValueError
        If ``axes`` is not a valid permutation of the dimensions of ``arr``.

    References
    ----------
    `numpy.transpose <https://numpy.org/doc/2.2/reference/generated/numpy.transpose.html>`_

    `permute_dims <https://data-apis.org/array-api/latest/API_specification/generated/array_api.permute_dims.html#permute-dims>`_

    Examples
    --------
    For 2-D arrays it is the matrix transposition as usual:

    >>> import blosc2
    >>> a = blosc2.arange(1, 10).reshape((3, 3))
    >>> a[:]
    array([[1, 2, 3],
           [4, 5, 6],
           [7, 8, 9]])
    >>> at = blosc2.permute_dims(a)
    >>> at[:]
    array([[1, 4, 7],
           [2, 5, 8],
           [3, 6, 9]])

    For 3-D arrays:

    >>> import blosc2
    >>> a = blosc2.arange(1, 25).reshape((2, 3, 4))
    >>> a[:]
    array([[[ 1,  2,  3,  4],
            [ 5,  6,  7,  8],
            [ 9, 10, 11, 12]],
           [[13, 14, 15, 16],
            [17, 18, 19, 20],
            [21, 22, 23, 24]]])
    >>> at = blosc2.permute_dims(a, axes=(1, 0, 2))
    >>> at[:]
    array([[[ 1,  2,  3,  4],
            [13, 14, 15, 16]],
           [[ 5,  6,  7,  8],
            [17, 18, 19, 20]],
           [[ 9, 10, 11, 12],
            [21, 22, 23, 24]]])
    """
    if np.isscalar(arr) or arr.ndim < 2:
        return arr

    # Makes a SimpleProxy if input is not blosc2 array
    arr = blosc2.as_simpleproxy(arr)

    ndim = arr.ndim
    if axes is None:
        axes = tuple(range(ndim))[::-1]
    else:
        axes = tuple(axis if axis >= 0 else ndim + axis for axis in axes)
        if sorted(axes) != list(range(ndim)):
            raise ValueError(f"axes {axes} is not a valid permutation of {ndim} dimensions")

    new_shape = tuple(arr.shape[axis] for axis in axes)
    if kwargs.get("chunks") is None:
        kwargs["chunks"] = tuple(arr.chunks[axis] for axis in axes)

    result = blosc2.empty(shape=new_shape, dtype=arr.dtype, **kwargs)

    chunks = arr.chunks
    shape = arr.shape

    # handle SimpleProxy which doesn't have iterchunks_info
    if hasattr(arr, "iterchunks_info"):
        chunk_descriptors = (info.coords for info in arr.iterchunks_info())
    else:
        chunk_descriptors = (info.raw for info in get_intersecting_chunks((), shape, chunks))

    for descriptor in chunk_descriptors:
        # A descriptor entry is either an integer chunk coordinate or a slice that
        # already spans the chunk. The original code multiplied every entry as an
        # integer, which fails for slices; presumably the fallback path yields
        # slices (``.raw`` is used as an index tuple elsewhere in this module), so
        # both forms are accepted here.
        start_stop = [
            el.indices(dim)[:2]
            if isinstance(el, slice)
            else (el * chunk, builtins.min(chunk * (el + 1), dim))
            for el, chunk, dim in zip(descriptor, chunks, shape, strict=False)
        ]

        src_slice = tuple(slice(start, stop) for start, stop in start_stop)
        dst_slice = tuple(slice(start_stop[ax][0], start_stop[ax][1]) for ax in axes)

        transposed = nptranspose(arr[src_slice], axes=axes)
        result[dst_slice] = np.ascontiguousarray(transposed)

    return result
def transpose(x, **kwargs: Any) -> blosc2.NDArray:
    """
    Returns a Blosc2 blosc2.NDArray with axes transposed (deprecated).

    Only 2D arrays are supported for now. Other dimensions raise an error.
    A ``DeprecationWarning`` is emitted on every call; use
    ``matrix_transpose`` or ``permute_dims`` instead.

    Parameters
    ----------
    x: :ref:`blosc2.NDArray`
        The input array.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        The Blosc2 blosc2.NDArray with axes transposed. Scalars and arrays
        with fewer than two dimensions are returned unchanged.

    Raises
    ------
    ValueError
        If the input has more than two dimensions.

    References
    ----------
    `numpy.transpose <https://numpy.org/doc/2.2/reference/generated/numpy.transpose.html>`_
    """
    deprecation_message = (
        "transpose is deprecated and will be removed in a future version. "
        "Use matrix_transpose or permute_dims instead."
    )
    # stacklevel=2 attributes the warning to the caller of transpose()
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    # Nothing to transpose for scalars and for 0-d / 1-d arrays
    if np.isscalar(x) or x.ndim < 2:
        return x

    # Non-blosc2 input is wrapped in a SimpleProxy
    x = blosc2.as_simpleproxy(x)

    if x.ndim > 2:
        raise ValueError("Transposing arrays with dimension greater than 2 is not supported yet.")
    return permute_dims(x, **kwargs)
def matrix_transpose(arr: blosc2.Array, **kwargs: Any) -> blosc2.NDArray:
    """
    Transposes a matrix (or a stack of matrices).

    Parameters
    ----------
    arr: :ref:`blosc2.NDArray` | np.ndarray
        The input blosc2.NDArray having shape ``(..., M, N)`` and whose innermost two
        dimensions form ``MxN`` matrices.
    kwargs: Any, optional
        Keyword arguments passed on to ``permute_dims``.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        A new :ref:`blosc2.NDArray` containing the transpose for each matrix and having
        shape ``(..., N, M)``.
    """
    # Non-blosc2 input is wrapped in a SimpleProxy
    arr = blosc2.as_simpleproxy(arr)

    if np.isscalar(arr) or arr.ndim <= 2:
        # permute_dims reverses the axes when given None
        axes = None
    else:
        # Keep the leading (stack) axes in place and swap only the last two
        axes = [*range(arr.ndim - 2), arr.ndim - 1, arr.ndim - 2]
    return permute_dims(arr, axes, **kwargs)
def diagonal(x: blosc2.NDArray, offset: int = 0) -> blosc2.NDArray:
    """
    Returns the specified diagonals of a matrix (or a stack of matrices) x.

    Parameters
    ----------
    x: blosc2.NDArray
        Input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices.
    offset: int
        Offset specifying the off-diagonal relative to the main diagonal.

        * offset = 0: the main diagonal.
        * offset > 0: off-diagonal above the main diagonal.
        * offset < 0: off-diagonal below the main diagonal.

        Default: 0.

    Returns
    -------
    out: blosc2.NDArray
        An array containing the diagonals and whose shape is determined by
        removing the last two dimensions and appending a dimension equal to the size of the
        resulting diagonals. The diagonal is empty when ``offset`` points outside the matrix.

    References
    ----------
    `diagonal <https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.diagonal.html>`_
    """
    # Makes a SimpleProxy if input is not blosc2 array
    x = blosc2.as_simpleproxy(x)
    n_rows, n_cols = x.shape[-2:]

    # A negative offset moves the starting point down the first column, a
    # positive one moves it right along the first row.
    row_start = builtins.max(-offset, 0)
    col_start = builtins.max(offset, 0)
    # The diagonal ends at whichever matrix edge is reached first
    length = builtins.max(0, builtins.min(n_rows - row_start, n_cols - col_start))

    rows = np.arange(row_start, row_start + length)
    cols = np.arange(col_start, col_start + length)

    # Leading (stack) axes are taken whole; the last two are indexed pairwise
    key = (slice(None, None, 1),) * (x.ndim - 2) + (rows, cols)
    # TODO: change to use slice to give optimised compressing
    return blosc2.asarray(x[key])
def outer(x1: blosc2.NDArray, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
    """
    Returns the outer product of two vectors x1 and x2.

    Parameters
    ----------
    x1: blosc2.NDArray
        First one-dimensional input array of size N. Must have a numeric data type.
    x2: blosc2.NDArray
        Second one-dimensional input array of size M. Must have a numeric data type.
    kwargs: Any, optional
        Keyword arguments passed on to ``tensordot``, which uses them to
        create the result array.

    Returns
    -------
    out: blosc2.NDArray
        A two-dimensional array containing the outer product and whose shape is (N, M).

    Raises
    ------
    ValueError
        If either input is not one-dimensional.
    """
    # Makes a SimpleProxy if inputs are not blosc2 arrays
    x1, x2 = blosc2.as_simpleproxy(x1, x2)
    if (x1.ndim != 1) or (x2.ndim != 1):
        raise ValueError("outer only valid for 1D inputs.")
    # Empty axis sequences mean that nothing is contracted
    return tensordot(x1, x2, ((), ()), **kwargs)
def cholesky(x: blosc2.NDArray, upper: bool = False) -> blosc2.NDArray:
    # Placeholder: not implemented, always raises NotImplementedError.
    # The description is kept as a comment so that the function carries no docstring.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cholesky.html#cholesky
    raise NotImplementedError
def cross(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1) -> blosc2.NDArray:
    # Placeholder: not implemented, always raises NotImplementedError.
    # The description is kept as a comment so that the function carries no docstring.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cross.html#cross
    raise NotImplementedError
def det(x: blosc2.NDArray) -> blosc2.NDArray:
    # Placeholder: not implemented, always raises NotImplementedError.
    # The description is kept as a comment so that the function carries no docstring.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.det.html#det
    raise NotImplementedError
def eigh(x: blosc2.blosc2.NDArray) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    # Placeholder for the array API ``linalg.eigh`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigh.html#eigh
    raise NotImplementedError
def eigvalsh(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.eigvalsh`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigvalsh.html#eigvalsh
    raise NotImplementedError
def inv(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.inv`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.inv.html#inv
    raise NotImplementedError
def matrix_norm(
    x: blosc2.blosc2.NDArray, keepdims: bool = False, ord: int | float | str | None = "fro"
) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.matrix_norm`` extension.
    # Not implemented (the original author notes it "could be doable"): every call
    # raises NotImplementedError. ``ord`` may take the values:
    #   * 'fro' - Frobenius norm
    #   * 'nuc' - nuclear norm
    #   * 1     - max(sum(abs(x), axis=-2))
    #   * 2     - largest singular value
    #   * inf   - max(sum(abs(x), axis=-1))
    #   * -1    - min(sum(abs(x), axis=-2))
    #   * -2    - smallest singular value
    #   * -inf  - min(sum(abs(x), axis=-1))
    # NOTE(review): sum(x**2, axis=[-1, -2]) is the *squared Frobenius* norm, not the
    # largest singular value, so it must not be used as the ord=2 formula.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_norm.html#matrix_norm
    raise NotImplementedError
def matrix_power(x: blosc2.blosc2.NDArray, n: int) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.matrix_power`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_power.html#matrix_power
    raise NotImplementedError
def matrix_rank(
    x: blosc2.blosc2.NDArray, rtol: float | blosc2.blosc2.NDArray | None = None
) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.matrix_rank`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_rank.html#matrix_rank
    raise NotImplementedError
def pinv(
    x: blosc2.blosc2.NDArray, rtol: float | blosc2.blosc2.NDArray | None = None
) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.pinv`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.pinv.html#pinv
    raise NotImplementedError
def qr(
    x: blosc2.blosc2.NDArray, mode: str = "reduced"
) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    # Placeholder for the array API ``linalg.qr`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.qr.html#qr
    raise NotImplementedError
def slogdet(x: blosc2.blosc2.NDArray) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    # Placeholder for the array API ``linalg.slogdet`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.slogdet.html#slogdet
    raise NotImplementedError
def solve(x1: blosc2.blosc2.NDArray, x2: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.solve`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.solve.html#solve
    raise NotImplementedError
def svd(
    x: blosc2.blosc2.NDArray, full_matrices: bool = True
) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]:
    # Placeholder for the array API ``linalg.svd`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svd.html#svd
    raise NotImplementedError
def svdvals(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.svdvals`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svdvals.html#svdvals
    raise NotImplementedError
def trace(x: blosc2.blosc2.NDArray, offset: int = 0, dtype: np.dtype | None = None) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.trace`` extension.
    # Not implemented: every call raises NotImplementedError.
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.trace.html#trace
    raise NotImplementedError
def vector_norm(
    x: blosc2.blosc2.NDArray,
    axis: int | tuple[int] | None = None,
    keepdims: bool = False,
    ord: int | float = 2,
) -> blosc2.blosc2.NDArray:
    # Placeholder for the array API ``linalg.vector_norm`` extension.
    # Not implemented (the original author notes it "could be doable"): every call
    # raises NotImplementedError. ``ord`` may take the values:
    #   * p: int - p-norm
    #   * inf    - max(abs(x))
    #   * -inf   - min(abs(x))
    # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.vector_norm.html#vector_norm
    raise NotImplementedError

@ -0,0 +1,462 @@
/* Runtime-agnostic wasm32 JIT JS glue for miniexpr.
* Callers provide runtime facilities via the `runtime` object.
*/
(function(root) {
'use strict';
function _meJitInstantiate(runtime, wasmBytes, bridgeLookupFnIdx) {
/* Nothing to instantiate without a runtime and module bytes: 0 signals failure. */
if (!runtime || !wasmBytes) {
  return 0;
}
/* Copy each runtime facility into a local; the nested helpers below close over them. */
var HEAPF64 = runtime.HEAPF64;
var HEAPF32 = runtime.HEAPF32;
var wasmMemory = runtime.wasmMemory;
var wasmTable = runtime.wasmTable;
var stackSave = runtime.stackSave;
var stackAlloc = runtime.stackAlloc;
var stackRestore = runtime.stackRestore;
var lengthBytesUTF8 = runtime.lengthBytesUTF8;
var stringToUTF8 = runtime.stringToUTF8;
var addFunction = runtime.addFunction;
/* Error sink: the runtime's own `err` if it has one, otherwise console.error when available. */
var err = runtime.err || function(message) {
  if (typeof console !== 'undefined' && typeof console.error === 'function') {
    console.error(message);
  }
};
/* Every facility above is required; report and return 0 if any is missing or of the wrong type. */
if (!HEAPF64 || !HEAPF32 || !wasmMemory || !wasmTable ||
    typeof stackSave !== 'function' || typeof stackAlloc !== 'function' ||
    typeof stackRestore !== 'function' || typeof lengthBytesUTF8 !== 'function' ||
    typeof stringToUTF8 !== 'function' || typeof addFunction !== 'function') {
  err('[me-wasm-jit] invalid runtime object');
  return 0;
}
/* `src` is the module being patched; `enc`/`dec` convert import/export names to and from UTF-8. */
var src = wasmBytes;
var enc = new TextEncoder();
var dec = new TextDecoder();
/* --- LEB128 helpers ------------------------------------------------- */
/* Decode one unsigned LEB128 integer from `buf` starting at index `pos`.
 * Returns [value, nextPos], where nextPos is the index just past the last byte read.
 * The value is accumulated arithmetically instead of with `|` and `<<`: bitwise
 * operators work on signed 32-bit integers, so encodings of 2^31 or more would come
 * back negative and groups beyond the fifth byte would wrap the shift count. */
function readULEB(buf, pos) {
  var value = 0;
  var scale = 1;
  var byte;
  do {
    byte = buf[pos++];
    value += (byte & 0x7f) * scale;
    scale *= 128;
  } while (byte & 0x80);
  return [value, pos];
}
/* Encode `v` as unsigned LEB128 and return the bytes as a plain array
 * (least-significant 7-bit group first; every byte but the last has bit 7 set). */
function encULEB(v) {
  var bytes = [];
  for (;;) {
    var group = v & 0x7f;
    v >>>= 7;
    if (!v) {
      bytes.push(group);
      return bytes;
    }
    bytes.push(group | 0x80);
  }
}
/* Encode `s` as a wasm name: ULEB128 byte length followed by the UTF-8 bytes. */
function encStr(s) {
  var utf8 = enc.encode(s);
  var encoded = encULEB(utf8.length);
  for (var k = 0; k < utf8.length; k++) {
    encoded.push(utf8[k]);
  }
  return encoded;
}
/* Read a length-prefixed UTF-8 name at `pos`; returns [string, nextPos]. */
function readName(buf, pos) {
  var lengthField = readULEB(buf, pos);
  var begin = lengthField[1];
  var end = begin + lengthField[0];
  return [dec.decode(buf.subarray(begin, end)), end];
}
/* Step over a limits record (flags, minimum, and a maximum when flag bit 0 is set);
 * returns the index just past it. */
function skipLimits(buf, pos) {
  var flagsField = readULEB(buf, pos);
  var afterMin = readULEB(buf, flagsField[1])[1];
  var hasMax = (flagsField[0] & 0x01) !== 0;
  return hasMax ? readULEB(buf, afterMin)[1] : afterMin;
}
/* Build the import entry for env.memory: kind 0x02 (memory), limits flag 0x00 (no max),
 * minimum of 256. */
function encMemoryImport() {
  return encStr("env").concat(
    encStr("memory"),
    [0x02, 0x00], /* memory, limits-flag: no-max */
    encULEB(256)
  );
}
/* Build a complete import section (id 0x02) whose only entry is the env.memory import. */
function buildImportSecWithMemory() {
  var payload = encULEB(1).concat(encMemoryImport());
  return [0x02].concat(encULEB(payload.length), payload);
}
/* Re-encode an import section so that it contains an env.memory import.
 * Every existing entry is copied through byte-for-byte; an env.memory entry is appended
 * only when the section does not already have one. Returns the whole section
 * (id byte, ULEB length, payload) as a plain array.
 * Throws on an import kind other than function/table/memory/global. */
function patchImportSec(secData) {
  var countField = readULEB(secData, 0);
  var importCount = countField[0];
  var cursor = countField[1];
  var kept = [];
  var sawEnvMemory = false;
  for (var n = 0; n < importCount; n++) {
    var entryStart = cursor;
    var modName = readName(secData, cursor);
    var fieldName = readName(secData, modName[1]);
    cursor = fieldName[1];
    var kind = secData[cursor++];
    switch (kind) {
      case 0x00: /* function: type index */
        cursor = readULEB(secData, cursor)[1];
        break;
      case 0x01: /* table: elem type byte, then limits */
        cursor = skipLimits(secData, cursor + 1);
        break;
      case 0x02: /* memory: limits */
        cursor = skipLimits(secData, cursor);
        if (modName[0] === "env" && fieldName[0] === "memory") {
          sawEnvMemory = true;
        }
        break;
      case 0x03: /* global: valtype + mutability */
        cursor += 2;
        break;
      default:
        throw new Error("unsupported wasm import kind " + kind);
    }
    kept.push(Array.from(secData.subarray(entryStart, cursor)));
  }
  if (!sawEnvMemory) {
    kept.push(encMemoryImport());
  }
  var payload = kept.reduce(function(acc, entry) {
    return acc.concat(entry);
  }, encULEB(kept.length));
  return [0x02].concat(encULEB(payload.length), payload);
}
function buildEnvImports() {
/* Host lookup function fetched from the wasm table (null when no index was given),
 * plus a per-name cache of what it resolved. */
var bridgeCache = Object.create(null);
var bridgeLookup = bridgeLookupFnIdx ? wasmTable.get(bridgeLookupFnIdx) : null;
/* Resolve `name` through the host lookup function. Returns the table entry for the
 * index it reports, or null when there is no lookup function or it reports 0.
 * The name is written to temporary stack space that is always restored. */
function lookupBridge(name) {
  if (!bridgeLookup) {
    return null;
  }
  if (name in bridgeCache) { /* prototype-less object: `in` sees own keys only */
    return bridgeCache[name];
  }
  var savedStack = stackSave();
  try {
    var byteCount = lengthBytesUTF8(name) + 1; /* + NUL terminator */
    var cString = stackAlloc(byteCount);
    stringToUTF8(name, cString, byteCount);
    var tableIdx = bridgeLookup(cString) | 0;
    bridgeCache[name] = tableIdx ? wasmTable.get(tableIdx) : null;
  } finally {
    stackRestore(savedStack);
  }
  return bridgeCache[name];
}
/* Return the host bridge function registered under `name`, or `fallback` if there is none. */
function bindBridge(name, fallback) {
  return lookupBridge(name) || fallback;
}
/* Positive difference, following C fdim: x - y when x > y, +0 otherwise.
 * NaN operands propagate (a bare `x > y` test would silently turn them into 0). */
function fdim(x, y) {
  if (x !== x || y !== y) {
    return NaN;
  }
  return x > y ? (x - y) : 0.0;
}
/* Magnitude of x with the sign of y. A zero y is classified by the sign of 1/y,
 * so -0 counts as negative. */
function copysign(x, y) {
  var magnitude = Math.abs(x);
  var negative = (y === 0) ? (1 / y === -Infinity) : (y < 0);
  return negative ? -magnitude : magnitude;
}
/* x * 2^e. The scaling is applied in steps so the power of two never overflows or
 * underflows on its own: Math.pow(2, e) is Infinity for e > 1023 and 0 for e < -1074
 * even when x * 2^e is representable (e.g. ldexp(Number.MIN_VALUE, 1074) is 1). */
function ldexp(x, e) {
  var up = Math.pow(2.0, 1023);
  var down = Math.pow(2.0, -969); /* 2^-1022 * 2^53: stays clear of subnormals while scaling */
  if (e > 1023) {
    x *= up;
    e -= 1023;
    if (e > 1023) {
      x *= up;
      e -= 1023;
      if (e > 1023) {
        e = 1023;
      }
    }
  }
  else if (e < -1022) {
    x *= down;
    e += 969;
    if (e < -1022) {
      x *= down;
      e += 969;
      if (e < -1022) {
        e = -1022;
      }
    }
  }
  return x * Math.pow(2.0, e);
}
/* Round to the nearest integer, sending exact halves to the even neighbour.
 * Non-finite inputs are returned unchanged. */
function rint(x) {
  if (!isFinite(x)) {
    return x;
  }
  var nearest = Math.round(x);
  var isTie = Math.abs(x - nearest) === 0.5;
  /* For a tie, rounding x/2 and doubling always lands on an even integer. */
  return isTie ? 2 * Math.round(x / 2) : nearest;
}
/* IEEE remainder, following C remainder(): x - n*y where n is x/y rounded to the
 * nearest integer with ties going to the even integer.
 * Returns NaN when either operand is NaN, x is infinite, or y is zero.
 * Returns x unchanged when y is infinite and x is finite. */
function remainder(x, y) {
  if (x !== x || y !== y || !isFinite(x) || y === 0.0) {
    return NaN;
  }
  if (!isFinite(y)) {
    return x;
  }
  var q = x / y;
  var n = Math.round(q);
  if (Math.abs(q - n) === 0.5) {
    /* Math.round sends halves toward +Infinity; the IEEE quotient needs ties-to-even. */
    n = 2 * Math.round(q / 2);
  }
  return x - y * n;
}
/* Polynomial approximation of erf(x): a five-term polynomial in t = 1/(1 + p|x|)
 * multiplied by exp(-x^2), evaluated for |x| and given the sign of x afterwards. */
function erfApprox(x) {
  var sign = x < 0 ? -1.0 : 1.0;
  var ax = Math.abs(x);
  /* Coefficients a5..a1, highest order first, for Horner evaluation. */
  var coeffs = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592];
  var t = 1.0 / (1.0 + 0.3275911 * ax);
  var poly = coeffs[0];
  for (var k = 1; k < coeffs.length; k++) {
    poly = poly * t + coeffs[k];
  }
  return sign * (1.0 - (poly * t) * Math.exp(-ax * ax));
}
/* Complementary error function, computed as 1 - erfApprox(x). */
function erfcApprox(x) {
  return 1.0 - erfApprox(x);
}
/* Series part of the approximation shared by tgammaApprox and lgammaApprox:
 * c0 + sum(p[i] / (z + i + 1)), with z already shifted down by one. */
function gammaSeries(z) {
  var p = [
    676.5203681218851, -1259.1392167224028, 771.32342877765313,
    -176.61502916214059, 12.507343278686905, -0.13857109526572012,
    9.9843695780195716e-6, 1.5056327351493116e-7
  ];
  var sum = 0.99999999999980993;
  for (var i = 0; i < p.length; i++) {
    sum += p[i] / (z + i + 1.0);
  }
  return sum;
}
/* Gamma function. Arguments below 0.5 go through the reflection formula
 * pi / (sin(pi z) * gamma(1 - z)).
 * The power t^(z + 0.5) is applied as two half-powers on either side of exp(-t):
 * taken in one piece it overflows to Infinity from about z = 143, although the
 * gamma function itself stays finite up to about 171. */
function tgammaApprox(z) {
  if (z < 0.5) {
    return Math.PI / (Math.sin(Math.PI * z) * tgammaApprox(1.0 - z));
  }
  z -= 1.0;
  var t = z + 7.5; /* z + (number of series coefficients) - 0.5 */
  var halfPow = Math.pow(t, 0.5 * (z + 0.5));
  return Math.sqrt(2.0 * Math.PI) * halfPow * Math.exp(-t) * halfPow * gammaSeries(z);
}
/* log(|gamma(x)|), evaluated in log space so it stays finite for large x,
 * where taking the log of tgammaApprox(x) would give Infinity. */
function lgammaApprox(x) {
  if (x === Infinity) {
    return Infinity;
  }
  if (x < 0.5) {
    /* Reflection: log|gamma(x)| = log(pi / |sin(pi x)|) - log|gamma(1 - x)| */
    return Math.log(Math.PI / Math.abs(Math.sin(Math.PI * x))) - lgammaApprox(1.0 - x);
  }
  var z = x - 1.0;
  var t = z + 7.5;
  return 0.5 * Math.log(2.0 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(gammaSeries(z));
}
/* Next representable double after x in the direction of y.
 * NaN in either operand gives NaN; x === y gives y; from zero the result is the
 * smallest subnormal carrying the sign of the direction. Otherwise the 64-bit pattern
 * of x is stepped by one: up when moving away from zero, down when moving toward it. */
function nextafterApprox(x, y) {
  if (isNaN(x) || isNaN(y)) {
    return NaN;
  }
  if (x === y) {
    return y;
  }
  if (x === 0.0) {
    return y > 0.0 ? Number.MIN_VALUE : -Number.MIN_VALUE;
  }
  /* Two views over one 8-byte buffer expose the same value as a float and as raw bits. */
  var asFloat = new Float64Array(1);
  var asBits = new BigUint64Array(asFloat.buffer);
  asFloat[0] = x;
  var awayFromZero = (y > x) === (x > 0.0);
  asBits[0] = awayFromZero ? asBits[0] + 1n : asBits[0] - 1n;
  return asFloat[0];
}
/* 10 raised to the power x. */
function meJitExp10(x) { return Math.pow(10.0, x); }
/* sin(pi * x), computed by scaling the argument and calling Math.sin. */
function meJitSinpi(x) { return Math.sin(Math.PI * x); }
/* cos(pi * x), computed by scaling the argument and calling Math.cos. */
function meJitCospi(x) { return Math.cos(Math.PI * x); }
/* Base-2 exponential: Math.exp2 when the engine provides one, otherwise Math.pow(2, x). */
var mathExp2 = Math.exp2 ? Math.exp2 : function(x) { return Math.pow(2.0, x); };
/* log(exp(a) + exp(b)), evaluated as hi + log1p(exp(lo - hi)) so large inputs do not
 * overflow. Two equal infinities are returned directly: lo - hi would be
 * Infinity - Infinity = NaN, whereas the correct result is that same infinity. */
function meJitLogaddexp(a, b) {
  if (a === b && !isFinite(a)) {
    return a;
  }
  var hi = a > b ? a : b;
  var lo = a > b ? b : a;
  return hi + Math.log1p(Math.exp(lo - hi));
}
/* Select y when the condition c is zero (either sign), x otherwise (NaN selects x). */
function meJitWhere(c, x, y) {
  if (c === 0.0) {
    return y;
  }
  return x;
}
/* Element-wise fallbacks over wasm memory. Each takes byte pointers, converts them to
 * element indices and applies `fn` to `n` elements.
 * The conversion uses the unsigned shift `>>>`: wasm32 pointers reach JS as signed
 * 32-bit integers, so with `>>` an address at or above 2 GiB becomes a negative
 * index and the typed-array access silently does nothing. */
function vecUnaryF64(inPtr, outPtr, n, fn) {
  var ii = inPtr >>> 3;
  var oo = outPtr >>> 3;
  for (var i = 0; i < n; i++) {
    HEAPF64[oo + i] = fn(HEAPF64[ii + i]);
  }
}
function vecBinaryF64(aPtr, bPtr, outPtr, n, fn) {
  var aa = aPtr >>> 3;
  var bb = bPtr >>> 3;
  var oo = outPtr >>> 3;
  for (var i = 0; i < n; i++) {
    HEAPF64[oo + i] = fn(HEAPF64[aa + i], HEAPF64[bb + i]);
  }
}
function vecUnaryF32(inPtr, outPtr, n, fn) {
  var ii = inPtr >>> 2;
  var oo = outPtr >>> 2;
  for (var i = 0; i < n; i++) {
    HEAPF32[oo + i] = fn(HEAPF32[ii + i]);
  }
}
function vecBinaryF32(aPtr, bPtr, outPtr, n, fn) {
  var aa = aPtr >>> 2;
  var bb = bPtr >>> 2;
  var oo = outPtr >>> 2;
  for (var i = 0; i < n; i++) {
    HEAPF32[oo + i] = fn(HEAPF32[aa + i], HEAPF32[bb + i]);
  }
}
/* Import object handed to the JIT module as "env": the shared memory plus a JS
 * implementation of every libm-style symbol a kernel may import.
 * Three entries are wrapped rather than mapped straight onto Math.*, because the JS
 * built-ins do not have C semantics:
 *   - round: C rounds halves away from zero; Math.round sends them toward +Infinity.
 *   - fmax/fmin: C returns the other operand when one is NaN; Math.max/min return NaN. */
var env = {
  memory: wasmMemory,
  acos: Math.acos, acosh: Math.acosh, asin: Math.asin, asinh: Math.asinh,
  atan: Math.atan, atan2: Math.atan2, atanh: Math.atanh, cbrt: Math.cbrt,
  ceil: Math.ceil, copysign: copysign, cos: Math.cos, cosh: Math.cosh,
  erf: erfApprox, erfc: erfcApprox, exp: Math.exp, exp2: mathExp2,
  expm1: Math.expm1, fabs: Math.abs, fdim: fdim, floor: Math.floor,
  /* Not a fused multiply-add: the product is rounded before the addition. */
  fma: function(a, b, c) { return a * b + c; },
  fmax: function(a, b) { return a !== a ? b : (b !== b ? a : Math.max(a, b)); },
  fmin: function(a, b) { return a !== a ? b : (b !== b ? a : Math.min(a, b)); },
  fmod: function(a, b) { return a % b; }, hypot: Math.hypot, ldexp: ldexp,
  lgamma: lgammaApprox, log: Math.log, log10: Math.log10, log1p: Math.log1p,
  log2: Math.log2, nextafter: nextafterApprox, pow: Math.pow, remainder: remainder,
  rint: rint,
  round: function(x) { return x < 0 ? -Math.round(-x) : Math.round(x); },
  sin: Math.sin, sinh: Math.sinh, sqrt: Math.sqrt,
  tan: Math.tan, tanh: Math.tanh, tgamma: tgammaApprox, trunc: Math.trunc,
  me_jit_exp10: meJitExp10, me_jit_sinpi: meJitSinpi, me_jit_cospi: meJitCospi,
  me_jit_logaddexp: meJitLogaddexp, me_jit_where: meJitWhere
};
/* Cast helpers imported by JIT kernels. */
/* To integer: drop the fractional part, rounding toward zero. */
env.me_wasm32_cast_int = function(x) {
  return Math.trunc(x);
};
/* To float: the value is passed through unchanged. */
env.me_wasm32_cast_float = function(x) {
  return x;
};
/* To bool: 0 for zero, 1 for anything else. */
env.me_wasm32_cast_bool = function(x) {
  if (x === 0) {
    return 0;
  }
  return 1;
};
/* memset: prefer the host bridge; the JS fallback fills `n` bytes at `ptr` with the low
 * byte of `value` and returns `ptr`.
 * The byte view is created from the imported memory on each call. This file never
 * defines HEAPU8, so referring to it would throw a ReferenceError in strict mode
 * unless the embedder happened to expose such a global. */
env.memset = bindBridge("memset", function(ptr, value, n) {
  if (n > 0) {
    var base = ptr >>> 0; /* wasm32 pointers arrive as signed 32-bit integers */
    new Uint8Array(wasmMemory.buffer).fill(value & 255, base, base + n);
  }
  return ptr | 0;
});
/* Prefer host wasm bridge symbols; keep JS fallbacks for robustness. */
env.me_jit_exp10 = bindBridge("me_jit_exp10", env.me_jit_exp10);
env.me_jit_sinpi = bindBridge("me_jit_sinpi", env.me_jit_sinpi);
env.me_jit_cospi = bindBridge("me_jit_cospi", env.me_jit_cospi);
env.me_jit_logaddexp = bindBridge("me_jit_logaddexp", env.me_jit_logaddexp);
env.me_jit_where = bindBridge("me_jit_where", env.me_jit_where);
env.me_jit_vec_sin_f64 = bindBridge("me_jit_vec_sin_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.sin); });
env.me_jit_vec_cos_f64 = bindBridge("me_jit_vec_cos_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.cos); });
env.me_jit_vec_exp_f64 = bindBridge("me_jit_vec_exp_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.exp); });
env.me_jit_vec_log_f64 = bindBridge("me_jit_vec_log_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log); });
env.me_jit_vec_exp10_f64 = bindBridge("me_jit_vec_exp10_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, meJitExp10); });
env.me_jit_vec_sinpi_f64 = bindBridge("me_jit_vec_sinpi_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, meJitSinpi); });
env.me_jit_vec_cospi_f64 = bindBridge("me_jit_vec_cospi_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, meJitCospi); });
env.me_jit_vec_atan2_f64 = bindBridge("me_jit_vec_atan2_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.atan2); });
env.me_jit_vec_hypot_f64 = bindBridge("me_jit_vec_hypot_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.hypot); });
env.me_jit_vec_pow_f64 = bindBridge("me_jit_vec_pow_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.pow); });
env.me_jit_vec_fmax_f64 = bindBridge("me_jit_vec_fmax_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.max); });
env.me_jit_vec_fmin_f64 = bindBridge("me_jit_vec_fmin_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.min); });
env.me_jit_vec_expm1_f64 = bindBridge("me_jit_vec_expm1_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.expm1); });
env.me_jit_vec_log10_f64 = bindBridge("me_jit_vec_log10_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log10); });
env.me_jit_vec_sinh_f64 = bindBridge("me_jit_vec_sinh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.sinh); });
env.me_jit_vec_cosh_f64 = bindBridge("me_jit_vec_cosh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.cosh); });
env.me_jit_vec_tanh_f64 = bindBridge("me_jit_vec_tanh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.tanh); });
env.me_jit_vec_asinh_f64 = bindBridge("me_jit_vec_asinh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.asinh); });
env.me_jit_vec_acosh_f64 = bindBridge("me_jit_vec_acosh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.acosh); });
env.me_jit_vec_atanh_f64 = bindBridge("me_jit_vec_atanh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.atanh); });
env.me_jit_vec_abs_f64 = bindBridge("me_jit_vec_abs_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.abs); });
env.me_jit_vec_sqrt_f64 = bindBridge("me_jit_vec_sqrt_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.sqrt); });
env.me_jit_vec_log1p_f64 = bindBridge("me_jit_vec_log1p_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log1p); });
env.me_jit_vec_exp2_f64 = bindBridge("me_jit_vec_exp2_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, mathExp2); });
env.me_jit_vec_log2_f64 = bindBridge("me_jit_vec_log2_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log2); });
env.me_jit_vec_sin_f32 = bindBridge("me_jit_vec_sin_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.sin); });
env.me_jit_vec_cos_f32 = bindBridge("me_jit_vec_cos_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.cos); });
env.me_jit_vec_exp_f32 = bindBridge("me_jit_vec_exp_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.exp); });
env.me_jit_vec_log_f32 = bindBridge("me_jit_vec_log_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log); });
env.me_jit_vec_exp10_f32 = bindBridge("me_jit_vec_exp10_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, meJitExp10); });
env.me_jit_vec_sinpi_f32 = bindBridge("me_jit_vec_sinpi_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, meJitSinpi); });
env.me_jit_vec_cospi_f32 = bindBridge("me_jit_vec_cospi_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, meJitCospi); });
env.me_jit_vec_atan2_f32 = bindBridge("me_jit_vec_atan2_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.atan2); });
env.me_jit_vec_hypot_f32 = bindBridge("me_jit_vec_hypot_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.hypot); });
env.me_jit_vec_pow_f32 = bindBridge("me_jit_vec_pow_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.pow); });
env.me_jit_vec_fmax_f32 = bindBridge("me_jit_vec_fmax_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.max); });
env.me_jit_vec_fmin_f32 = bindBridge("me_jit_vec_fmin_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.min); });
env.me_jit_vec_expm1_f32 = bindBridge("me_jit_vec_expm1_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.expm1); });
env.me_jit_vec_log10_f32 = bindBridge("me_jit_vec_log10_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log10); });
env.me_jit_vec_sinh_f32 = bindBridge("me_jit_vec_sinh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.sinh); });
env.me_jit_vec_cosh_f32 = bindBridge("me_jit_vec_cosh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.cosh); });
env.me_jit_vec_tanh_f32 = bindBridge("me_jit_vec_tanh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.tanh); });
env.me_jit_vec_asinh_f32 = bindBridge("me_jit_vec_asinh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.asinh); });
env.me_jit_vec_acosh_f32 = bindBridge("me_jit_vec_acosh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.acosh); });
env.me_jit_vec_atanh_f32 = bindBridge("me_jit_vec_atanh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.atanh); });
env.me_jit_vec_abs_f32 = bindBridge("me_jit_vec_abs_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.abs); });
env.me_jit_vec_sqrt_f32 = bindBridge("me_jit_vec_sqrt_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.sqrt); });
env.me_jit_vec_log1p_f32 = bindBridge("me_jit_vec_log1p_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log1p); });
env.me_jit_vec_exp2_f32 = bindBridge("me_jit_vec_exp2_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, mathExp2); });
env.me_jit_vec_log2_f32 = bindBridge("me_jit_vec_log2_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log2); });
return env;
}
/* --- parse sections ------------------------------------------------- */
var pos = 8, sections = [];
while (pos < src.length) {
var id = src[pos++];
var tmp = readULEB(src, pos), len = tmp[0]; pos = tmp[1];
sections.push({ id: id, data: src.subarray(pos, pos + len) });
pos += len;
}
/* --- reassemble with patched memory -------------------------------- */
var out = [0x00,0x61,0x73,0x6d, 0x01,0x00,0x00,0x00];
var impDone = false;
for (var i = 0; i < sections.length; i++) {
var s = sections[i];
if (s.id === 5) continue; /* drop memory section */
if (s.id === 2) {
out = out.concat(patchImportSec(s.data));
impDone = true;
continue;
}
if (!impDone && s.id > 2) {
out = out.concat(buildImportSecWithMemory());
impDone = true;
}
if (s.id === 7) { /* strip memory export from export section */
var ep = 0, et = readULEB(s.data, ep), ne = et[0]; ep = et[1];
var exps = [];
for (var e = 0; e < ne; e++) {
var nt = readULEB(s.data, ep), nl = nt[0]; ep = nt[1];
var nm = dec.decode(s.data.subarray(ep, ep + nl)); ep += nl;
var kd = s.data[ep++];
var xt = readULEB(s.data, ep), xi = xt[0]; ep = xt[1];
if (nm === "memory" && kd === 0x02) continue;
exps.push({ n: nm, k: kd, i: xi });
}
var eb = encULEB(exps.length);
for (var e = 0; e < exps.length; e++) {
eb = eb.concat(encStr(exps[e].n));
eb.push(exps[e].k);
eb = eb.concat(encULEB(exps[e].i));
}
out.push(0x07);
out = out.concat(encULEB(eb.length));
out = out.concat(eb);
continue;
}
out.push(s.id);
out = out.concat(encULEB(s.data.length));
out = out.concat(Array.from(s.data));
}
if (!impDone) {
out = out.concat(buildImportSecWithMemory());
}
/* --- instantiate with shared memory -------------------------------- */
var patched = new Uint8Array(out);
try {
var mod = new WebAssembly.Module(patched);
var inst = new WebAssembly.Instance(mod, { env: buildEnvImports() });
} catch (e) {
err("[me-wasm-jit] " + e.message);
return 0;
}
var fn = inst.exports["me_dsl_jit_kernel"];
if (!fn) { err("[me-wasm-jit] missing export"); return 0; }
return addFunction(fn, "iiii");
}
/* Release a function-table slot via runtime.removeFunction.
 * Does nothing when the runtime has no removeFunction or when idx is 0/falsy. */
function _meJitFreeFn(runtime, idx) {
  var canRemove = Boolean(runtime) && typeof runtime.removeFunction === 'function';
  if (canRemove && idx) {
    runtime.removeFunction(idx);
  }
}
root._meJitInstantiate = _meJitInstantiate;
root._meJitFreeFn = _meJitFreeFn;
if (typeof module !== 'undefined' && module.exports) {
module.exports = {
_meJitInstantiate: _meJitInstantiate,
_meJitFreeFn: _meJitFreeFn
};
}
})(typeof globalThis !== 'undefined' ? globalThis : (typeof self !== 'undefined' ? self : this));

File diff suppressed because it is too large Load Diff

@ -0,0 +1,856 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
from abc import ABC, abstractmethod
from collections.abc import Sequence
try:
from numpy.typing import DTypeLike
except (ImportError, AttributeError):
# fallback to internal module (use with caution)
from numpy._typing import DTypeLike
import numpy as np
import blosc2
class ProxyNDSource(ABC):
    """
    Abstract interface that N-dimensional sources implement to back a :ref:`Proxy`.
    """

    @property
    @abstractmethod
    def shape(self) -> tuple:
        """
        Shape of the source.
        """

    @property
    @abstractmethod
    def chunks(self) -> tuple:
        """
        Chunk shape of the source.
        """

    @property
    @abstractmethod
    def blocks(self) -> tuple:
        """
        Block shape of the source.
        """

    @property
    @abstractmethod
    def dtype(self) -> np.dtype:
        """
        Data type of the source.
        """

    @property
    def cparams(self) -> blosc2.CParams:
        """
        Compression parameters of the source.

        The default only sets ``typesize`` from the item size of :attr:`dtype`;
        override it when the source uses a different compression configuration.
        """
        itemsize = self.dtype.itemsize
        return blosc2.CParams(typesize=itemsize)

    @abstractmethod
    def get_chunk(self, nchunk: int) -> bytes:
        """
        Return one compressed chunk of the source.

        Parameters
        ----------
        nchunk: int
            The unidimensional index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.
        """

    async def aget_chunk(self, nchunk: int) -> bytes:
        """
        Return one compressed chunk of the source asynchronously.

        Parameters
        ----------
        nchunk: int
            The index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.

        Notes
        -----
        Optional: this base implementation always raises NotImplementedError, so
        sources that support asynchronous access must override it.
        """
        message = "aget_chunk is only available if the source has an async aget_chunk method"
        raise NotImplementedError(message)
class ProxySource(ABC):
    """
    Base interface for sources of :ref:`Proxy` that are not NDim objects.
    """

    @property
    @abstractmethod
    def nbytes(self) -> int:
        """
        The total number of bytes in the source.
        """

    @property
    @abstractmethod
    def chunksize(self) -> int:
        """
        The chunksize of the source.

        This value is forwarded as the ``chunksize`` argument of
        :class:`blosc2.SChunk` when a :ref:`Proxy` builds its cache, so it is a
        single integer rather than a tuple.
        """

    @property
    @abstractmethod
    def typesize(self) -> int:
        """
        The typesize of the source.
        """

    @property
    def cparams(self) -> blosc2.CParams:
        """
        The compression parameters of the source.

        The default only sets ``typesize`` from :attr:`typesize`; override it when
        the source uses a different compression configuration.
        """
        return blosc2.CParams(typesize=self.typesize)

    @abstractmethod
    def get_chunk(self, nchunk: int) -> bytes:
        """
        Return the compressed chunk in :paramref:`self`.

        Parameters
        ----------
        nchunk: int
            The index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.
        """

    async def aget_chunk(self, nchunk: int) -> bytes:
        """
        Return the compressed chunk in :paramref:`self` asynchronously.

        Parameters
        ----------
        nchunk: int
            The index of the chunk to retrieve.

        Returns
        -------
        out: bytes object
            The compressed chunk.

        Raises
        ------
        NotImplementedError
            Always, in this base implementation.

        Notes
        -----
        This method is optional: sources that support asynchronous access must
        override it.
        """
        raise NotImplementedError(
            "aget_chunk is only available if the source has an async aget_chunk method"
        )
class Proxy(blosc2.Operand):
"""Proxy (with cache support) for an object following the :ref:`ProxySource` interface.
This can be used to cache chunks of a regular data container which follows the
:ref:`ProxySource` or :ref:`ProxyNDSource` interfaces.
"""
def __init__(
    self, src: ProxySource | ProxyNDSource, urlpath: str | None = None, mode: str = "a", **kwargs: dict
):
    """
    Create a new :ref:`Proxy` to serve as a cache to save accessed chunks locally.

    Parameters
    ----------
    src: :ref:`ProxySource` or :ref:`ProxyNDSource`
        The original container.
    urlpath: str, optional
        The urlpath where to save the container that will work as a cache.
    mode: str, optional
        "a" means read/write (create if it doesn't exist); "w" means create
        (overwrite if it exists). Default is "a".
    kwargs: dict, optional
        Keyword arguments supported:

            vlmeta: dict or None
                A dictionary with different variable length metalayers. One entry per metalayer:

                    key: bytes or str
                        The name of the metalayer.
                    value: object
                        The metalayer object that will be serialized using msgpack.
    """
    self.src = src
    self.urlpath = urlpath
    # A ready-made cache container may be injected through the private ``_cache`` keyword;
    # otherwise a fresh one is created below with the same layout as the source.
    self._cache = kwargs.pop("_cache", None)
    if self._cache is None:
        # Record where the source lives in a "proxy-source" metalayer of the cache.
        meta_val = {
            "local_abspath": None,
            "urlpath": None,
            "caterva2_env": kwargs.pop("caterva2_env", False),
        }
        container = getattr(self.src, "schunk", self.src)
        if hasattr(container, "urlpath"):
            meta_val["local_abspath"] = container.urlpath
        elif isinstance(self.src, blosc2.C2Array):
            meta_val["urlpath"] = (self.src.path, self.src.urlbase, self.src.auth_token)
        meta = {"proxy-source": meta_val}
        if hasattr(self.src, "shape"):
            # N-dimensional source: the cache is an NDArray with the same geometry.
            self._cache = blosc2.empty(
                self.src.shape,
                self.src.dtype,
                chunks=self.src.chunks,
                blocks=self.src.blocks,
                cparams=self.src.cparams,
                urlpath=urlpath,
                mode=mode,
                meta=meta,
            )
        else:
            # Flat source: the cache is an SChunk pre-filled with uninitialized items,
            # so every chunk starts out marked as special ("not fetched yet").
            self._cache = blosc2.SChunk(
                chunksize=self.src.chunksize,
                cparams=self.src.cparams,
                urlpath=urlpath,
                mode=mode,
                meta=meta,
            )
            self._cache.fill_special(self.src.nbytes // self.src.typesize, blosc2.SpecialValue.UNINIT)
    # Chunk-level operations always go through the SChunk, whether the cache is an
    # NDArray (which exposes ``.schunk``) or an SChunk itself.
    self._schunk_cache = getattr(self._cache, "schunk", self._cache)
    vlmeta = kwargs.get("vlmeta")
    if vlmeta:
        for key in vlmeta:
            self._schunk_cache.vlmeta[key] = vlmeta[key]
def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk:
    """
    Get the container used as cache with the requested data updated.

    Parameters
    ----------
    item: slice or list of slices, optional
        If not None, only the chunks that intersect with the slices
        in items will be retrieved if they have not been already.
        ``None`` and the default ``()`` both request every chunk.

    Returns
    -------
    out: :ref:`NDArray` or :ref:`SChunk`
        The local container used to cache the already requested data.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.arange(20).reshape(10, 2)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2)))
    >>> slice_data[:3, :2]
    [[0 1]
    [2 3]
    [4 5]]
    """
    if item is None or item == ():
        # Full realization
        wanted = None
    else:
        # Get only a slice; a set keeps the per-chunk membership test cheap
        wanted = set(blosc2.get_slice_nchunks(self._cache, item))
    for info in self._schunk_cache.iterchunks_info():
        if info.special == blosc2.SpecialValue.NOT_SPECIAL:
            # Regular chunk: it has already been copied into the cache
            continue
        if wanted is None or info.nchunk in wanted:
            chunk = self.src.get_chunk(info.nchunk)
            self._schunk_cache.update_chunk(info.nchunk, chunk)
    return self._cache
async def afetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk:
    """
    Retrieve the cache container with the requested data updated asynchronously.

    Parameters
    ----------
    item: slice or list of slices, optional
        If provided, only the chunks intersecting with the specified slices
        will be retrieved if they have not been already.
        ``None`` and the default ``()`` both request every chunk.

    Returns
    -------
    out: :ref:`NDArray` or :ref:`SChunk`
        The local container used to cache the already requested data.

    Raises
    ------
    NotImplementedError
        If the source has no callable `aget_chunk` attribute.

    Notes
    -----
    This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource`
    have an async `aget_chunk` method.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> import asyncio
    >>> from blosc2 import ProxyNDSource
    >>> class MyProxySource(ProxyNDSource):
    >>>     def __init__(self, data):
    >>>         # If the next source is multidimensional, it must have the attributes:
    >>>         self.data = data
    >>>         f"Data shape: {self.shape}, Chunks: {self.chunks}"
    >>>         f"Blocks: {self.blocks}, Dtype: {self.dtype}"
    >>>     @property
    >>>     def shape(self):
    >>>         return self.data.shape
    >>>     @property
    >>>     def chunks(self):
    >>>         return self.data.chunks
    >>>     @property
    >>>     def blocks(self):
    >>>         return self.data.blocks
    >>>     @property
    >>>     def dtype(self):
    >>>         return self.data.dtype
    >>>     # This method must be present
    >>>     def get_chunk(self, nchunk):
    >>>         return self.data.get_chunk(nchunk)
    >>>     # This method is optional
    >>>     async def aget_chunk(self, nchunk):
    >>>         await asyncio.sleep(0.1) # Simulate an asynchronous operation
    >>>         return self.data.get_chunk(nchunk)
    >>> data = np.arange(20).reshape(4, 5)
    >>> chunks = [2, 5]
    >>> blocks = [1, 5]
    >>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks)
    >>> source = MyProxySource(data)
    >>> proxy = blosc2.Proxy(source)
    >>> async def fetch_data():
    >>>     # Fetch a slice of the data from the proxy asynchronously
    >>>     slice_data = await proxy.afetch(slice(0, 2))
    >>>     # Note that only data fetched is shown, the rest is uninitialized
    >>>     slice_data[:]
    >>> asyncio.run(fetch_data())
    >>> # Using getitem to get a slice of the data
    >>> result = proxy[1:2, 1:3]
    >>> f"Proxy getitem: {result}"
    Data shape: (4, 5), Chunks: (2, 5)
    Blocks: (1, 5), Dtype: int64
    [[0 1 2 3 4]
    [5 6 7 8 9]
    [0 0 0 0 0]
    [0 0 0 0 0]]
    Proxy getitem: [[6 7]]
    """
    if not callable(getattr(self.src, "aget_chunk", None)):
        raise NotImplementedError("afetch is only available if the source has an aget_chunk method")
    if item is None or item == ():
        # Full realization
        wanted = None
    else:
        # Get only a slice; a set keeps the per-chunk membership test cheap
        wanted = set(blosc2.get_slice_nchunks(self._cache, item))
    for info in self._schunk_cache.iterchunks_info():
        if info.special == blosc2.SpecialValue.NOT_SPECIAL:
            # Regular chunk: it has already been copied into the cache
            continue
        if wanted is None or info.nchunk in wanted:
            chunk = await self.src.aget_chunk(info.nchunk)
            self._schunk_cache.update_chunk(info.nchunk, chunk)
    return self._cache
def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
    """
    Get a slice as a numpy.ndarray using the :ref:`Proxy`.

    The requested region is first brought into the cache by calling
    ``self.fetch(item)``; the slice is then read from the cache.

    Parameters
    ----------
    item: slice or list of slices
        The slice of the desired data.

    Returns
    -------
    out: numpy.ndarray
        An array with the data slice.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.arange(25).reshape(5, 5)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> proxy[0:3, 0:3]
    [[ 0  1  2]
     [ 5  6  7]
     [10 11 12]]
    >>> proxy[2:5, 2:5]
    [[12 13 14]
     [17 18 19]
     [22 23 24]]
    """
    # Populate the cache
    self.fetch(item)
    return self._cache[item]
@property
def dtype(self) -> np.dtype:
    """The dtype of the cache, or None if the cache is not a Blosc2 NDArray"""
    cache = self._cache
    if isinstance(cache, blosc2.NDArray):
        return cache.dtype
    return None
@property
def shape(self) -> tuple[int]:
    """The shape of the cache if it is a Blosc2 NDArray; otherwise ``len()`` of the cache"""
    cache = self._cache
    if isinstance(cache, blosc2.NDArray):
        return cache.shape
    # NOTE(review): this branch returns a plain int, not a tuple
    return len(cache)
@property
def chunks(self) -> tuple[int]:  # cache should have same chunks as src
    """The chunks of the cache, or None if the cache is not a Blosc2 NDArray"""
    cache = self._cache
    if isinstance(cache, blosc2.NDArray):
        return cache.chunks
    return None
@property
def blocks(self) -> tuple[int]:  # cache should have same blocks as src
    """The blocks of the cache, or None if the cache is not a Blosc2 NDArray"""
    cache = self._cache
    if isinstance(cache, blosc2.NDArray):
        return cache.blocks
    return None
@property
def schunk(self) -> blosc2.schunk.SChunk:
    """The :ref:`SChunk` of the cache (the object stored in ``self._schunk_cache``)"""
    return self._schunk_cache
@property
def cparams(self) -> blosc2.CParams:
    """The compression parameters of the cache, read from ``self._cache.cparams``"""
    return self._cache.cparams
@property
def info(self) -> str:
    """
    The info of the cache.

    Raises
    ------
    NotImplementedError
        If the cache is not a Blosc2 NDArray.
    """
    if not isinstance(self._cache, blosc2.NDArray):
        raise NotImplementedError("info is only available if the source is a NDArray")
    return self._cache.info
def __str__(self) -> str:
    """Return a short description built from the wrapped source and ``urlpath``."""
    return f"Proxy({self.src}, urlpath={self.urlpath})"
@property
def vlmeta(self) -> blosc2.schunk.vlmeta:
    """
    Get the vlmeta of the cache.

    The value is read from the cache's super-chunk (``self._schunk_cache``).

    See Also
    --------
    :py:attr:`blosc2.schunk.SChunk.vlmeta`
    """
    return self._schunk_cache.vlmeta
@property
def fields(self) -> dict:
    """
    Dictionary with the fields of :paramref:`self`.

    Returns
    -------
    fields: dict
        A dictionary mapping each field name of the cache to a
        :ref:`ProxyNDField` bound to this proxy. None is returned when the
        cache exposes no ``fields`` attribute (or it is None).

    See Also
    --------
    :ref:`NDField`

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.ones(16, dtype=[('field1', 'i4'), ('field2', 'f4')]).reshape(4, 4)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> # Get a dictionary of fields from the proxy, where each field can be accessed individually
    >>> fields_dict = proxy.fields
    >>> for field_name, field_proxy in fields_dict.items():
    ...     print(f"Field name: {field_name}, Field data: {field_proxy}")
    Field name: field1, Field data: <blosc2.proxy.ProxyNDField object at 0x114472d20>
    Field name: field2, Field data: <blosc2.proxy.ProxyNDField object at 0x10e215be0>
    >>> fields_dict['field2'][:]
    [[1. 1. 1. 1.]
     [1. 1. 1. 1.]
     [1. 1. 1. 1.]
     [1. 1. 1. 1.]]
    """
    names = getattr(self._cache, "fields", None)
    if names is None:
        return None
    by_name = {}
    for name in names:
        by_name[name] = ProxyNDField(self, name)
    return by_name
class ProxyNDField(blosc2.Operand):
    """Operand giving access to a single named field of a :ref:`Proxy`."""

    def __init__(self, proxy: Proxy, field: str):
        self.proxy, self.field = proxy, field
        # dtype and shape are captured once, at construction time
        self._dtype = proxy.dtype[field]
        self._shape = proxy.shape

    @property
    def dtype(self) -> np.dtype:
        """
        Get the data type of the :ref:`ProxyNDField`.

        Returns
        -------
        out: np.dtype
            The dtype of the selected field, as taken from the proxy dtype.
        """
        return self._dtype

    @property
    def shape(self) -> tuple[int]:
        """
        Get the shape of the :ref:`ProxyNDField`.

        Returns
        -------
        out: tuple
            The shape taken from the proxy when this object was created.
        """
        return self._shape

    def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
        """
        Get a slice as a numpy.ndarray using the `field` in `proxy`.

        Parameters
        ----------
        item: slice or list of slices
            The slice of the desired data.

        Returns
        -------
        out: numpy.ndarray
            An array with the data slice.
        """
        # Slice through the proxy, then pick the field out of the result
        return self.proxy[item][self.field]
def _convert_dtype(dt: str | DTypeLike):
"""
Attempts to convert to blosc2.dtype (i.e. numpy dtype)
"""
if hasattr(dt, "as_numpy_dtype"):
dt = dt.as_numpy_dtype
try:
return np.dtype(dt)
except TypeError: # likely passed e.g. a torch.float64
return np.dtype(str(dt).split(".")[1])
except Exception as e:
raise TypeError(f"Could not parse dtype arg {dt}.") from e
class SimpleProxy(blosc2.Operand):
    """
    Simple proxy for any data container to be used with the compute engine.

    The source must have a `shape` and `dtype` attributes; if not,
    it will be converted to a NumPy array via the `np.asarray` function.
    It should also have a `__getitem__` method.

    This only supports the __getitem__ method. No caching is performed.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> a = np.arange(20, dtype=np.float32).reshape(4, 5)
    >>> proxy = blosc2.SimpleProxy(a)
    >>> proxy[1:3, 2:4]
    [[ 7.  8.]
     [12. 13.]]
    """

    def __init__(self, src, chunks: tuple | None = None, blocks: tuple | None = None):
        looks_like_array = hasattr(src, "shape") and hasattr(src, "dtype")
        if not looks_like_array:
            # If the source is not an array, convert it to NumPy
            src = np.asarray(src)
        if not hasattr(src, "__getitem__"):
            raise TypeError("The source must have a __getitem__ method")
        self._src = src
        self._dtype = _convert_dtype(src.dtype)
        if isinstance(src.shape, tuple):
            self._shape = src.shape
        else:
            self._shape = tuple(src.shape)
        # Compute reasonable values for chunks and blocks
        cparams = blosc2.CParams(clevel=0)

        def is_ints_sequence(src, attr):
            """True if `src.attr` is a non-string sequence made only of ints."""
            candidate = getattr(src, attr, None)
            if isinstance(candidate, (str, bytes)) or not isinstance(candidate, Sequence):
                return False
            return all(isinstance(entry, int) for entry in candidate)

        # Explicit arguments win; otherwise reuse the source's own partitioning
        if chunks is None and is_ints_sequence(src, "chunks"):
            chunks = src.chunks
        if blocks is None and is_ints_sequence(src, "blocks"):
            blocks = src.blocks
        self.chunks, self.blocks = blosc2.compute_chunks_blocks(
            self.shape, chunks, blocks, self.dtype, cparams=cparams
        )

    @property
    def src(self):
        """The source object that this proxy wraps."""
        return self._src

    @property
    def shape(self):
        """The shape of the source array."""
        return self._shape

    @property
    def dtype(self):
        """The data type of the source array."""
        return self._dtype

    @property
    def ndim(self):
        """The number of dimensions of the source array."""
        return len(self.shape)

    def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
        """
        Get a slice as a numpy.ndarray (via this proxy).

        Parameters
        ----------
        item
            Index or slice forwarded unchanged to the source.

        Returns
        -------
        out: numpy.ndarray
            An array with the data slice. Results without a `shape`, or with
            an empty shape, are returned as obtained from the source.
        """
        out = self._src[item]
        scalar_like = not hasattr(out, "shape") or out.shape == ()
        if scalar_like:
            return out
        # avoids copy for PyTorch (JAX/Tensorflow will always copy,
        # no easy way around it)
        return np.asarray(out)
def as_simpleproxy(*arrs: Sequence[blosc2.Array]) -> tuple[SimpleProxy | blosc2.Operand]:
    """
    Convert Array objects which fulfil the Array protocol into SimpleProxy.
    Objects that already are a blosc2.Operand are passed through untouched.

    Parameters
    ----------
    arrs: Sequence[blosc2.Array]
        Objects fulfilling Array protocol.

    Returns
    -------
    out: tuple[blosc2.SimpleProxy | blosc2.Operand]
        Objects with minimal interface for blosc2 LazyExpr computations.
        When exactly one object is passed, it is returned bare (not wrapped
        in a tuple).
    """
    converted = tuple(
        arr if isinstance(arr, blosc2.Operand) else SimpleProxy(arr) for arr in arrs
    )
    if len(converted) == 1:
        return converted[0]
    return converted
def jit(func=None, *, out=None, disable=False, **kwargs):  # noqa: C901
    """
    Prepare a function so that it can be used with the Blosc2 compute engine.

    The inputs of the function can be any combination of NumPy/NDArray arrays
    and scalars. The function will be called with the NumPy arrays replaced by
    :ref:`SimpleProxy` objects, whereas NDArray objects will be used as is.

    The returned value will be a NDArray if appropriate kwargs are provided
    (e.g. `cparams=`). Else, the return value will be a NumPy array
    (if the function returns a NumPy array). If `out` is provided,
    the result will be computed and stored in the `out` array

    Parameters
    ----------
    func: callable
        The function to be prepared for the Blosc2 compute engine.
    out: np.ndarray, NDArray, optional
        The output array where the result will be stored.
    disable: bool, optional
        If True, the decorator is disabled and the original function is returned unchanged.
        Default is False.
    **kwargs: dict, optional
        Additional keyword arguments supported by the :func:`empty` constructor.

    Returns
    -------
    wrapper
        The wrapped function (it keeps the name and docstring of `func`),
        or a decorator when `func` is not given.

    Notes
    -----
    * Although many NumPy functions are supported, some may not be implemented yet.
      If you find a function that is not supported, please open an issue.
    * `out` and `kwargs` parameters are not supported for all expressions
      (e.g. when using a reduction as the last function). In this case, you can
      still use the `out` parameter of the reduction function for some custom
      control over the output.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> @blosc2.jit
    ... def compute_expression(a, b, c):
    ...     return np.sum(((a ** 3 + np.sin(a * 2)) > 2 * c) & (b > 0), axis=1)
    >>> a = np.arange(20, dtype=np.float32).reshape(4, 5)
    >>> b = np.arange(20).reshape(4, 5)
    >>> c = np.arange(5)
    >>> compute_expression(a, b, c)
    [5 5 5 5]
    """
    import functools

    def decorator(func):
        if disable:
            return func

        # Keep __name__, __doc__, etc. of the user function on the wrapper
        @functools.wraps(func)
        def wrapper(*args, **func_kwargs):
            # Get some kwargs in decorator for SimpleProxy constructor
            proxy_kwargs = {"chunks": kwargs.get("chunks"), "blocks": kwargs.get("blocks")}
            # True when at least one decorator kwarg carries a real value
            has_kwargs = any(value is not None for value in kwargs.values())

            # Wrap the arguments in SimpleProxy objects if they are not Operands
            new_args = [
                arg if isinstance(arg, blosc2.Operand) else SimpleProxy(arg, **proxy_kwargs)
                for arg in args
            ]
            # The same for the keyword arguments (values replaced in place;
            # the set of keys does not change while iterating)
            for key, value in func_kwargs.items():
                if not isinstance(value, blosc2.Operand):
                    func_kwargs[key] = SimpleProxy(value, **proxy_kwargs)

            # Call function with the new arguments
            retval = func(*new_args, **func_kwargs)

            # Treat return value
            if isinstance(retval, np.ndarray):
                # A NumPy result is returned as is, unless kwargs were
                # provided, in which case it is converted to a NDArray
                if has_kwargs:
                    return blosc2.asarray(retval, **kwargs)
                return retval

            # In some instances, the return value is not a LazyExpr
            # (e.g. using a reduction as the last function, and using an `out` param)
            if not isinstance(retval, blosc2.LazyExpr):
                return retval

            # If the return value is a LazyExpr, compute it
            if out is not None:
                return retval.compute(out=out, **kwargs)
            if has_kwargs:
                return retval.compute(**kwargs)
            # If no kwargs are provided, return a numpy array
            return retval[()]

        return wrapper

    if func is None:
        return decorator
    return decorator(func)
class PandasUdfEngine:
    """Adapter attached to :func:`jit` as its ``__pandas_udf__`` attribute."""

    @staticmethod
    def _ensure_numpy_data(data):
        """
        Return `data` as a NumPy array.

        NumPy arrays are returned unchanged; any other object must expose a
        ``values`` attribute, which is returned instead.

        Raises
        ------
        ValueError
            If `data` is not a NumPy array and has no ``values`` attribute.
        """
        if not isinstance(data, np.ndarray):
            try:
                data = data.values
            except AttributeError as err:
                raise ValueError(
                    f"blosc2.jit received an object of type {type(data).__name__}, "
                    "which is not supported. "
                    "Try casting your Series or DataFrame to a NumPy dtype."
                ) from err
        return data

    @classmethod
    def map(cls, data, func, args, kwargs, decorator, skip_na):
        """
        JIT a NumPy array element-wise. In the case of Blosc2, functions are
        expected to be vectorized NumPy operations, so the function is called
        with the NumPy array as the function parameter, instead of calling the
        function once for each element.

        Raises
        ------
        NotImplementedError
            Always; element-wise mapping is not supported by this engine.
        """
        raise NotImplementedError("The Blosc2 engine does not support map. Use apply instead.")

    @classmethod
    def apply(cls, data, func, args, kwargs, decorator, axis):
        """
        JIT a NumPy array by column or row. In the case of Blosc2, functions are
        expected to be vectorized NumPy operations, so the function is called
        with the NumPy array as the function parameter, instead of calling the
        function once for each column or row.

        Parameters
        ----------
        data
            NumPy array, or an object exposing ``values``.
        func: callable
            The user function; it is wrapped with `decorator` before use.
        args, kwargs
            Extra positional / keyword arguments forwarded to `func`.
        decorator: callable
            Decorator applied to `func`.
        axis: 0, 1, "index", "columns" or None
            None (or 1-dim data) passes the whole array; 0/"index" applies
            `func` to each column; 1/"columns" applies it to each row.

        Raises
        ------
        NotImplementedError
            If `axis` is not one of the supported values.
        """
        data = cls._ensure_numpy_data(data)
        func = decorator(func)
        if data.ndim == 1 or axis is None:
            # pandas Series.apply or pipe
            return func(data, *args, **kwargs)
        if axis in (0, "index"):
            # pandas apply(axis=0) column-wise
            per_column = [
                func(data[:, col_idx], *args, **kwargs) for col_idx in range(data.shape[1])
            ]
            return np.vstack(per_column).transpose()
        if axis in (1, "columns"):
            # pandas apply(axis=1) row-wise
            per_row = [
                func(data[row_idx, :], *args, **kwargs) for row_idx in range(data.shape[0])
            ]
            return np.vstack(per_row)
        raise NotImplementedError(f"Unknown axis '{axis}'. Use one of 0, 1 or None.")
# Attach the engine class to the decorator itself.
# NOTE(review): presumably this is the attribute pandas looks up when `jit`
# is passed as an `engine=` argument -- confirm against the pandas docs.
jit.__pandas_udf__ = PandasUdfEngine

File diff suppressed because it is too large Load Diff

@ -0,0 +1,255 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
import contextlib
import warnings
from dataclasses import asdict, dataclass, field, fields
import blosc2
def default_nthreads():
    """Return the current value of ``blosc2.nthreads`` (read at call time)."""
    return blosc2.nthreads
def default_filters():
    """Return a new list with the default filter pipeline: five NOFILTER slots, then SHUFFLE."""
    return [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE]
def default_filters_meta():
    """Return a new list with the default filters metadata: six zeros."""
    return [0 for _ in range(6)]
@dataclass
class CParams:
    """Dataclass for hosting the different compression parameters.

    Parameters
    ----------
    codec: :class:`Codec` or int
        The compressor code. Default is :py:obj:`Codec.ZSTD <Codec>`.
    codec_meta: int
        The metadata for the compressor code. Default is 0.
    clevel: int
        The compression level from 0 (no compression) to 9
        (maximum compression). Default is 5.
    use_dict: bool
        Whether to use dictionaries when compressing
        (only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). Default is `False`.
    typesize: int
        The data type size, ranging from 1 to 255. Default is 8.
    nthreads: int
        The number of threads to use internally. By default, the
        value of :py:obj:`blosc2.nthreads` is used. If not set with
        :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it.
    blocksize: int
        The requested size of the compressed blocks. If set to 0 (the default)
        blosc2 will choose the size automatically.
    splitmode: :class:`SplitMode`
        The split mode for the blocks.
        The default value is :py:obj:`SplitMode.AUTO_SPLIT <SplitMode>`.
    filters: :class:`Filter` or int list or None
        The sequence of filters. Default: [:py:obj:`Filter.NOFILTER <Filter>`,
        :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`,
        :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.SHUFFLE <Filter>`].
    filters_meta: list
        The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`.
    tuner: :class:`Tuner`
        The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.

    Raises
    ------
    ValueError
        If more than 6 filters are given, or if there are more filters than
        entries in `filters_meta`.
    """

    codec: blosc2.Codec | int = blosc2.Codec.ZSTD
    codec_meta: int = 0
    clevel: int = 5
    use_dict: bool = False
    typesize: int = 8
    nthreads: int = field(default_factory=default_nthreads)
    blocksize: int = 0
    splitmode: blosc2.SplitMode = blosc2.SplitMode.AUTO_SPLIT
    filters: list[blosc2.Filter | int] = field(default_factory=default_filters)
    filters_meta: list[int] = field(default_factory=default_filters_meta)
    tuner: blosc2.Tuner = blosc2.Tuner.STUNE

    def __post_init__(self):
        # C2Array sends metadata (like codec, filters, splitmode and tuner) as ints
        if not isinstance(self.codec, blosc2.Codec):
            with contextlib.suppress(ValueError):
                # User-defined codecs may have no entries in Codec
                self.codec = blosc2.Codec(self.codec)
        if not isinstance(self.splitmode, blosc2.SplitMode):
            with contextlib.suppress(ValueError):
                self.splitmode = blosc2.SplitMode(self.splitmode)
        if not isinstance(self.tuner, blosc2.Tuner):
            with contextlib.suppress(ValueError):
                self.tuner = blosc2.Tuner(self.tuner)

        # Work on private copies: the normalisation below rewrites entries,
        # and the lists handed in by the caller (often reused across several
        # CParams with different typesizes) must not be modified.
        self.filters = list(self.filters)
        self.filters_meta = list(self.filters_meta)

        if len(self.filters) > 6:
            raise ValueError("Number of filters exceeds 6")
        if len(self.filters) < len(self.filters_meta):
            # Silently trim the metadata to the number of filters
            self.filters_meta = self.filters_meta[: len(self.filters)]
        if len(self.filters) > len(self.filters_meta):
            raise ValueError("Number of filters cannot exceed number of filters meta")

        for i, filter_i in enumerate(self.filters):
            if not isinstance(filter_i, blosc2.Filter):
                with contextlib.suppress(ValueError):
                    # User-defined filters may have no entries in Filter
                    self.filters[i] = blosc2.Filter(filter_i)
            # A BYTEDELTA filter left with meta 0 gets the typesize as its meta
            if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA:
                self.filters_meta[i] = self.typesize
@dataclass
class DParams:
    """Dataclass for hosting the different decompression parameters.

    Parameters
    ----------
    nthreads: int
        The number of threads to use internally. By default, the
        value of :py:obj:`blosc2.nthreads` is used. If not set with
        :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it.
    """

    # The default is produced by default_nthreads() each time an instance is created
    nthreads: int = field(default_factory=default_nthreads)
@dataclass
class Storage:
    """Dataclass for hosting the different storage parameters.

    Parameters
    ----------
    contiguous: bool
        Indicates whether the chunks are stored contiguously.
        Default is True when :paramref:`urlpath` is not None;
        False otherwise.
    urlpath: str or pathlib.Path, optional
        If the storage is persistent, the name of the file (when
        `contiguous = True`) or the directory (if `contiguous = False`).
        If the storage is in-memory, then this field is `None`.
    mode: str, optional
        Persistence mode: 'r' means read only (must exist);
        'a' means read/write (create if it doesn't exist);
        'w' means create (overwrite if it exists). Default is 'a'.
    mmap_mode: str, optional
        If set, the file will be memory-mapped instead of using the default
        I/O functions and the `mode` argument will be ignored. The memory-mapping
        modes are similar to those used by the
        `numpy.memmap <https://numpy.org/doc/stable/reference/generated/numpy.memmap.html>`_
        function, but it is possible to extend the file:

        .. list-table::
            :widths: 10 90
            :header-rows: 1

            * - mode
              - description
            * - 'r'
              - Open an existing file for reading only.
            * - 'r+'
              - Open an existing file for reading and writing. Use this mode if you want
                to append data to an existing schunk file.
            * - 'w+'
              - Create or overwrite an existing file for reading and writing. Use this
                mode if you want to create a new schunk.
            * - 'c'
              - Open an existing file in copy-on-write mode: all changes affect the data
                in memory but changes are not saved to disk. The file on disk is
                read-only. On Windows, the size of the mapping cannot change.

        Only contiguous storage can be memory-mapped. Hence, `urlpath` must point to a
        file (and not a directory).

        .. note::
            Memory-mapped files are opened once, and their contents remain in (virtual)
            memory for the lifetime of the schunk. Using memory-mapped I/O can be faster
            than the default I/O functions, depending on the use case. While
            reading performance is generally better, writing performance may be
            slower in some cases on certain systems. Memory-mapped files
            can be especially beneficial when operating with network file systems
            (like NFS).

            This is currently a beta feature (especially for write operations) and we
            recommend trying it out and reporting any issues you may encounter.
    initial_mapping_size: int, optional
        The initial size of the mapping for the memory-mapped file when writes are
        allowed (r+ w+, or c mode). Once a file is memory-mapped and extended beyond the
        initial mapping size, the file must be remapped, which may be expensive. This
        parameter allows decoupling the mapping size from the actual file size to
        reserve memory early for future writes and avoid remappings. The memory is only
        reserved virtually and does not occupy physical memory unless actual writes
        occur. Since the virtual address space is large enough, it is ok to be generous
        with this parameter (with special consideration on Windows, see note below).
        For best performance, set this to the maximum expected size of the compressed
        data (see example in :obj:`SChunk.__init__ <blosc2.schunk.SChunk.__init__>`).
        The size is in bytes.

        Default: 1 GiB.

        .. note::
            On Windows, the size of the mapping is directly coupled to the file size.
            When the schunk is destroyed, the file size will be truncated to the
            actual size of the schunk.
    meta: dict or None
        A dictionary with different metalayers. Each entry represents a metalayer:

            key: bytes or str
                The name of the metalayer.
            value: object
                The metalayer object that will be serialized using msgpack.
    """

    contiguous: bool = None
    urlpath: str = None
    mode: str = "a"
    mmap_mode: str = None
    initial_mapping_size: int = None
    meta: dict = None

    def __post_init__(self):
        # An unset `contiguous` follows the persistence: True iff a urlpath was given
        if self.contiguous is None:
            self.contiguous = self.urlpath is not None

        # Fields for which None is a legitimate value
        nullable = ("urlpath", "mmap_mode", "initial_mapping_size", "meta")
        for spec in fields(self):
            name = spec.name
            if name in nullable or getattr(self, name) is not None:
                continue
            # A None here is replaced by the value a default Storage carries,
            # and the caller is told about the substitution
            setattr(self, name, getattr(Storage(), name))
            warnings.warn(f"`{name}` field value changed from `None` to `{getattr(self, name)}`")
# Defaults for compression params: plain-dict form of a default-constructed
# CParams, built once when this module is imported
cparams_dflts = asdict(CParams())
"""
Compression params defaults.
"""
# Defaults for decompression params: plain-dict form of a default-constructed
# DParams, built once when this module is imported
dparams_dflts = asdict(DParams())
"""
Decompression params defaults.
"""
# Default for storage: plain-dict form of a default-constructed Storage,
# built once when this module is imported
storage_dflts = asdict(Storage())
"""
Storage params defaults. This is meant only for :ref:`SChunk <SChunk>` or :ref:`NDArray <NDArray>`.
"""

@ -0,0 +1,700 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################
import contextlib
import os
from collections.abc import Iterator, MutableMapping
from typing import TYPE_CHECKING
import numpy as np
import blosc2
from blosc2.dict_store import DictStore
from blosc2.schunk import SChunk
if TYPE_CHECKING:
from blosc2.c2array import C2Array
from blosc2.ndarray import NDArray
class vlmetaProxy(MutableMapping):
    """Mapping wrapper placed in front of a vlmeta object owned by a TreeStore.

    - ``proxy[:]`` returns a plain dict ``{name: value}`` read from the inner vlmeta.
    - ``proxy[:] = mapping`` merges the mapping into the inner vlmeta.
    - Writes and deletions are refused while the TreeStore is in mode ``"r"``.
    - Iteration and length are delegated to the inner vlmeta object.
    """

    def __init__(self, tstore: "TreeStore", inner_vlmeta):
        self._tstore = tstore
        self._inner = inner_vlmeta

    def _require_writable(self):
        """Raise ValueError when the owning TreeStore is read-only."""
        if self._tstore.mode == "r":
            raise ValueError("TreeStore is in read-only mode")

    @staticmethod
    def _is_full_slice(key):
        """True for ``[:]``, i.e. a slice with neither start nor stop."""
        return key.start is None and key.stop is None

    def __setitem__(self, key, value):
        self._require_writable()
        store = self._tstore
        # Ensure the vlmeta SChunk is persisted before any write operation
        # (it may be created lazily). DictStore's methods are called directly
        # to bypass TreeStore's vlmeta filtering.
        if not DictStore.__contains__(store, store._vlmeta_key):
            DictStore.__setitem__(store, store._vlmeta_key, store._vlmeta)

        if not isinstance(key, slice):
            self._inner[key] = value
            # Persist changes in the embed store snapshot
            store._persist_vlmeta()
            return

        # Bulk set: only [:] is accepted
        if not self._is_full_slice(key):
            raise NotImplementedError("Slicing is not supported, unless [:]")
        # Merge/update existing values instead of replacing
        for name, entry in value.items():
            self._inner[name] = entry
        # Persist once after bulk update
        store._persist_vlmeta()

    def __getitem__(self, key):
        if not isinstance(key, slice):
            return self._inner[key]
        # Bulk get: only [:] is accepted
        if not self._is_full_slice(key):
            raise NotImplementedError("Slicing is not supported, unless [:]")
        # Build a Python dict out of every name held by the inner vlmeta
        return {name: self._inner[name] for name in self._inner}

    def __delitem__(self, key):
        self._require_writable()
        del self._inner[key]
        # Persist changes in the embed store snapshot
        self._tstore._persist_vlmeta()

    def __iter__(self):
        return iter(self._inner)

    def __len__(self):
        return len(self._inner)
class TreeStore(DictStore):
"""
A hierarchical tree-based storage container for Blosc2 data.
Extends :class:`blosc2.DictStore` with strict hierarchical key validation
and tree traversal capabilities. Keys must follow a hierarchical structure
using '/' as separator and always start with '/'. If user passes a key
that doesn't start with '/', it will be automatically added.
It supports the same arguments as :class:`blosc2.DictStore`.
Parameters
----------
localpath : str
Local path for the directory (`.b2d`) or file (`.b2z`); other extensions
are not supported. If a directory is specified, it will be treated as
a Blosc2 directory format (B2DIR). If a file is specified, it
will be treated as a Blosc2 zip format (B2ZIP).
mode : str, optional
File mode ('r', 'w', 'a'). Default is 'a'.
tmpdir : str or None, optional
Temporary directory to use when working with `.b2z` files. If None,
a system temporary directory will be managed. Default is None.
cparams : dict or None, optional
Compression parameters for the internal embed store.
If None, the default Blosc2 parameters are used.
dparams : dict or None, optional
Decompression parameters for the internal embed store.
If None, the default Blosc2 parameters are used.
storage : blosc2.Storage or None, optional
Storage properties for the internal embed store.
If None, the default Blosc2 storage properties are used.
threshold : int, optional
Threshold for the array size (bytes) to be kept in the embed store.
If the *compressed* array size is below this threshold, it will be
stored in the embed store instead of as a separate file. If None,
in-memory arrays are stored in the embed store and on-disk arrays
are stored as separate files.
C2Array objects will always be stored in the embed store,
regardless of their size.
Examples
--------
>>> tstore = TreeStore(localpath="my_tstore.b2z", mode="w")
>>> # Create a hierarchy. Data is stored in leaf nodes.
>>> # Structural nodes like /child0 and /child0/child1 are created automatically.
>>> tstore["/child0/leaf1"] = np.array([1, 2, 3])
>>> tstore["/child0/child1/leaf2"] = np.array([4, 5, 6])
>>> tstore["/child0/child2"] = np.array([7, 8, 9])
>>>
>>> # Walk the tree structure
>>> for path, children, nodes in tstore.walk("/child0"):
... print(f"Path: {path}, Children: {sorted(children)}, Nodes: {sorted(nodes)}")
Path: /child0, Children: ['/child0/child1'], Nodes: ['/child0/child2', '/child0/leaf1']
Path: /child0/child1, Children: [], Nodes: ['/child0/child1/leaf2']
>>>
>>> # Get a subtree view
>>> subtree = tstore.get_subtree("/child0")
>>> sorted(list(subtree.keys()))
['/child1/leaf2', '/child2', '/leaf1']
"""
# For some reason, we had to revert the explicit parametrisation of the
# constructor to make benchmarks working again.
def __init__(self, *args, _from_parent_store=None, **kwargs):
    """Initialize TreeStore with subtree support.

    It supports the same arguments as :class:`blosc2.DictStore`.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to the :class:`blosc2.DictStore` initializer when a new
        store is created.
    _from_parent_store : TreeStore or None, optional
        Internal. When given, the parent initializer is not called; the
        instance attributes of that store are copied into this object
        instead, so that it acts as a view on the parent.
    """
    if _from_parent_store is not None:
        # This is a subtree view, copy state from parent
        # (attribute dict is copied shallowly, so the attribute values are shared)
        self.__dict__.update(_from_parent_store.__dict__)
    else:
        # Call initialization and mark this storage as a b2tree object
        super().__init__(*args, **kwargs, _storage_meta={"b2tree": {"version": 1}})
        self.subtree_path = ""  # Empty string means full tree
def _is_vlmeta_key(self, key: str) -> bool:
    """Check if a key is a vlmeta key that should be hidden from regular access.

    A key is treated as a vlmeta key when it ends with ``"/__vlmeta__"``.
    """
    return key.endswith("/__vlmeta__")
def _translate_key_to_full(self, key: str) -> str:
"""Translate subtree-relative key to full tree key."""
if not self.subtree_path:
return key
if key == "/":
return self.subtree_path
else:
return self.subtree_path + key
def _translate_key_from_full(self, full_key: str) -> str | None:
"""Translate full tree key to subtree-relative key."""
if not self.subtree_path:
return full_key
if full_key == self.subtree_path:
return "/"
elif full_key.startswith(self.subtree_path + "/"):
return full_key[len(self.subtree_path) :]
else:
# Key is not within this subtree
return None
def _validate_key(self, key: str) -> str:
"""Validate and normalize hierarchical key structure.
Parameters
----------
key : str
The key to validate and normalize.
Returns
-------
normalized_key : str
The normalized key with leading '/' added if missing.
Raises
------
ValueError
If key doesn't follow hierarchical rules.
"""
if not isinstance(key, str):
raise ValueError(f"Key must be a string, got {type(key)}")
# Auto-add leading '/' if missing
if not key.startswith("/"):
key = "/" + key
if key != "/" and key.endswith("/"):
raise ValueError(f"Key cannot end with '/' (except for root), got: {key}")
if "//" in key:
raise ValueError(f"Key cannot contain empty path segments '//', got: {key}")
# Additional validation for special characters that might cause issues
invalid_chars = ["\0", "\n", "\r", "\t"]
for char in invalid_chars:
if char in key:
raise ValueError(f"Key cannot contain invalid character {char!r}, got: {key}")
return key
def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
    """Add a node with hierarchical key validation.

    Data may only live in leaf nodes: the assignment is rejected when the
    target path already has children, or when any ancestor of the target
    (not just its immediate parent) already holds data.

    Parameters
    ----------
    key : str
        Hierarchical node key.
    value : np.ndarray or blosc2.NDArray or blosc2.C2Array or blosc2.SChunk
        to store.

    Raises
    ------
    ValueError
        If key doesn't follow hierarchical structure rules, if trying to
        assign to a structural path that already has children, or if trying
        to add a descendant to a path that already contains data.
    """
    key = self._validate_key(key)

    # Check if this key already has children (is a structural subtree)
    children = self.get_children(key)
    if children:
        raise ValueError(
            f"Cannot assign array to structural path '{key}' that already has children: {children}"
        )

    # Walk from the immediate parent up to the root of this view; none of
    # these paths may hold data, otherwise a data node would stop being a leaf.
    ancestor = key
    while ancestor != "/":
        # Drop the last segment; an empty remainder means the root
        ancestor = ancestor.rsplit("/", 1)[0] or "/"
        if super().__contains__(self._translate_key_to_full(ancestor)):
            raise ValueError(
                f"Cannot add child '{key}' to path '{ancestor}' that already contains data"
            )

    full_key = self._translate_key_to_full(key)
    super().__setitem__(full_key, value)
def __getitem__(self, key: str) -> "NDArray | C2Array | SChunk | TreeStore":
    """Retrieve a node or subtree view.

    A key that has children yields a TreeStore view of that subtree (this
    takes precedence); a key without children that exists as data yields
    the stored array or schunk.

    Parameters
    ----------
    key : str
        Hierarchical node key.

    Returns
    -------
    out : blosc2.NDArray or blosc2.C2Array or blosc2.SChunk or TreeStore
        The stored array/chunk if key is a leaf node, or a TreeStore subtree view
        if key is an intermediate path with children.

    Raises
    ------
    KeyError
        If key is not found, or if it is a vlmeta key.
    ValueError
        If key doesn't follow hierarchical structure rules.
    """
    key = self._validate_key(key)
    if self._is_vlmeta_key(key):
        raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.")

    full_key = self._translate_key_to_full(key)
    has_children = bool(self.get_children(key))
    stored_as_data = super().__contains__(full_key)

    if has_children:
        # Intermediate path: hand out a view rooted at it
        return self.get_subtree(key)
    if not stored_as_data:
        # Key doesn't exist at all
        raise KeyError(f"Key '{key}' not found")
    # Leaf node: fetch the actual data
    return super().__getitem__(full_key)
def __delitem__(self, key: str) -> None:
    """Remove a node or subtree.

    Every data node at `key` or below it is removed. Structural
    (intermediate) paths hold no data of their own, so only the data
    nodes are deleted.

    Parameters
    ----------
    key : str
        Hierarchical node key.

    Raises
    ------
    KeyError
        If key is not found (neither as data nor as a path with
        descendants), or if it is a vlmeta key.
    ValueError
        If key doesn't follow hierarchical structure rules.
    """
    key = self._validate_key(key)
    if self._is_vlmeta_key(key):
        raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.")

    # Bound once: zero-argument super() cannot be used inside a comprehension
    stored = super().__contains__
    to_full = self._translate_key_to_full

    has_data = stored(to_full(key))
    descendants = self.get_descendants(key)
    if not (has_data or descendants):
        raise KeyError(f"Key '{key}' not found")

    # The key itself (when it holds data) followed by every descendant that
    # actually exists as data; the list is completed before anything is deleted
    doomed = [key] if has_data else []
    doomed.extend(node for node in descendants if stored(to_full(node)))

    for node in doomed:
        super().__delitem__(to_full(node))
def __contains__(self, key: str) -> bool:
    """Report whether ``key`` is stored as a data node.

    Parameters
    ----------
    key : str
        Hierarchical node key.

    Returns
    -------
    exists : bool
        True if the key is present in the backing store. False for vlmeta
        keys and for keys that raise ``ValueError`` while being processed.
    """
    try:
        node = self._validate_key(key)
        # vlmeta keys are hidden from the mapping interface.
        found = (not self._is_vlmeta_key(node)) and super().__contains__(
            self._translate_key_to_full(node)
        )
    except ValueError:
        found = False
    return found
def keys(self):
    """Return the set of keys visible from the current subtree view.

    The result contains the stored keys (vlmeta keys excluded) plus every
    intermediate path that is a parent of a stored key but is not itself
    one of the stored keys.
    """
    if self.subtree_path:
        # Subtree view: keep only the stored keys that translate into it.
        visible = set()
        for stored_key in super().keys():  # noqa: SIM118
            relative = self._translate_key_from_full(stored_key)
            if relative is not None:
                visible.add(relative)
    else:
        visible = set(super().keys())

    # Hide vlmeta keys.
    data_keys = {name for name in visible if not self._is_vlmeta_key(name)}

    # Add the parent paths of each key (the key itself is already included).
    ancestors = set()
    for data_key in data_keys:
        segments = data_key.split("/")[1:]  # drop the empty piece before the leading "/"
        prefix = ""
        for segment in segments[:-1]:
            prefix = f"{prefix}/{segment}"
            if prefix != "/" and prefix not in data_keys:
                ancestors.add(prefix)
    return data_keys | ancestors
def __iter__(self) -> Iterator[str]:
    """Return an iterator over the result of :meth:`keys` (vlmeta keys excluded)."""
    visible_keys = self.keys()
    return iter(visible_keys)
def items(self) -> Iterator[tuple[str, "NDArray | C2Array | SChunk | TreeStore"]]:
    """Lazily yield ``(key, value)`` pairs for every key returned by :meth:`keys`.

    Each value is obtained through ``self[key]`` at the moment the pair is
    produced.
    """
    yield from ((name, self[name]) for name in self.keys())
def get_children(self, path: str) -> list[str]:
    """List the paths located exactly one level below ``path``.

    Parameters
    ----------
    path : str
        The parent path to get children for.

    Returns
    -------
    children : list[str]
        Sorted list of direct child paths.
    """
    path = self._validate_key(path)
    prefix = "/" if path == "/" else path + "/"
    cut = len(prefix)

    # For a key such as /hierarchy/level1/data and prefix /hierarchy/ the
    # remainder is level1/data, whose first segment (level1) is the child.
    # The vlmeta test is kept as a safety net although keys() filters them.
    names = {
        key[cut:].split("/")[0]
        for key in self.keys()
        if not self._is_vlmeta_key(key) and key.startswith(prefix)
    }
    return sorted(prefix + name for name in names)
def get_descendants(self, path: str) -> list[str]:
    """List every visible key located anywhere below ``path``.

    Parameters
    ----------
    path : str
        The parent path to get descendants for.

    Returns
    -------
    descendants : list[str]
        Sorted list of all descendant paths; ``path`` itself is excluded.
    """
    path = self._validate_key(path)
    prefix = "/" if path == "/" else path + "/"

    # The vlmeta test is kept as a safety net although keys() filters them.
    below = {
        key
        for key in self.keys()
        if not self._is_vlmeta_key(key) and key.startswith(prefix) and key != path
    }
    return sorted(below)
def walk(self, path: str = "/", topdown: bool = True) -> Iterator[tuple[str, list[str], list[str]]]:
    """Traverse the hierarchy level by level, in the manner of ``os.walk()``.

    Every structural node reachable from ``path`` is visited once. Names in
    the yielded lists are relative (single path segments), not full paths.

    Parameters
    ----------
    path : str, optional
        The root path to start walking from. Default is "/".
    topdown : bool, optional
        If True (default), a level is yielded before its children.
        If False, children are yielded before their parent, mimicking
        ``os.walk(topdown=False)``.

    Yields
    ------
    path : str
        Current path being walked.
    children : list[str]
        Names of the direct children that have descendants of their own.
    nodes : list[str]
        Names of the direct children without descendants that exist as
        data nodes in the backing store.

    Examples
    --------
    >>> for path, children, nodes in tstore.walk("/child0", topdown=True):
    ...     print(f"Path: {path}, Children: {children}, Nodes: {nodes}")
    """

    def is_simple(name):
        # A usable name is a non-empty string holding a single path segment.
        return isinstance(name, str) and "/" not in name and name != ""

    path = self._validate_key(path)
    direct_children = self.get_children(path)

    # Split direct children into "directories" (with descendants) and
    # leaf candidates, keeping only the last path segment of each.
    dir_names = []
    leaf_candidates = []
    for child in direct_children:
        bucket = dir_names if self.get_descendants(child) else leaf_candidates
        bucket.append(child.split("/")[-1])

    dir_names = [name for name in dir_names if is_simple(name)]

    # A leaf is reported only when it is a real data node in the backing store.
    leaf_names: list[str] = []
    for name in leaf_candidates:
        if not is_simple(name):
            continue
        relative = path + "/" + name if path != "/" else "/" + name
        if super().__contains__(self._translate_key_to_full(relative)):
            leaf_names.append(name)

    level = (path, dir_names, leaf_names)
    if topdown:
        # Pre-order: parent first.
        yield level

    # Descend into the children that currently have descendants.
    for child in direct_children:
        if self.get_descendants(child):
            yield from self.walk(child, topdown=topdown)

    if not topdown:
        # Post-order: parent last.
        yield level
def get_subtree(self, path: str) -> "TreeStore":
    """Build a view of this store rooted at ``path``.

    Parameters
    ----------
    path : str
        The path that will become the root of the subtree view (relative to current subtree,
        will be normalized to start with '/' if missing).

    Returns
    -------
    subtree : TreeStore
        A new TreeStore instance that presents the subtree as if `path` were the root.

    Examples
    --------
    >>> tstore["/child0/child1/data"] = np.array([1, 2, 3])
    >>> tstore["/child0/child1/grandchild"] = np.array([4, 5, 6])
    >>> subtree = tstore.get_subtree("/child0/child1")
    >>> list(subtree.keys())
    ['/data', '/grandchild']
    >>> subtree["/grandchild"][:]
    array([4, 5, 6])

    Notes
    -----
    This is equivalent to `tstore[path]` when path is a structural path.
    """
    # Resolve the new root before building the view, so an invalid path
    # fails before any TreeStore is constructed.
    new_root = self._translate_key_to_full(self._validate_key(path))

    # The view is created from this store and differs only in its root path.
    view = TreeStore(_from_parent_store=self)
    view.subtree_path = new_root
    return view
@property
def vlmeta(self) -> MutableMapping:
    """Variable-length metadata of the TreeStore or of the current subtree.

    The metadata lives in the vlmeta attribute of an internal SChunk kept
    under '/__vlmeta__' for the root tree, or '<subtree_path>/__vlmeta__'
    for a subtree. When no such SChunk is stored yet, an empty in-memory
    one is used instead.

    Notes
    -----
    The metadata is stored as vlmeta of an internal SChunk, ensuring robust
    serialization and persistence. This mirrors SChunk.vlmeta behavior, with
    additional guarantees:

    - Bulk get via `[:]` always returns a dict with string keys and decoded values.
    - Read-only protection is enforced at the TreeStore level.
    - Each subtree has its own independent vlmeta storage.
    """
    # Root tree -> global key; subtree -> key nested under its own path.
    meta_key = f"{self.subtree_path}/__vlmeta__" if self.subtree_path else "/__vlmeta__"

    # Go through super() so this class's own vlmeta filtering is bypassed.
    if super().__contains__(meta_key):
        # Reload on every access so the caller sees the stored snapshot.
        holder = super().__getitem__(meta_key)
    else:
        # Nothing stored yet: start from an empty in-memory SChunk.
        holder = blosc2.SChunk()

    self._vlmeta = holder
    # Remembered for _persist_vlmeta.
    self._vlmeta_key = meta_key
    # Hand out a fresh proxy wrapping the latest inner vlmeta.
    return vlmetaProxy(self, self._vlmeta.vlmeta)
def _persist_vlmeta(self) -> None:
    """Write the current vlmeta SChunk back into the embed store.

    The EmbedStore keeps a serialized snapshot of stored objects, so
    mutating the in-memory SChunk does not update that snapshot by itself.
    The update is emulated by deleting the stored entry and adding the
    current object again. Nothing happens when no vlmeta key has been
    recorded yet, when there is no ``_estore`` attribute, or when the key
    is not present in it.
    """
    if not hasattr(self, "_vlmeta_key"):
        return
    meta_key = self._vlmeta_key

    # Only the embedded case is expected; anything else is left untouched.
    if not hasattr(self, "_estore") or meta_key not in self._estore:
        return

    # Replace the stored snapshot; a missing entry at delete time is tolerated.
    try:
        del self._estore[meta_key]
    except KeyError:
        pass
    self._estore[meta_key] = self._vlmeta
if __name__ == "__main__":
    # Demo: build a small tree on disk, inspect it, then remove the file.
    localpath = "example_tstore.b2z"

    # Data lives in leaf nodes; structural nodes come from the key paths.
    demo_nodes = {
        "/child0/data_node": [1, 2, 3],
        "/child0/child1/data_node": [4, 5, 6],
        "/child0/child2": [7, 8, 9],
        "/child0/child1/grandchild": [10, 11, 12],
        "/other": [13, 14, 15],
    }

    with TreeStore(localpath, mode="w") as tstore:
        for node_key, values in demo_nodes.items():
            tstore[node_key] = np.array(values)
        print("TreeStore keys:", sorted(tstore.keys()))

        # Indexing a structural path returns a subtree view.
        root_subtree = tstore["/child0"]
        root_subtree.vlmeta["foo"] = "bar"
        print("Subtree keys:", sorted(root_subtree.keys()))
        print("Subtree vlmeta:", root_subtree.vlmeta)

        # Walk the subtree from its own root.
        for path, children, nodes in root_subtree.walk("/"):
            print(f"Path: {path}, Children: {children}, Nodes: {nodes}")

    # Remove the example file once the store has been closed.
    if os.path.exists(localpath):
        os.remove(localpath)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,2 @@
# Version string of this package.
__version__ = "4.1.2"
# NOTE(review): presumably the Array API standard revision this package targets — confirm.
__array_api_version__ = "2024.12"

@ -0,0 +1,5 @@
import sys
from cpuinfo.cpuinfo import *

@ -0,0 +1,5 @@
# Script entry point: import the cpuinfo package and call its main() routine.
import cpuinfo
cpuinfo.main()

File diff suppressed because it is too large Load Diff

@ -0,0 +1,132 @@
/* Public declarations of the libtcc API: creating a TCCState, configuring it,
   compiling sources, and linking / running / emitting the result. */
#ifndef LIBTCC_H
#define LIBTCC_H
/* LIBTCCAPI decorates every exported declaration; it expands to nothing
   unless the including file defined it beforehand. */
#ifndef LIBTCCAPI
# define LIBTCCAPI
#endif
/* Give the declarations C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/*****************************/
/* set custom allocator for all allocations (optional), NULL for default. */
typedef void *TCCReallocFunc(void *ptr, unsigned long size);
LIBTCCAPI void tcc_set_realloc(TCCReallocFunc *my_realloc);
/*****************************/
/* opaque compilation context; only used through pointers in this header */
typedef struct TCCState TCCState;
/* create a new TCC compilation context */
LIBTCCAPI TCCState *tcc_new(void);
/* free a TCC compilation context */
LIBTCCAPI void tcc_delete(TCCState *s);
/* set CONFIG_TCCDIR at runtime */
LIBTCCAPI void tcc_set_lib_path(TCCState *s, const char *path);
/* set error/warning callback (optional); 'error_opaque' is the value
   declared as the callback's first parameter ('opaque') */
typedef void TCCErrorFunc(void *opaque, const char *msg);
LIBTCCAPI void tcc_set_error_func(TCCState *s, void *error_opaque, TCCErrorFunc *error_func);
/* set options as from command line (multiple supported) */
LIBTCCAPI int tcc_set_options(TCCState *s, const char *str);
/*****************************/
/* preprocessor */
/* add include path */
LIBTCCAPI int tcc_add_include_path(TCCState *s, const char *pathname);
/* add a system include path */
LIBTCCAPI int tcc_add_sysinclude_path(TCCState *s, const char *pathname);
/* define preprocessor symbol 'sym'. value can be NULL, sym can be "sym=val" */
LIBTCCAPI void tcc_define_symbol(TCCState *s, const char *sym, const char *value);
/* undefine preprocessor symbol 'sym' */
LIBTCCAPI void tcc_undefine_symbol(TCCState *s, const char *sym);
/*****************************/
/* compiling */
/* add a file (C file, dll, object, library, ld script). Return -1 if error. */
LIBTCCAPI int tcc_add_file(TCCState *s, const char *filename);
/* compile a string containing a C source. Return -1 if error. */
LIBTCCAPI int tcc_compile_string(TCCState *s, const char *buf);
/* Tip: to have more specific errors/warnings from tcc_compile_string(),
you can prefix the string with "#line <num> \"<filename>\"\n" */
/*****************************/
/* linking commands */
/* set output type. MUST BE CALLED before any compilation */
LIBTCCAPI int tcc_set_output_type(TCCState *s, int output_type);
/* values for 'output_type'; note they are not listed in numeric order
   (TCC_OUTPUT_OBJ is 3, TCC_OUTPUT_DLL is 4) */
#define TCC_OUTPUT_MEMORY 1 /* output will be run in memory */
#define TCC_OUTPUT_EXE 2 /* executable file */
#define TCC_OUTPUT_DLL 4 /* dynamic library */
#define TCC_OUTPUT_OBJ 3 /* object file */
#define TCC_OUTPUT_PREPROCESS 5 /* only preprocess */
/* equivalent to -Lpath option */
LIBTCCAPI int tcc_add_library_path(TCCState *s, const char *pathname);
/* the library name is the same as the argument of the '-l' option */
LIBTCCAPI int tcc_add_library(TCCState *s, const char *libraryname);
/* add a symbol to the compiled program */
LIBTCCAPI int tcc_add_symbol(TCCState *s, const char *name, const void *val);
/* output an executable, library or object file. DO NOT call
tcc_relocate() before. */
LIBTCCAPI int tcc_output_file(TCCState *s, const char *filename);
/* link and run main() function and return its value. DO NOT call
tcc_relocate() before. */
LIBTCCAPI int tcc_run(TCCState *s, int argc, char **argv);
/* do all relocations (needed before using tcc_get_symbol()) */
LIBTCCAPI int tcc_relocate(TCCState *s1);
/* return symbol value or NULL if not found */
LIBTCCAPI void *tcc_get_symbol(TCCState *s, const char *name);
/* list all (global) symbols and their values via 'symbol_cb()';
   'ctx' is the first parameter declared for the callback */
LIBTCCAPI void tcc_list_symbols(TCCState *s, void *ctx,
void (*symbol_cb)(void *ctx, const char *name, const void *val));
/* experimental/advanced section (see libtcc_test_mt.c for an example) */
/* catch runtime exceptions (optionally limit backtraces at top_func),
when using tcc_set_options("-bt") and when not using tcc_run() */
LIBTCCAPI void *_tcc_setjmp(TCCState *s1, void *jmp_buf, void *top_func, void *longjmp);
/* the macro expands to a setjmp() call and names longjmp, so both must be
   declared wherever tcc_setjmp() is used */
#define tcc_setjmp(s1,jb,f) setjmp(_tcc_setjmp(s1, jb, f, longjmp))
/* debugging */
/* For debugging to work you have to enable it with tcc_set_options */
/* compile a string containing a C source. Return -1 if error.
Write the string to file filename if debug is set. */
LIBTCCAPI int tcc_compile_string_file(TCCState *s, const char *buf, const char *filename);
/* Output object file. This must be done after tcc_relocate.
It only generates the file if debug is set.
The file can be loaded with the gdb command add-symbol-file */
LIBTCCAPI int elf_output_obj(TCCState *s1, const char *filename);
/* Set base address for wasm32 data/stack layout (default 1024).
Call before tcc_output_file(). Only meaningful for TCC_TARGET_WASM32. */
LIBTCCAPI void tcc_set_wasm_data_base(TCCState *s, unsigned int base);
/* custom error printer for runtime exceptions. Returning 0 stops backtrace */
typedef int TCCBtFunc(void *udata, void *pc, const char *file, int line, const char* func, const char *msg);
LIBTCCAPI void tcc_set_backtrace_func(TCCState *s1, void* userdata, TCCBtFunc*);
#ifdef __cplusplus
}
#endif
#endif /* LIBTCC_H */

@ -0,0 +1,265 @@
Metadata-Version: 2.4
Name: msgpack
Version: 1.1.2
Summary: MessagePack serializer
Author-email: Inada Naoki <songofacandy@gmail.com>
License-Expression: Apache-2.0
Project-URL: Homepage, https://msgpack.org/
Project-URL: Documentation, https://msgpack-python.readthedocs.io/
Project-URL: Repository, https://github.com/msgpack/msgpack-python/
Project-URL: Tracker, https://github.com/msgpack/msgpack-python/issues
Project-URL: Changelog, https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst
Keywords: msgpack,messagepack,serializer,serialization,binary
Classifier: Development Status :: 5 - Production/Stable
Classifier: Operating System :: OS Independent
Classifier: Topic :: File Formats
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: COPYING
Dynamic: license-file
# MessagePack for Python
[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml)
[![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest)
## What is this?
[MessagePack](https://msgpack.org/) is an efficient binary serialization format.
It lets you exchange data among multiple languages like JSON.
But it's faster and smaller.
This package provides CPython bindings for reading and writing MessagePack data.
## Install
```
$ pip install msgpack
```
### Pure Python implementation
The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy.
But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy.
### Windows
If you can't use a binary distribution, you need to install Visual Studio
or the Windows SDK on Windows.
Without the extension, the pure Python implementation on CPython runs slowly.
## How to use
### One-shot pack & unpack
Use `packb` for packing and `unpackb` for unpacking.
msgpack provides `dumps` and `loads` as aliases for compatibility with
`json` and `pickle`.
`pack` and `dump` pack to a file-like object.
`unpack` and `load` unpack from a file-like object.
```pycon
>>> import msgpack
>>> msgpack.packb([1, 2, 3])
b'\x93\x01\x02\x03'
>>> msgpack.unpackb(_)
[1, 2, 3]
```
Read the docstring for options.
### Streaming unpacking
`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one
stream (or from bytes provided through its `feed` method).
```py
import msgpack
from io import BytesIO
buf = BytesIO()
for i in range(100):
buf.write(msgpack.packb(i))
buf.seek(0)
unpacker = msgpack.Unpacker(buf)
for unpacked in unpacker:
print(unpacked)
```
### Packing/unpacking of custom data types
It is also possible to pack/unpack custom data types. Here is an example for
`datetime.datetime`.
```py
import datetime
import msgpack
useful_dict = {
"id": 1,
"created": datetime.datetime.now(),
}
def decode_datetime(obj):
if '__datetime__' in obj:
obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
return obj
def encode_datetime(obj):
if isinstance(obj, datetime.datetime):
return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}
return obj
packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
```
`Unpacker`'s `object_hook` callback receives a dict; the
`object_pairs_hook` callback may instead be used to receive a list of
key-value pairs.
NOTE: msgpack can currently encode timezone-aware `datetime` objects (those with `tzinfo`) into the standard ext type.
See `datetime` option in `Packer` docstring.
### Extended types
It is also possible to pack/unpack custom data types using the **ext** type.
```pycon
>>> import msgpack
>>> import array
>>> def default(obj):
...     if isinstance(obj, array.array) and obj.typecode == 'd':
...         return msgpack.ExtType(42, obj.tobytes())
...     raise TypeError("Unknown type: %r" % (obj,))
...
>>> def ext_hook(code, data):
...     if code == 42:
...         a = array.array('d')
...         a.frombytes(data)
...         return a
...     return msgpack.ExtType(code, data)
...
>>> data = array.array('d', [1.2, 3.4])
>>> packed = msgpack.packb(data, default=default)
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
>>> data == unpacked
True
```
### Advanced unpacking control
As an alternative to iteration, `Unpacker` objects provide `unpack`,
`skip`, `read_array_header`, and `read_map_header` methods. The former two
read an entire message from the stream, respectively deserializing and returning
the result, or ignoring it. The latter two methods return the number of elements
in the upcoming container, so that each element in an array, or key-value pair
in a map, can be unpacked or skipped individually.
## Notes
### String and binary types in the old MessagePack spec
Early versions of msgpack didn't distinguish string and binary types.
The type for representing both string and binary types was named **raw**.
You can pack into and unpack from this old spec using `use_bin_type=False`
and `raw=True` options.
```pycon
>>> import msgpack
>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True)
[b'spam', b'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False)
[b'spam', 'eggs']
```
### ext type
To use the **ext** type, pass a `msgpack.ExtType` object to the packer.
```pycon
>>> import msgpack
>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy'))
>>> msgpack.unpackb(packed)
ExtType(code=42, data=b'xyzzy')
```
You can use it with `default` and `ext_hook`; see the "Extended types" example above.
### Security
When unpacking data received from an unreliable source, msgpack provides
two security options.
`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size.
It is also used to limit preallocated list sizes.
`strict_map_key` (default: `True`) limits the type of map keys to bytes and str.
While the MessagePack spec doesn't limit map key types,
there is a risk of a hash DoS.
If you need to support other types for map keys, use `strict_map_key=False`.
### Performance tips
CPython's GC starts when the number of allocated objects grows.
This means unpacking may trigger unnecessary GC.
You can use `gc.disable()` when unpacking a large message.
A list is the default sequence type in Python.
However, a tuple is lighter than a list.
You can use `use_list=False` while unpacking when performance is important.
## Major breaking changes in the history
### msgpack 0.5
The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5.
When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before
`pip install -U msgpack`.
### msgpack 1.0
* Python 2 support
* The extension module no longer supports Python 2.
The pure Python implementation (`msgpack.fallback`) is used for Python 2.
* msgpack 1.0.6 drops official support of Python 2.7, as pip and
GitHub Action "setup-python" no longer supports Python 2.7.
* Packer
* Packer uses `use_bin_type=True` by default.
Bytes are encoded in the bin type in MessagePack.
* The `encoding` option is removed. UTF-8 is always used.
* Unpacker
* Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings
and decodes them to Python str (Unicode) objects.
* `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str).
* The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks.
You need to pass `max_buffer_size=0` if you have large but safe data.
* The default value of `strict_map_key` is changed to True to avoid hash DoS.
You need to pass `strict_map_key=False` if you have data that contain map keys
whose type is neither bytes nor str.

@ -0,0 +1,15 @@
msgpack-1.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
msgpack-1.1.2.dist-info/METADATA,sha256=AzsCYs3CsW_Ihmrc0TKSERKnM0C4ZRHj0obd6hZ7EWQ,8356
msgpack-1.1.2.dist-info/RECORD,,
msgpack-1.1.2.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
msgpack-1.1.2.dist-info/licenses/COPYING,sha256=T73_QuukWTwW96fqcHiIOytzSHCh4rY2KEzi3OYr9Pc,628
msgpack-1.1.2.dist-info/top_level.txt,sha256=2tykSY1pXdiA2xYTDR6jPw0qI5ZGxRihyhf4S5hZyXk,8
msgpack/__init__.py,sha256=VyYtXI_OKFlyox4xlPRWvwU74d5ll6L6Oj01CJcxdqg,1164
msgpack/__pycache__/__init__.cpython-311.pyc,,
msgpack/__pycache__/exceptions.cpython-311.pyc,,
msgpack/__pycache__/ext.cpython-311.pyc,,
msgpack/__pycache__/fallback.cpython-311.pyc,,
msgpack/_cmsgpack.cp311-win_amd64.pyd,sha256=7zWzgU8v1k7_LDlx4HhhNaKcEa6b2p-HBXH988Wr-BY,128000
msgpack/exceptions.py,sha256=2fCtczricqQgdT3NtW6cTqmZn3WA7GQtmlPuT-NhLyM,1129
msgpack/ext.py,sha256=9gDKxuEHfYWdPRzcpFwFYyuBx0puprlQflDGOaccRhE,5896
msgpack/fallback.py,sha256=EAP6g9N7tTWvTw01RtnXXdYg-zZn21FcGVfdBid8aUg,33319

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (80.9.0)
Root-Is-Purelib: false
Tag: cp311-cp311-win_amd64

@ -0,0 +1,14 @@
Copyright (C) 2008-2011 INADA Naoki <songofacandy@gmail.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,55 @@
# ruff: noqa: F401
import os
from .exceptions import * # noqa: F403
from .ext import ExtType, Timestamp
version = (1, 1, 2)
__version__ = "1.1.2"
if os.environ.get("MSGPACK_PUREPYTHON"):
from .fallback import Packer, Unpacker, unpackb
else:
try:
from ._cmsgpack import Packer, Unpacker, unpackb
except ImportError:
from .fallback import Packer, Unpacker, unpackb
def pack(o, stream, **kwargs):
    """
    Serialize object `o` and write the packed result to `stream`.

    The keyword arguments are forwarded to :class:`Packer`; see it for options.
    """
    packer = Packer(**kwargs)
    # Resolve the writer before packing, matching the original evaluation order.
    emit = stream.write
    emit(packer.pack(o))
def packb(o, **kwargs):
    """
    Serialize object `o` and return the packed result.

    The keyword arguments are forwarded to :class:`Packer`; see it for options.
    """
    packer = Packer(**kwargs)
    return packer.pack(o)
def unpack(stream, **kwargs):
    """
    Read everything from `stream` and unpack one object from it.

    Raises `ExtraData` when `stream` contains extra bytes.
    The keyword arguments are forwarded to `unpackb`;
    see :class:`Unpacker` for options.
    """
    return unpackb(stream.read(), **kwargs)
# alias for compatibility to simplejson/marshal/pickle.
load = unpack
loads = unpackb
dump = pack
dumps = packb

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save