diff --git a/API_KLINE_RESPONSE.md b/API_KLINE_RESPONSE.md new file mode 100644 index 0000000..32203d3 --- /dev/null +++ b/API_KLINE_RESPONSE.md @@ -0,0 +1,167 @@ +# 股票K线接口响应数据说明 + +## 接口信息 + +**接口地址**: `GET /v1/stock/klines/{symbol}` + +**请求参数**: +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| symbol | string | 是 | 标的代码,如 000001.SZ | +| start | string | 是 | 开始日期,格式 YYYYMMDD | +| end | string | 是 | 结束日期,格式 YYYYMMDD | +| freq | string | 否 | 周期,默认 1d (1m/5m/15m/30m/60m/1d/1w/1M) | +| adjust | string | 否 | 复权类型,可选 qfq/hfq;默认空(不复权,响应中返回 NONE) | + +**请求头**: +``` +X-API-Key: your_api_key +``` + +--- + +## 响应数据结构 + +### 顶层结构 + +```json +{ + "code": 0, + "message": "success", + "data": { + "symbol": "000001.SZ", + "name": "平安银行", + "freq": "1d", + "adjust": "NONE", + "count": 10, + "items": [ + // K线数据项数组 + ] + } +} +``` + +### K线数据项 (items) + +每个 K 线数据项包含以下字段: + +#### 基础字段 + +| 字段名 | 类型 | 说明 | 示例 | +|--------|------|------|------| +| symbol | string | 标的代码 | "000001.SZ" | +| time | string (ISO8601) | 时间戳 | "2026-03-01T00:00:00" | +| open | number | 开盘价 | 10.5 | +| high | number | 最高价 | 11.2 | +| low | number | 最低价 | 10.3 | +| close | number | 收盘价 | 10.8 | +| volume | integer | 成交量(股) | 100000 | +| amount | number | 成交金额(元) | 1080000 | + +#### 扩展字段 + +| 字段名 | 类型 | 说明 | 示例 | +|--------|------|------|------| +| trade_date | string | 交易日 (YYYY-MM-DD) | "2026-03-01" | +| is_limit_up | boolean | 是否涨停 | false | +| is_limit_down | boolean | 是否跌停 | false | +| total_market_cap | number | 总市值(元) | 1500000000 | +| float_market_cap | number | 流通市值(元) | 1200000000 | +| inst_holding_ratio | number | 机构持仓占比(%) | 25.5 | +| trading_days | integer | 可交易日数(从上市日至该K线交易日) | 5000 | +| created_at | string (ISO8601) | 数据创建时间 | "2026-03-01T12:00:00" | + +--- + +## 完整响应示例 + +```json +{ + "code": 0, + "message": "success", + "data": { + "symbol": "000001.SZ", + "freq": "1d", + "adjust": "NONE", + "count": 2, + "items": [ + { + "symbol": "000001.SZ", + "time": "2026-03-01T00:00:00", + "open": 10.5, + "high": 11.2, 
+ "low": 10.3, + "close": 10.8, + "volume": 100000, + "amount": 1080000, + "trade_date": "2026-03-01", + "is_limit_up": false, + "is_limit_down": false, + "total_market_cap": 1500000000, + "float_market_cap": 1200000000, + "inst_holding_ratio": 25.5, + "trading_days": 5000, + "created_at": "2026-03-01T12:00:00" + }, + { + "symbol": "000001.SZ", + "time": "2026-03-02T00:00:00", + "open": 10.8, + "high": 11.5, + "low": 10.6, + "close": 11.2, + "volume": 120000, + "amount": 1344000, + "trade_date": "2026-03-02", + "is_limit_up": false, + "is_limit_down": false, + "total_market_cap": 1550000000, + "float_market_cap": 1240000000, + "inst_holding_ratio": 25.6, + "trading_days": 5001, + "created_at": "2026-03-02T12:00:00" + } + ] + } +} +``` + +--- + +## 字段说明 + +### 涨停跌停判断 + +- **is_limit_up**: 当日是否涨停(收盘价 >= 涨停价 × 0.995,即允许 0.5% 误差;未获取到涨跌停价时为 false) +- **is_limit_down**: 当日是否跌停(收盘价 <= 跌停价 × 1.005,即允许 0.5% 误差;未获取到涨跌停价时为 false) + +### 市值数据 + +- **total_market_cap**: 总市值 = 收盘价 × 总股本(缺少股本数据时为 null) +- **float_market_cap**: 流通市值 = 收盘价 × 流通股本(缺少股本数据时为 null) + +### 机构持仓 + +- **inst_holding_ratio**: 机构持仓占比(%),表示机构投资者持有该股票的比例;数据源未提供时为 null + +### 交易日期 + +- **trade_date**: 格式为 "YYYY-MM-DD",方便前端展示 +- **trading_days**: 从上市日到该K线交易日(含当日)的交易日数 + +--- + +## 测试脚本 + +使用以下命令测试接口: + +```bash +python test_klines_api.py +``` + +或直接 curl: + +```bash +curl -X GET "http://localhost:8080/v1/stock/klines/000001.SZ?start=20260301&end=20260310&freq=1d" \ + -H "X-API-Key: demo-api-key-2024" +``` diff --git a/amazing_data_cache/infodata/equity_structure/equity_structure.h5 b/amazing_data_cache/infodata/equity_structure/equity_structure.h5 new file mode 100644 index 0000000..d929e66 Binary files /dev/null and b/amazing_data_cache/infodata/equity_structure/equity_structure.h5 differ diff --git a/app/adapters/__pycache__/amazingdata_adapter.cpython-311.pyc b/app/adapters/__pycache__/amazingdata_adapter.cpython-311.pyc index b5be738..155d636 100644 Binary files a/app/adapters/__pycache__/amazingdata_adapter.cpython-311.pyc and b/app/adapters/__pycache__/amazingdata_adapter.cpython-311.pyc differ diff --git 
a/app/adapters/amazingdata_adapter.py b/app/adapters/amazingdata_adapter.py index 4830580..9694dfa 100644 --- a/app/adapters/amazingdata_adapter.py +++ b/app/adapters/amazingdata_adapter.py @@ -82,6 +82,47 @@ class AmazingDataAdapter(DataSourceAdapter): else: raise ValueError(f"不支持的日期格式: {d}") + def _get_list_date(self, symbol: str) -> Optional[int]: + """获取股票上市日期 + + Returns: + 上市日期 (YYYYMMDD格式),如果获取失败返回None + """ + try: + # 方法1:尝试从代码信息中获取 + code_info_df = self._base_data.get_code_info(security_type=SecurityType.STOCK_A.value) + if symbol in code_info_df.index: + # 尝试不同的字段名 + for field in ['list_date', 'LIST_DATE', 'listDate', 'founded_date']: + if field in code_info_df.columns: + list_date_val = code_info_df.loc[symbol, field] + if pd.notna(list_date_val): + # 处理不同格式的日期 + if isinstance(list_date_val, str): + return int(list_date_val.replace('-', '')) + elif isinstance(list_date_val, (int, float)): + return int(list_date_val) + elif isinstance(list_date_val, pd.Timestamp): + return int(list_date_val.strftime('%Y%m%d')) + + # 方法2:尝试从历史代码列表获取 + try: + hist_codes = self._base_data.get_hist_code_list(security_type=SecurityType.STOCK_A.value) + if symbol in hist_codes.index and 'list_date' in hist_codes.columns: + list_date_val = hist_codes.loc[symbol, 'list_date'] + if pd.notna(list_date_val): + if isinstance(list_date_val, str): + return int(list_date_val.replace('-', '')) + elif isinstance(list_date_val, (int, float)): + return int(list_date_val) + except: + pass + + return None + except Exception as e: + print(f"[amazingdata_adapter]获取上市日期失败: {e}") + return None + async def connect(self, config: dict) -> None: """建立连接""" try: @@ -210,75 +251,167 @@ class AmazingDataAdapter(DataSourceAdapter): print(f"[amazingdata_adapter _fetch_klines_sync]正在获取K线数据: 代码={codes}, 日期范围={start_date}~{end_date}, 周期={period_value}") # 获取K线数据 - 将周期值转换为 SDK 的常量 - print(f"[amazingdata_adapter _fetch_klines_sync]SDK 周期值: {period_value}, type: {type(period_value)}") kline_dict = 
self._market_data.query_kline( code_list=codes, begin_date=start_int, end_date=end_int, period=period_value ) - print(f"[amazingdata_adapter _fetch_klines_sync]已同步获取 {symbol} 的 {period_value} 周期数据") - print(f"[amazingdata_adapter _fetch_klines_sync]数据预览: {kline_dict.get(symbol).head() if symbol in kline_dict else '无数据'}") - + + if symbol not in kline_dict: + info(f"No kline data found for {symbol}") + return [] + print(f"[amazingdata_adapter _fetch_klines_sync]获取到 {kline_dict} 的K线数据") # 打印原始数据结构 + df = kline_dict[symbol] + print(f"[amazingdata_adapter _fetch_klines_sync]获取到 {len(df)} 条K线数据") + + # ============================================ + # 1. 获取证券基本信息(涨停价、跌停价) + # ============================================ + print(f"[amazingdata_adapter _fetch_klines_sync]正在获取证券基本信息...") + try: + code_info_df = self._base_data.get_code_info(security_type=SecurityType.STOCK_A.value) + # 提取当前股票的涨停价和跌停价 + if symbol in code_info_df.index: + high_limited = float(code_info_df.loc[symbol, 'high_limited']) if 'high_limited' in code_info_df.columns else None + low_limited = float(code_info_df.loc[symbol, 'low_limited']) if 'low_limited' in code_info_df.columns else None + print(f"[amazingdata_adapter _fetch_klines_sync]涨停价: {high_limited}, 跌停价: {low_limited}") + else: + high_limited = None + low_limited = None + print(f"[amazingdata_adapter _fetch_klines_sync]未找到 {symbol} 的涨跌停价格") + except Exception as e: + print(f"[amazingdata_adapter _fetch_klines_sync]获取证券信息失败: {e}") + high_limited = None + low_limited = None + + # ============================================ + # 2. 
获取股本结构(总股本、流通股) + # ============================================ + print(f"[amazingdata_adapter _fetch_klines_sync]正在获取股本结构...") + try: + equity_dict = self._info_data.get_equity_structure( + code_list=codes, + local_path=self.config.local_path, + is_local=self.config.use_local_cache + ) + if symbol in equity_dict: + equity_df = equity_dict[symbol] + # 获取最新的股本数据 + if not equity_df.empty: + latest_equity = equity_df.iloc[-1] + tot_share = float(latest_equity.get('TOT_SHARE', 0)) * 10000 # 万股转股 + float_share = float(latest_equity.get('FLOAT_SHARE', 0)) * 10000 # 万股转股 + print(f"[amazingdata_adapter _fetch_klines_sync]总股本: {tot_share}, 流通股: {float_share}") + else: + tot_share = 0 + float_share = 0 + else: + tot_share = 0 + float_share = 0 + except Exception as e: + print(f"[amazingdata_adapter _fetch_klines_sync]获取股本结构失败: {e}") + tot_share = 0 + float_share = 0 + + # ============================================ + # 3. 获取交易日历和上市日期 + # ============================================ + print(f"[amazingdata_adapter _fetch_klines_sync]正在获取交易日历...") + try: + # 获取交易日历 + calendar = self._base_data.get_calendar(market=Market.SH.value) + + # 获取股票上市日期 + list_date = self._get_list_date(symbol) + if list_date is None: + list_date = min(calendar) if calendar else start_int + + print(f"[amazingdata_adapter _fetch_klines_sync]上市日期: {list_date}") + except Exception as e: + print(f"[amazingdata_adapter _fetch_klines_sync]获取交易日历失败: {e}") + calendar = [] + list_date = start_int + + # ============================================ + # 4. 
处理K线数据并补充字段 + # ============================================ results = [] - if symbol in kline_dict: - df = kline_dict[symbol] - print(f"[amazingdata_adapter _fetch_klines_sync]DataFrame columns: {df.columns.tolist()}") - print(f"[amazingdata_adapter _fetch_klines_sync]DataFrame head:\n{df.head()}") + for _, row in df.iterrows(): + # 从 kline_time 列获取日期 + kline_time = row.get('kline_time') + if pd.isna(kline_time) or kline_time is None: + continue - for _, row in df.iterrows(): - # 从 kline_time 列获取日期(AmazingData 返回的日期字段) - kline_time = row.get('kline_time') - if pd.isna(kline_time) or kline_time is None: - print(f"[amazingdata_adapter _fetch_klines_sync]跳过无效日期: kline_time 为空") - continue - - try: - # kline_time 可能是 Timestamp 或整数 YYYYMMDD - if isinstance(kline_time, pd.Timestamp): - ts = int(kline_time.timestamp()) - trade_date = kline_time.strftime('%Y-%m-%d') - else: - # 整数格式 YYYYMMDD - date_str = str(int(kline_time)) - if len(date_str) != 8: - print(f"[amazingdata_adapter _fetch_klines_sync]跳过无效日期: {date_str}") - continue - - dt = datetime.strptime(date_str, "%Y%m%d") - ts = int(dt.timestamp()) - trade_date = dt.strftime('%Y-%m-%d') - except (ValueError, TypeError) as e: - print(f"[amazingdata_adapter _fetch_klines_sync]日期解析错误 '{kline_time}' (type: {type(kline_time)}): {e}") - continue - - # 从 DataFrame 提取扩展字段(如果存在) - is_limit_up = bool(row.get('is_limit_up')) if 'is_limit_up' in df.columns else None - is_limit_down = bool(row.get('is_limit_down')) if 'is_limit_down' in df.columns else None - total_market_cap = float(row.get('total_market_cap')) if 'total_market_cap' in df.columns and pd.notna(row.get('total_market_cap')) else None - float_market_cap = float(row.get('float_market_cap')) if 'float_market_cap' in df.columns and pd.notna(row.get('float_market_cap')) else None - inst_holding_ratio = float(row.get('inst_holding_ratio')) if 'inst_holding_ratio' in df.columns and pd.notna(row.get('inst_holding_ratio')) else None - trading_days = 
int(row.get('trading_days')) if 'trading_days' in df.columns and pd.notna(row.get('trading_days')) else None - - results.append(KLineData( - symbol=symbol, - time=ts, - open=float(row.get('open', 0)), - high=float(row.get('high', 0)), - low=float(row.get('low', 0)), - close=float(row.get('close', 0)), - volume=int(row.get('volume', 0)), - amount=float(row.get('amount', 0)), - trade_date=trade_date, - is_limit_up=is_limit_up, - is_limit_down=is_limit_down, - total_market_cap=total_market_cap, - float_market_cap=float_market_cap, - inst_holding_ratio=inst_holding_ratio, - trading_days=trading_days - )) - - info(f"Fetched {len(results)} klines from AmazingData for {symbol}") + try: + # 解析日期 + if isinstance(kline_time, pd.Timestamp): + ts = int(kline_time.timestamp()) + trade_date = kline_time.strftime('%Y-%m-%d') + trade_date_int = int(kline_time.strftime('%Y%m%d')) + else: + date_str = str(int(kline_time)) + if len(date_str) != 8: + continue + dt = datetime.strptime(date_str, "%Y%m%d") + ts = int(dt.timestamp()) + trade_date = dt.strftime('%Y-%m-%d') + trade_date_int = int(date_str) + except (ValueError, TypeError) as e: + continue + + # 获取收盘价 + close = float(row.get('close', 0)) + + # ============================================ + # 4.1 判断是否涨跌停 + # ============================================ + is_limit_up = False + is_limit_down = False + if high_limited and low_limited and close > 0: + # 涨停:收盘价 >= 涨停价 * 0.995(允许0.5%误差) + is_limit_up = close >= high_limited * 0.995 + # 跌停:收盘价 <= 跌停价 * 1.005(允许0.5%误差) + is_limit_down = close <= low_limited * 1.005 + + # ============================================ + # 4.2 计算市值 + # ============================================ + total_market_cap = close * tot_share if tot_share > 0 and close > 0 else None + float_market_cap = close * float_share if float_share > 0 and close > 0 else None + + # ============================================ + # 4.3 计算可交易日数 + # ============================================ + trading_days = None + if 
calendar and list_date: + # 计算从上市日期到当前交易日的交易日数 + trading_days = sum(1 for d in calendar if list_date <= d <= trade_date_int) + + # 机构持仓占比( AmazingData K线数据可能包含,如果没有则设为None ) + inst_holding_ratio = None + if 'inst_holding_ratio' in df.columns and pd.notna(row.get('inst_holding_ratio')): + inst_holding_ratio = float(row.get('inst_holding_ratio')) + + results.append(KLineData( + symbol=symbol, + time=ts, + open=float(row.get('open', 0)), + high=float(row.get('high', 0)), + low=float(row.get('low', 0)), + close=close, + volume=int(row.get('volume', 0)), + amount=float(row.get('amount', 0)), + trade_date=trade_date, + is_limit_up=is_limit_up, + is_limit_down=is_limit_down, + total_market_cap=total_market_cap, + float_market_cap=float_market_cap, + inst_holding_ratio=inst_holding_ratio, + trading_days=trading_days + )) + + info(f"Fetched {len(results)} klines with extended fields from AmazingData for {symbol}") return results async def fetch_symbols(self, asset_type: str) -> List[SymbolInfo]: diff --git a/app/services/__pycache__/stock_service.cpython-311.pyc b/app/services/__pycache__/stock_service.cpython-311.pyc index 2114082..cdeff30 100644 Binary files a/app/services/__pycache__/stock_service.cpython-311.pyc and b/app/services/__pycache__/stock_service.cpython-311.pyc differ diff --git a/app/services/stock_service.py b/app/services/stock_service.py index 3db2885..45b5f74 100644 --- a/app/services/stock_service.py +++ b/app/services/stock_service.py @@ -32,23 +32,25 @@ class StockService: except ValueError as e: raise ValueError(f"Invalid date format: {e}") + # todo 暂时不从数据库获取,后期放开 # 获取K线数据(从数据库) - items = self.repository.get_klines( - req.symbol, - req.freq, - start, - end, - req.adjust - ) + # items = self.repository.get_klines( + # req.symbol, + # req.freq, + # start, + # end, + # req.adjust + # ) + items = [] # 先不从数据库获取,直接从适配器获取 # 如果数据库没有数据,尝试从适配器获取 if not items: info(f"No data in DB for {req.symbol}, fetching from adapter...") items = 
self._fetch_from_adapter(req.symbol, req.start, req.end, req.freq) - # 保存到数据库 - if items: - self._save_klines_to_db(req.symbol, req.freq, items) + # # 保存到数据库 + # if items: + # self._save_klines_to_db(req.symbol, req.freq, items) # 处理复权(简化实现,实际需要复权系数表) if req.adjust != AdjustType.NONE: diff --git a/test_klines_api.py b/test_klines_api.py new file mode 100644 index 0000000..14274f7 --- /dev/null +++ b/test_klines_api.py @@ -0,0 +1,112 @@ +"""测试股票K线接口返回的字段""" +import requests +import json + +# API 配置 +BASE_URL = "http://localhost:8080/v1" +API_KEY = "demo-api-key-2024" + +# 测试获取股票K线 +def test_stock_klines(): + """测试股票K线接口返回的字段""" + url = f"{BASE_URL}/stock/klines/000001.SZ" + headers = {"X-API-Key": API_KEY} + params = { + "start": "20260301", + "end": "20260310", + "freq": "1d" + } + + print(f"\n{'='*60}") + print(f"测试接口: GET {url}") + print(f"{'='*60}") + + try: + response = requests.get(url, headers=headers, params=params) + data = response.json() + + if data.get("code") == 0: + kline_data = data.get("data", {}) + items = kline_data.get("items", []) + + print(f"\n标的: {kline_data.get('symbol')}") + print(f"周期: {kline_data.get('freq')}") + print(f"数据条数: {len(items)}") + print(f"\n{'='*60}") + + if items: + # 显示第一条数据的完整字段 + first_item = items[0] + print("\n第一条数据详情:") + print(f"{'-'*60}") + + # 基础字段 + print(f"时间戳: {first_item.get('time')}") + print(f"开盘价: {first_item.get('open')}") + print(f"最高价: {first_item.get('high')}") + print(f"最低价: {first_item.get('low')}") + print(f"收盘价: {first_item.get('close')}") + print(f"成交量: {first_item.get('volume')}") + print(f"成交额: {first_item.get('amount')}") + + # 扩展字段 + print(f"\n扩展字段:") + print(f" 交易日: {first_item.get('trade_date')}") + print(f" 是否涨停: {first_item.get('is_limit_up')}") + print(f" 是否跌停: {first_item.get('is_limit_down')}") + print(f" 总市值: {first_item.get('total_market_cap')}") + print(f" 流通市值: {first_item.get('float_market_cap')}") + print(f" 机构持仓占比: {first_item.get('inst_holding_ratio')}") + print(f" 可交易日数: 
{first_item.get('trading_days')}") + print(f" 创建时间: {first_item.get('created_at')}") + + # 验证所有字段是否存在 + expected_fields = [ + 'symbol', 'time', 'open', 'high', 'low', 'close', + 'volume', 'amount', 'trade_date', 'is_limit_up', + 'is_limit_down', 'total_market_cap', 'float_market_cap', + 'inst_holding_ratio', 'trading_days', 'created_at' + ] + + print(f"\n{'='*60}") + print("字段完整性检查:") + print(f"{'-'*60}") + + missing_fields = [] + for field in expected_fields: + if field in first_item: + print(f" ✓ {field}") + else: + print(f" ✗ {field} (缺失)") + missing_fields.append(field) + + if missing_fields: + print(f"\n缺失字段: {', '.join(missing_fields)}") + else: + print(f"\n所有字段都存在!") + + return True + else: + print("没有获取到数据") + return False + else: + print(f"请求失败: {data.get('message')}") + return False + + except Exception as e: + print(f"请求异常: {e}") + return False + +if __name__ == "__main__": + print("\n" + "="*60) + print("股票K线接口字段测试") + print("="*60) + + success = test_stock_klines() + + print(f"\n{'='*60}") + if success: + print("测试完成!") + else: + print("测试失败!") + print("="*60 + "\n") diff --git a/test_klines_extended_fields.py b/test_klines_extended_fields.py new file mode 100644 index 0000000..622d5a1 --- /dev/null +++ b/test_klines_extended_fields.py @@ -0,0 +1,133 @@ +"""测试K线数据扩展字段获取""" +import asyncio +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from app.adapters.amazingdata_adapter import AmazingDataAdapter +from datetime import datetime + + +async def test_klines_with_extended_fields(): + """测试获取带有扩展字段的K线数据""" + print("\n" + "="*60) + print("测试K线数据扩展字段") + print("="*60) + + adapter = AmazingDataAdapter() + + # 连接配置(请根据实际情况修改) + config = { + "username": os.getenv("AMAZINGDATA_USERNAME", "11200008169"), + "password": os.getenv("AMAZINGDATA_PASSWORD", "11200008169@2026"), + "host": os.getenv("AMAZINGDATA_HOST", "140.206.44.234"), + "port": int(os.getenv("AMAZINGDATA_PORT", "8600")), + "local_path": 
"./amazing_data_cache/", + "use_local_cache": True + } + + try: + # 连接适配器 + print("\n[1/3] 正在连接 AmazingData...") + await adapter.connect(config) + print("✓ 连接成功") + + # 获取K线数据 + symbol = "000001.SZ" # 平安银行 + start_date = "20260301" + end_date = "20260310" + + print(f"\n[2/3] 正在获取 {symbol} 的K线数据 ({start_date} ~ {end_date})...") + klines = await adapter.fetch_klines(symbol, start_date, end_date, "1d") + print(f"✓ 获取到 {len(klines)} 条K线数据") + + # 显示第一条数据的完整信息 + if klines: + print(f"\n[3/3] 数据字段验证") + print("-"*60) + + k = klines[0] + print(f"\n标的代码: {k.symbol}") + print(f"交易日: {k.trade_date}") + print(f"时间戳: {datetime.fromtimestamp(k.time)}") + + print(f"\n基础行情:") + print(f" 开盘价: {k.open}") + print(f" 最高价: {k.high}") + print(f" 最低价: {k.low}") + print(f" 收盘价: {k.close}") + print(f" 成交量: {k.volume}") + print(f" 成交额: {k.amount}") + + print(f"\n扩展字段:") + print(f" 是否涨停: {k.is_limit_up} {'✓' if k.is_limit_up is not None else '✗'}") + print(f" 是否跌停: {k.is_limit_down} {'✓' if k.is_limit_down is not None else '✗'}") + print(f" 总市值: {k.total_market_cap:,.0f} 元" if k.total_market_cap else " 总市值: None ✗") + print(f" 流通市值: {k.float_market_cap:,.0f} 元" if k.float_market_cap else " 流通市值: None ✗") + print(f" 机构持仓占比: {k.inst_holding_ratio}%" if k.inst_holding_ratio else " 机构持仓占比: None") + print(f" 可交易日数: {k.trading_days} {'✓' if k.trading_days else '✗'}") + + # 验证字段完整性 + print(f"\n{'='*60}") + print("字段完整性检查:") + print("-"*60) + + checks = [ + ("symbol", k.symbol is not None), + ("time", k.time > 0), + ("open", k.open > 0), + ("high", k.high > 0), + ("low", k.low > 0), + ("close", k.close > 0), + ("volume", k.volume > 0), + ("amount", k.amount > 0), + ("trade_date", k.trade_date is not None), + ("is_limit_up", k.is_limit_up is not None), + ("is_limit_down", k.is_limit_down is not None), + ("total_market_cap", k.total_market_cap is not None and k.total_market_cap > 0), + ("float_market_cap", k.float_market_cap is not None and k.float_market_cap > 0), + ("trading_days", k.trading_days is 
not None and k.trading_days > 0), + ] + + passed = 0 + for field, check in checks: + status = "✓" if check else "✗" + print(f" {status} {field}") + if check: + passed += 1 + + print(f"\n通过: {passed}/{len(checks)}") + + # 显示涨跌停判断逻辑验证 + print(f"\n{'='*60}") + print("涨跌停判断示例:") + print("-"*60) + for k in klines[:3]: # 显示前3条 + limit_status = "" + if k.is_limit_up: + limit_status = "📈 涨停" + elif k.is_limit_down: + limit_status = "📉 跌停" + else: + limit_status = "—" + print(f" {k.trade_date}: 收盘{k.close} {limit_status}") + + # 断开连接 + await adapter.close() + print(f"\n{'='*60}") + print("测试完成!") + print("="*60 + "\n") + + return True + + except Exception as e: + print(f"\n✗ 测试失败: {e}") + import traceback + traceback.print_exc() + return False + + +if __name__ == "__main__": + success = asyncio.run(test_klines_with_extended_fields()) + sys.exit(0 if success else 1) diff --git a/venv/Lib/site-packages/blosc2-4.1.2.dist-info/INSTALLER b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/blosc2-4.1.2.dist-info/METADATA b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/METADATA new file mode 100644 index 0000000..7e39800 --- /dev/null +++ b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/METADATA @@ -0,0 +1,186 @@ +Metadata-Version: 2.4 +Name: blosc2 +Version: 4.1.2 +Summary: A fast & compressed ndarray library with a flexible compute engine. 
+Author-Email: Blosc Development Team +Maintainer-Email: Blosc Development Team +License-Expression: BSD-3-Clause +License-File: LICENSE.txt +Classifier: Development Status :: 6 - Mature +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Intended Audience :: Science/Research +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Project-URL: homepage, https://github.com/Blosc/python-blosc2 +Project-URL: documentation, https://www.blosc.org/python-blosc2/python-blosc2.html +Requires-Python: >=3.10 +Requires-Dist: numpy>=1.26 +Requires-Dist: ndindex +Requires-Dist: msgpack +Requires-Dist: numexpr>=2.14.1; platform_machine != "wasm32" +Requires-Dist: requests +Description-Content-Type: text/x-rst + +============= +Python-Blosc2 +============= + +A fast & compressed ndarray library with a flexible compute engine +================================================================== + +:Author: The Blosc development team +:Contact: blosc@blosc.org +:Github: https://github.com/Blosc/python-blosc2 +:Actions: |actions| +:PyPi: |version| +:NumFOCUS: |numfocus| +:Code of Conduct: |Contributor Covenant| + +.. |version| image:: https://img.shields.io/pypi/v/blosc2.svg + :target: https://pypi.python.org/pypi/blosc2 +.. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg + :target: https://github.com/Blosc/community/blob/master/code_of_conduct.md +.. 
|numfocus| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A + :target: https://numfocus.org +.. |actions| image:: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml/badge.svg + :target: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml + + +What is Python-Blosc2? +======================= + +Python-Blosc2 is a high-performance compressed ndarray library with a flexible +compute engine, using `C-Blosc2 `_ +as its compression backend. It allows complex calculations on compressed data, +whether stored in memory, on disk, or over the network (e.g., via +`Caterva2 `_). It uses the +`C-Blosc2 simple and open format +`_ for storing +compressed data. + +More info: https://www.blosc.org/python-blosc2/getting_started/overview.html + +Installing +========== + +Binary packages are available for major OSes (Win, Mac, Linux) and platforms. +Install from PyPi using ``pip``: + +.. code-block:: console + + pip install blosc2 --upgrade + +Conda users can install from conda-forge: + +.. code-block:: console + + conda install -c conda-forge python-blosc2 + +Documentation +============= + +The documentation is available here: + +https://blosc.org/python-blosc2/python-blosc2.html + +You can find examples at: + +https://github.com/Blosc/python-blosc2/tree/main/examples + +A tutorial from PyData Global 2025 is available at: + +https://github.com/Blosc/PyData-Global-2025-Tutorial + +(`Click here `_ to watch the video recording of the tutorial) + +It contains Jupyter notebooks explaining the main features of Python-Blosc2. + +License +======= + +This software is licensed under a 3-Clause BSD license. A copy of the +python-blosc2 license can be found in +`LICENSE.txt `_. 
+ +Discussion forum +================ + +Discussion about this package is welcome at: + +https://github.com/Blosc/python-blosc2/discussions + +Social feeds +------------ + +Stay informed about the latest developments by following us in +`Mastodon `_, +`Bluesky `_ or +`LinkedIn `_. + +Thanks +====== + +Blosc2 is supported by the `NumFOCUS foundation `_, the +`LEAPS-INNOV project `_ +and `ironArray SLU `_, among many other donors. +This allowed the following people to have contributed in an important way +to the core development of the Blosc2 library: + +- Francesc Alted +- Marta Iborra +- Luke Shaw +- Aleix Alcacer +- Oscar Guiñón +- Juan David Ibáñez +- Ivan Vilata i Balaguer +- Oumaima Ech.Chdig +- Ricardo Sales Piquer + +In addition, other people have participated in the project in different +aspects: + +- Jan Sellner, contributed the mmap support for NDArray/SChunk objects. +- Dimitri Papadopoulos, contributed a large bunch of improvements to + many aspects of the project. His attention to detail is remarkable. +- And many others that have contributed with bug reports, suggestions and + improvements. + +Developed using JetBrains IDEs. + +.. image:: https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg + :target: https://jb.gg/OpenSource + :alt: JetBrains logo. + +Citing Blosc +============ + +You can cite our work on the various libraries under the Blosc umbrella as follows: + +.. code-block:: console + + @ONLINE{blosc, + author = {{Blosc Development Team}}, + title = "{A fast, compressed and persistent data store library}", + year = {2009-2025}, + note = {https://blosc.org} + } + +Support Blosc for a Sustainable Future +====================================== + +If you find Blosc useful and want to support its development, please consider +making a `donation or contract to the Blosc Development Team +`_. +Thank you! 
+ + +**Compress Better, Compute Bigger** diff --git a/venv/Lib/site-packages/blosc2-4.1.2.dist-info/RECORD b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/RECORD new file mode 100644 index 0000000..55992da --- /dev/null +++ b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/RECORD @@ -0,0 +1,80 @@ +blosc2-4.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +blosc2-4.1.2.dist-info/METADATA,sha256=PfwyhaAAq9E0pNey0uX8XLczfOfGhPqQntCe2eKZqCA,6231 +blosc2-4.1.2.dist-info/RECORD,, +blosc2-4.1.2.dist-info/WHEEL,sha256=Iwzd8cFJYd34Bw6rlN9JB4hgsVKLIBCVy637sxMRVyo,106 +blosc2-4.1.2.dist-info/entry_points.txt,sha256=AQn8qWJhx7sMxZxwNAn9AGT77UMZpT8ZHmZoHsUY4Tw,29 +blosc2-4.1.2.dist-info/licenses/LICENSE.txt,sha256=AstwCmS9owvusCU1ghx9pxk64zven0poXe4JQkjWzxg,1655 +blosc2/__init__.py,sha256=wfFw2HAsNRTp9qjTQAA-sr5zFnhkONa3EjrqGO2wUR8,21137 +blosc2/__pycache__/__init__.cpython-311.pyc,, +blosc2/__pycache__/_wasm_jit.cpython-311.pyc,, +blosc2/__pycache__/c2array.cpython-311.pyc,, +blosc2/__pycache__/core.cpython-311.pyc,, +blosc2/__pycache__/dict_store.cpython-311.pyc,, +blosc2/__pycache__/dsl_kernel.cpython-311.pyc,, +blosc2/__pycache__/embed_store.cpython-311.pyc,, +blosc2/__pycache__/exceptions.cpython-311.pyc,, +blosc2/__pycache__/fft.cpython-311.pyc,, +blosc2/__pycache__/info.cpython-311.pyc,, +blosc2/__pycache__/lazyexpr.cpython-311.pyc,, +blosc2/__pycache__/linalg.cpython-311.pyc,, +blosc2/__pycache__/ndarray.cpython-311.pyc,, +blosc2/__pycache__/proxy.cpython-311.pyc,, +blosc2/__pycache__/schunk.cpython-311.pyc,, +blosc2/__pycache__/storage.cpython-311.pyc,, +blosc2/__pycache__/tree_store.cpython-311.pyc,, +blosc2/__pycache__/utils.cpython-311.pyc,, +blosc2/__pycache__/version.cpython-311.pyc,, +blosc2/_wasm_jit.py,sha256=H_bnZVwr2oQjZUaosoR6ykzxYBO1_HX9dfPSUaTS_Vc,19119 +blosc2/blosc2_ext.cp311-win_amd64.pyd,sha256=Pv7H9dTzotXxPSWlqwBNqpcmLClOQada7ixN-xSH5UM,2499072 +blosc2/blosc2_ext.pyx,sha256=wfKd0JLxGIld1U9ZrooUbgpTsPIF0KpFMxOF8mBzz04,146229 
+blosc2/c2array.py,sha256=8X5OAjiPEQk42t7yQTP5JTlHNh-u1Sb-ScXElv8mgFc,16505 +blosc2/core.py,sha256=2smXa_RYq4vA44kZ1thA3gGKE6kW1Ol7REH2ibQGhBA,71651 +blosc2/dict_store.py,sha256=75JnpeJ9BV65TgWgwtUQydrFAt6Op3dXpecTcMggoik,23149 +blosc2/dsl_kernel.py,sha256=XzpIRx4VmcCXUio22xsb4XUabv2VvCLdhrwLmXAMHdY,46111 +blosc2/embed_store.py,sha256=LOudUZvwMO1vWEmrGUay6tdaVNpQIMCJX6e_p2Lx3PA,13349 +blosc2/exceptions.py,sha256=ZK-SPzS527CEDL7-dw1Xl8wWHt93KW2vp9sg3wIp4BI,557 +blosc2/fft.py,sha256=V_8c_-w-DOS61IWfUcx6UgeDlME35PGAyxY5ASgDnG4,979 +blosc2/include/b2nd.h,sha256=vLj3QPdmG2xzeS0f6f1xZzoeypEiApfB0MvUGjVZdLA,24902 +blosc2/include/blosc2.h,sha256=Ii9DzbSgjVdhA2Sz6oHmT8InCZEFuip9DbR_aotdFzA,102880 +blosc2/include/blosc2/blosc2-common.h,sha256=5yQGyQpkwbT-bBzurP5p155ZAWv3GKzyx43ZwilSyQM,2719 +blosc2/include/blosc2/blosc2-export.h,sha256=hsi3IiPDgyWVIhpuV8oNhgN4EYWIA3j_EvRBrIqZxck,1855 +blosc2/include/blosc2/blosc2-stdio.h,sha256=jGU3e3cgXbxW7t5E1TOt5y-Rga1MSWLk6ebo93UOkKI,4650 +blosc2/include/blosc2/codecs-registry.h,sha256=-bikcAq5rZOZzA9lZkDrvsGTTmuX4c3qPMH_qG0iPPw,2043 +blosc2/include/blosc2/filters-registry.h,sha256=TzV0nTUifbKSK6fs4NkD_CtcOHb7m5W18weVO5n4KxU,1833 +blosc2/include/blosc2/tuners-registry.h,sha256=uQ3TWsbf0QGXHP5qYRv9iqmpXcb7z5MUSmnsF7MVcv4,842 +blosc2/info.py,sha256=5eaD2K3jfyLT1y0WAQQ7tgyJmfwN5FVJ9rUInWfLfbU,2085 +blosc2/lazyexpr.py,sha256=QYfuhvxDpYfFMcPZ9zm264wH2I8qDMkxF5of5Cre62g,188780 +blosc2/lib/blosc2.lib,sha256=UxKvQdGCBE42neuY7QTq9AF1YImoo_tGjQs7G4u0khY,35744 +blosc2/lib/cmake/blosc2/Blosc2Config.cmake,sha256=64up3utTNCiNvhLYfBhRLIJw0ZbtOmyZ8LY9UOMd-3E,4351 +blosc2/lib/cmake/blosc2/Blosc2ConfigVersion.cmake,sha256=z0xZRkTV_dFBNx6fg9lJrm40x_zFwQ8JdQ_DuoiVNV0,2830 +blosc2/lib/cmake/blosc2/Blosc2Targets-release.cmake,sha256=qqvx-rb3WsAOPd_grd3xFiSJj9IE67qxwYXKsL49bn0,1771 +blosc2/lib/cmake/blosc2/Blosc2Targets.cmake,sha256=ero0ELloxyV-njwSbFuxyQHw02lqPUCUF8V3rz57-lI,4787 
+blosc2/lib/cmake/blosc2/Modules/FindIPP.cmake,sha256=CuPjgtqtJ9AZ7QAKA7z5nzKBPjTEsRv20oQiN-mdoPQ,2165 +blosc2/lib/cmake/blosc2/Modules/FindLZ4.cmake,sha256=nGGvXB3GSR4Mbct1z4xv8Y3HToo6NUnpNZMGCkCTvmg,288 +blosc2/lib/cmake/blosc2/Modules/FindSIMD.cmake,sha256=VZEaOjgaV6XHvMLxnp7ahOF_wbOEEN3MPmjO_JGHpEw,2268 +blosc2/lib/cmake/blosc2/Modules/FindZLIB_NG.cmake,sha256=Ehvjx61hvBcSUdJ6SEKgQfip2X1JlCIk-owmQ8EzRWg,2105 +blosc2/lib/cmake/blosc2/Modules/FindZSTD.cmake,sha256=pxIgMvnUjLVP-3rntB-70f-tzzfuXtBUwBtOotvoQOo,212 +blosc2/lib/cmake/blosc2/Modules/toolchain-aarch64.cmake,sha256=Kmg8bElwPy5FBF5LW2N8veU-7igNTyscDMbG_J14ajc,985 +blosc2/lib/cmake/blosc2/Modules/toolchain-armhf.cmake,sha256=2bRK-4gla53SFgDV6dTuqXn6g7Wpbb_fDcc7hFd5kHo,1022 +blosc2/lib/cmake/blosc2/Modules/toolchain-armsf.cmake,sha256=RkIP5XvfdbwfzIYZJgcopLv6D4ABWOLUDOuFB1VqanI,1128 +blosc2/lib/libblosc2.dll,sha256=ZWwatYusKIPBNQqVKbeHQpviAYSO_pV9l0mRXcDdZ1k,1338368 +blosc2/lib/libblosc2.lib,sha256=gtCUPjUM8vgkydQD7Uq1JFGEBenCtErcM-SdbV1a0GA,2626708 +blosc2/lib/pkgconfig/blosc2.pc,sha256=EoRO6O8iLRBVas-RCp5koYWFiIFZzNNYgkdmMUK03qY,497 +blosc2/lib/tcc.dll,sha256=k7MezEkky81NA7O5aQTydJ18_elH_gNSIONioBGywNw,350720 +blosc2/linalg.py,sha256=H8muaJ96U4rMCL81MNxYe_-i7Fqs7bHRo0bhVsZMIZA,31541 +blosc2/me_jit_glue.js,sha256=9uyc98wu83S9PW1WJaL9KDtA5z9hTlTxg2_maHNnx7Y,23314 +blosc2/ndarray.py,sha256=dAzB6bAFA-_4sdzPcr-f58OCtlaTKPUfmTTIZqRRTfI,228824 +blosc2/proxy.py,sha256=Om2lB-w31njTJhycqXEiTv8YHmgiYGEXgIsVmKvD35c,29361 +blosc2/schunk.py,sha256=MgwCrzLI-Td9bE8Tduh42uxf_TD7xid5FOp2RkSCGds,65044 +blosc2/storage.py,sha256=k7DHIzbLphHHE_xytaQ6eawuqzoT6FtF_80rRJp_pQU,10525 +blosc2/tree_store.py,sha256=khU10mInKHJfSQhai7l_cae2V7ds7vlNmmx4tzxK6Bw,27200 +blosc2/utils.py,sha256=li0oyxsDiBZLtpBdkUKoFB-pEYwRA_kF2-GtWYw7-B4,37192 +blosc2/version.py,sha256=L7zCFLtbyYe-_iP0M_1XaXP8Dexs06fJx0XFT65XNyk,58 +include/libtcc.h,sha256=OEtbE6mBTCh2v8EeBtL5RVrI10pr_IaHbE51hy598XY,5096 
+lib/tcc.dll,sha256=k7MezEkky81NA7O5aQTydJ18_elH_gNSIONioBGywNw,350720 +share/miniexpr/licenses/LICENSE,sha256=amIGietrCmqJGJurywOAM2NHty7iPWtEMhyfRprJuMU,1568 +share/miniexpr/licenses/LICENSE-LIBTCC,sha256=xOx_Td8HPikwzbvz1mnOo_J1-wLIws4bNI_Qe4059ws,27111 +share/miniexpr/licenses/LICENSE-SLEEF,sha256=vrjkLp1rQoTgMwTQWoGgdVIAqWX8jQpeCuoehM-AXW4,1361 +share/miniexpr/licenses/LICENSE-TINYEXPR,sha256=wcAtE2y-ldvajJ6rOIdTGJc2ZRHk2GXCKximY2q2VZE,897 +share/miniexpr/licenses/THIRD_PARTY_NOTICES.md,sha256=rIjDpvDa2FXLO-zTDl7ThLkhfcrkmqf-Obl5_9Cu4FI,867 +share/miniexpr/third_party/tinycc/COPYING,sha256=mxg-fwNWw5jMCmXEotLNVvIUmo4kQmTE0mrFnp2to-g,26932 diff --git a/venv/Lib/site-packages/blosc2-4.1.2.dist-info/WHEEL b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/WHEEL new file mode 100644 index 0000000..51a5744 --- /dev/null +++ b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: scikit-build-core 0.12.1 +Root-Is-Purelib: false +Tag: cp311-cp311-win_amd64 + diff --git a/venv/Lib/site-packages/blosc2-4.1.2.dist-info/entry_points.txt b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/entry_points.txt new file mode 100644 index 0000000..0b668b4 --- /dev/null +++ b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[array_api] +blosc2 = blosc2 + diff --git a/venv/Lib/site-packages/blosc2-4.1.2.dist-info/licenses/LICENSE.txt b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/licenses/LICENSE.txt new file mode 100644 index 0000000..07e0f60 --- /dev/null +++ b/venv/Lib/site-packages/blosc2-4.1.2.dist-info/licenses/LICENSE.txt @@ -0,0 +1,31 @@ +BSD 3-Clause License + +For Blosc - A blocking, shuffling and lossless compression library + +Copyright (c) 2019-present, Blosc Development Team +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/Lib/site-packages/blosc2/__init__.py b/venv/Lib/site-packages/blosc2/__init__.py new file mode 100644 index 0000000..2444955 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/__init__.py @@ -0,0 +1,940 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
def _configure_libtcc_runtime_path():
    """Point ``ME_DSL_JIT_LIBTCC_PATH`` at a bundled libtcc, if one can be found.

    This is a best-effort helper: it returns silently without touching the
    environment when running on wasm32, when the variable is already set to a
    non-empty value, when the ``blosc2.blosc2_ext`` extension cannot be
    located, or when no candidate library file exists.
    """
    # Respect the wasm build and any value already present in the environment.
    if IS_WASM or os.environ.get("ME_DSL_JIT_LIBTCC_PATH"):
        return

    ext_spec = importlib.util.find_spec("blosc2.blosc2_ext")
    ext_origin = ext_spec.origin if ext_spec is not None else None
    if not ext_origin:
        return

    base = Path(ext_origin).resolve().parent

    # Library file names to look for, keyed by platform.system(); anything
    # other than macOS/Windows falls back to the ELF-style names.
    library_names = {
        "Darwin": ("libtcc.dylib",),
        "Windows": ("tcc.dll", "libtcc.dll"),
    }.get(platform.system(), ("libtcc.so", "libtcc.so.1"))

    # Search next to the extension first, then its "lib" subdirectory, then
    # the sibling "lib" directory one level up; the first hit wins.
    for folder in (base, base / "lib", base.parent / "lib"):
        for filename in library_names:
            lib_path = folder / filename
            if lib_path.is_file():
                os.environ["ME_DSL_JIT_LIBTCC_PATH"] = str(lib_path)
                return
class Filter(Enum):
    """
    Filters that can be applied to the data.

    The integer passed in ``filters_meta`` is interpreted per filter:

    - NOFILTER: ignored
    - SHUFFLE: number of byte streams for the shuffle (0 means the typesize of the array)
    - BITSHUFFLE: ignored
    - DELTA: ignored (bitwise XOR)
    - TRUNC_PREC: number of bits to which floats are truncated
    - NDCELL: cellshape (e.g. for a 3-dim dataset, meta = 4 means a 4x4x4 cell)
    - NDMEAN: cellshape (e.g. for a 3-dim dataset, meta = 4 means a 4x4x4 cell)
    - BYTEDELTA: number of byte streams for the delta
    - INT_TRUNC: number of bits to which integers are truncated

    For TRUNC_PREC and INT_TRUNC a positive value is the number of bits to keep,
    whereas a negative value is the number of bits to zero.

    See the NDCELL and NDMEAN filter documentation for more details on those two.
    """

    NOFILTER = 0
    SHUFFLE = 1
    BITSHUFFLE = 2
    DELTA = 3
    TRUNC_PREC = 4
    NDCELL = 32
    NDMEAN = 33
    BYTEDELTA = 35
    INT_TRUNC = 36


class SplitMode(Enum):
    """
    Split modes that can be selected.
    """

    ALWAYS_SPLIT = 1
    NEVER_SPLIT = 2
    AUTO_SPLIT = 3
    FORWARD_COMPAT_SPLIT = 4


class SpecialValue(Enum):
    """
    Special values that a chunk may hold.
    """

    NOT_SPECIAL = 0
    ZERO = 1
    NAN = 2
    VALUE = 3
    UNINIT = 4
class FPAccuracy(Enum):
    """
    Accuracy modes for floating point functions in Blosc2 lazy expressions.

    Only relevant when floating point dtypes are computed with miniexpr.
    """

    #: Floating point functions use 1.0 ULPs (Units in the Last Place)
    HIGH = 1
    #: Floating point functions use 3.5 ULPs (Units in the Last Place)
    MEDIUM = 2
    #: The default accuracy; an alias of MEDIUM, which should suffice for most applications.
    DEFAULT = MEDIUM
This is +equal to :py:obj:`blosc2.EXTENDED_HEADER_LENGTH `.""" + +MAX_TYPESIZE = MAX_TYPESIZE +""" +Blosc2 maximum type size (in bytes).""" + +MIN_HEADER_LENGTH = MIN_HEADER_LENGTH +""" +Blosc2 minimum header length (in bytes).""" + +VERSION_DATE = VERSION_DATE +""" +The C-Blosc2 version's date.""" + +VERSION_STRING = VERSION_STRING +""" +The C-Blosc2 version's string.""" + +if IS_WASM: + from ._wasm_jit import init_wasm_jit_helpers + + _WASM_MINIEXPR_ENABLED = init_wasm_jit_helpers() + + +# For array-api compatibility +iinfo = np.iinfo +finfo = np.finfo + + +def isdtype(a_dtype: np.dtype, kind: str | np.dtype | tuple): + """ + Returns a boolean indicating whether a provided dtype is of a specified data type "kind". + + Parameters + ---------- + dtype: dtype + The input dtype. + + kind: str | dtype | Tuple[str, dtype] + Data type kind. + + If kind is a dtype, return boolean indicating whether the input dtype is equal to the dtype specified by kind. + + If kind is a string, return boolean indicating whether the input dtype is of a specified data type kind. + The following dtype kinds are supporte: + + * 'bool': boolean data types (e.g., bool). + + * 'signed integer': signed integer data types (e.g., int8, int16, int32, int64). + + * 'unsigned integer': unsigned integer data types (e.g., uint8, uint16, uint32, uint64). + + * 'integral': integer data types. Shorthand for ('signed integer', 'unsigned integer'). + + * 'real floating': real-valued floating-point data types (e.g., float32, float64). + + * 'complex floating': complex floating-point data types (e.g., complex64, complex128). + + * 'numeric': numeric data types. Shorthand for ('integral', 'real floating', 'complex floating'). + + Returns + ------- + out: bool + Boolean indicating whether a provided dtype is of a specified data type kind. 
+ """ + kind = (kind,) if not isinstance(kind, tuple) else kind + for _ in kind: + if a_dtype == kind: + return True + + _complex, _signedint, _uint, _rfloat = False, False, False, False + if a_dtype in (complex64, complex128): + _complex = True + if "complex floating" in kind: + return True + if a_dtype == bool_ and "bool" in kind: + return True + if a_dtype in (int8, int16, int32, int64): + _signedint = True + if "signed integer" in kind: + return True + if a_dtype in (uint8, uint16, uint32, uint64): + _uint = True + if "unsigned integer" in kind: + return True + if a_dtype in (float16, float32, float64): + _rfloat = True + if "real floating" in kind: + return True + if "integral" in kind and (_signedint or _uint): + return True + return "numeric" in kind and ( + _signedint or _uint or _rfloat or _complex + ) # checked everything, otherwise False + + +# dtypes for array-api +str_ = np.str_ +bytes_ = np.bytes_ +object_ = np.object_ + +from numpy import ( + bool_, + complex64, + complex128, + e, + euler_gamma, + float16, + float32, + float64, + inf, + int8, + int16, + int32, + int64, + nan, + newaxis, + pi, + uint8, + uint16, + uint32, + uint64, +) + +bool = bool + +DEFAULT_COMPLEX = complex128 +""" +Default complex floating dtype.""" + +DEFAULT_FLOAT = float64 +""" +Default real floating dtype.""" + +DEFAULT_INT = int64 +""" +Default integer dtype.""" + +DEFAULT_INDEX = int64 +""" +Default indexing dtype.""" + + +class Info: + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + +def __array_namespace_info__() -> Info: + """ + Return information about the array namespace following the Array API specification. 
+ """ + + def _raise(exc): + raise exc + + return Info( + capabilities=lambda: { + "boolean indexing": True, + "data-dependent shapes": False, + "max dimensions": MAX_DIM, + }, + default_device=lambda: "cpu", + default_dtypes=lambda device=None: ( + { + "real floating": DEFAULT_FLOAT, + "complex floating": DEFAULT_COMPLEX, + "integral": DEFAULT_INT, + "indexing": DEFAULT_INDEX, + } + if (device == "cpu" or device is None) + else _raise(ValueError("Only cpu devices allowed")) + ), + dtypes=lambda device=None, kind=None: ( + np.__array_namespace_info__().dtypes(kind=kind, device=device) + if (device == "cpu" or device is None) + else _raise(ValueError("Only cpu devices allowed")) + ), + devices=lambda: ["cpu"], + name="blosc2", + version=__version__, + ) + + +# Public API for container module +from .core import ( + clib_info, + compress, + compress2, + compressor_list, + compute_chunks_blocks, + decompress, + decompress2, + detect_number_of_cores, + free_resources, + from_cframe, + get_blocksize, + get_cbuffer_sizes, + get_clib, + get_compressor, + get_cpu_info, + load_array, + load_tensor, + ndarray_from_cframe, + pack, + pack_array, + pack_array2, + pack_tensor, + print_versions, + register_codec, + register_filter, + remove_urlpath, + save_array, + save_tensor, + schunk_from_cframe, + set_blocksize, + set_compressor, + set_nthreads, + set_releasegil, + unpack, + unpack_array, + unpack_array2, + unpack_tensor, +) + +# Internal Blosc threading +# Get CPU info +cpu_info = get_cpu_info() +nthreads = ncores = cpu_info.get("count", 1) +"""Number of threads to be used in compression/decompression. +""" +# Protection against too many threads +nthreads = min(nthreads, 64) + +if IS_WASM: + nthreads = 1 + # Keep C-side runtime in sync with Python-level default in wasm32. 
+ set_nthreads(1) +else: + # Experiments say that, when using a large number of threads, it is better to not use them all + if nthreads > 16: + nthreads -= nthreads // 8 + # Only call set_num_threads if within NUMEXPR_MAX_THREADS limit to avoid warning + numexpr_max_env = os.environ.get("NUMEXPR_MAX_THREADS") + numexpr_max: int | None = None + if numexpr_max_env is not None: + with contextlib.suppress(ValueError): + numexpr_max = int(numexpr_max_env) + if numexpr_max is None or nthreads <= numexpr_max: + numexpr.set_num_threads(nthreads) + +# This import must be before ndarray and schunk +from .storage import ( # noqa: I001 + CParams, + cparams_dflts, + DParams, + dparams_dflts, + Storage, + storage_dflts, +) + +from .ndarray import ( + Array, + NDArray, + NDField, + Operand, + are_partitions_aligned, + are_partitions_behaved, + arange, + broadcast_to, + linspace, + eye, + asarray, + astype, + indices, + sort, + reshape, + copy, + concat, + expand_dims, + empty, + empty_like, + frombuffer, + fromiter, + get_slice_nchunks, + meshgrid, + nans, + uninit, + zeros, + zeros_like, + ones, + ones_like, + full, + full_like, + save, + stack, +) +from .embed_store import EmbedStore, estore_from_cframe +from .dict_store import DictStore +from .tree_store import TreeStore + +from .c2array import c2context, C2Array, URLPath + +from .dsl_kernel import DSLSyntaxError, DSLKernel, dsl_kernel, validate_dsl +from .lazyexpr import ( + LazyExpr, + lazyudf, + lazyexpr, + LazyArray, + LazyUDF, + _open_lazyarray, + get_expr_operands, + validate_expr, + evaluate, + result_type, + can_cast, +) +from .proxy import Proxy, ProxySource, ProxyNDSource, ProxyNDField, SimpleProxy, jit, as_simpleproxy + +from .schunk import SChunk, open +from . import linalg +from .linalg import tensordot, vecdot, permute_dims, matrix_transpose, matmul, transpose, diagonal, outer +from .utils import linalg_funcs as linalg_funcs_list +from . 
import fft + +# Registry for postfilters +postfilter_funcs = {} +""" +Registry for postfilter functions. For more info see + :func:`SChunk.postfilter `""" +# Registry for prefilters +prefilter_funcs = {} +""" +Registry for prefilter functions. For more info see + :func:`SChunk.prefilter `""" + +# Registry for user-defined codecs +ucodecs_registry = {} +""" +Registry for user-defined codecs. For more info see + :func:`blosc2.register_codec `""" +# Registry for user-defined filters +ufilters_registry = {} +""" +Registry for user-defined filters. For more info see + :func:`blosc2.register_filter `""" + +blosclib_version = f"{VERSION_STRING} ({VERSION_DATE})" +""" +The blosc2 version + date. +""" + +# Private global variables +_disable_overloaded_equal = False +""" +Disable the overloaded equal operator. +""" + +# Delayed imports for avoiding overwriting of python builtins +from .ndarray import ( + abs, + acos, + acosh, + add, + all, + any, + arccos, + arccosh, + arcsin, + arcsinh, + arctan, + arctan2, + arctanh, + argmax, + argmin, + array_from_ffi_ptr, + asin, + asinh, + atan, + atan2, + atanh, + bitwise_and, + bitwise_invert, + bitwise_left_shift, + bitwise_or, + bitwise_right_shift, + bitwise_xor, + ceil, + clip, + conj, + contains, + copysign, + cos, + cosh, + count_nonzero, + cumulative_prod, + cumulative_sum, + divide, + endswith, + equal, + exp, + expm1, + floor, + floor_divide, + greater, + greater_equal, + hypot, + imag, + isfinite, + isinf, + isnan, + lazywhere, + less, + less_equal, + log, + log1p, + log2, + log10, + logaddexp, + logical_and, + logical_not, + logical_or, + logical_xor, + lower, + max, + maximum, + mean, + min, + minimum, + multiply, + negative, + nextafter, + not_equal, + positive, + pow, + prod, + real, + reciprocal, + remainder, + round, + sign, + signbit, + sin, + sinh, + sqrt, + square, + squeeze, + startswith, + std, + subtract, + sum, + take, + take_along_axis, + tan, + tanh, + trunc, + upper, + var, + where, +) + +__all__ = [ # noqa 
: RUF022 + # Constants + "EXTENDED_HEADER_LENGTH", + "MAX_BUFFERSIZE", + "MAX_TYPESIZE", + "MIN_HEADER_LENGTH", + "VERSION_DATE", + "VERSION_STRING", + # Default dtypes + "DEFAULT_COMPLEX", + "DEFAULT_FLOAT", + "DEFAULT_INDEX", + "DEFAULT_INT", + # Mathematical constants + "e", + "pi", + "inf", + "nan", + "newaxis", + # Classes + "C2Array", + "CParams", + # Enums + "Codec", + "DParams", + "DictStore", + "EmbedStore", + "Filter", + "LazyArray", + "DSLKernel", + "DSLSyntaxError", + "LazyExpr", + "LazyUDF", + "NDArray", + "NDField", + "Operand", + "Proxy", + "ProxyNDField", + "ProxyNDSource", + "ProxySource", + "SChunk", + "SimpleProxy", + "SpecialValue", + "SplitMode", + "Storage", + "TreeStore", + "Tuner", + "URLPath", + # Version + "__version__", + # Utils + "linalg_funcs_list", + # Functions + "abs", + "acos", + "acosh", + "add", + "all", + "any", + "arange", + "arccos", + "arccosh", + "arcsin", + "arcsinh", + "arctan", + "arctan2", + "arctanh", + "are_partitions_aligned", + "are_partitions_behaved", + "argmax", + "argmin", + "array_from_ffi_ptr", + "asarray", + "asin", + "asinh", + "as_simpleproxy", + "astype", + "atan", + "atan2", + "atanh", + "bitwise_and", + "bitwise_invert", + "bitwise_left_shift", + "bitwise_or", + "bitwise_right_shift", + "bitwise_xor", + "broadcast_to", + "can_cast", + "ceil", + "clib_info", + "clip", + "compress", + "compress2", + "compressor_list", + "compute_chunks_blocks", + "concat", + "conj", + "contains", + "copy", + "copysign", + "cos", + "cosh", + "count_nonzero", + "cparams_dflts", + "cpu_info", + "cumulative_prod", + "cumulative_sum", + "decompress", + "decompress2", + "detect_number_of_cores", + "divide", + "dparams_dflts", + "endswith", + "empty", + "empty_like", + "equal", + "estore_from_cframe", + "exp", + "expand_dims", + "expm1", + "eye", + "finfo", + "floor", + "floor_divide", + "free_resources", + "from_cframe", + "frombuffer", + "fromiter", + "full", + "full_like", + "get_blocksize", + "get_cbuffer_sizes", + "get_clib", 
+ "get_compressor", + "get_cpu_info", + "get_expr_operands", + "get_slice_nchunks", + "greater", + "greater_equal", + "hypot", + "imag", + "iinfo", + "indices", + "isdtype", + "isfinite", + "isinf", + "isnan", + "jit", + "lazyexpr", + "dsl_kernel", + "validate_dsl", + "lazyudf", + "lazywhere", + "less", + "less_equal", + "linspace", + "load_array", + "load_tensor", + "log", + "log1p", + "log2", + "log10", + "logaddexp", + "logical_and", + "logical_not", + "logical_or", + "logical_xor", + "lower", + "matmul", + "matrix_transpose", + "max", + "maximum", + "mean", + "meshgrid", + "min", + "minimum", + "multiply", + "nans", + "ndarray_from_cframe", + "negative", + "nextafter", + "not_equal", + "ones", + "ones_like", + "open", + "pack", + "pack_array", + "pack_array2", + "pack_tensor", + "permute_dims", + "positive", + "postfilter_funcs", + "pow", + "prefilter_funcs", + "print_versions", + "prod", + "real", + "reciprocal", + "register_codec", + "register_filter", + "remainder", + "remove_urlpath", + "reshape", + "result_type", + "round", + "save", + "save_array", + "save_tensor", + "schunk_from_cframe", + "set_blocksize", + "set_compressor", + "set_nthreads", + "set_releasegil", + "sign", + "signbit", + "sin", + "sinh", + "sort", + "sqrt", + "square", + "squeeze", + "stack", + "startswith", + "std", + "storage_dflts", + "subtract", + "sum", + "take", + "take_along_axis", + "tan", + "tanh", + "tensordot", + "transpose", + "trunc", + "uninit", + "unpack", + "unpack_array", + "unpack_array2", + "unpack_tensor", + "upper", + "validate_expr", + "var", + "vecdot", + "where", + "zeros", + "zeros_like", +] diff --git a/venv/Lib/site-packages/blosc2/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..f92150e Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/_wasm_jit.cpython-311.pyc 
b/venv/Lib/site-packages/blosc2/__pycache__/_wasm_jit.cpython-311.pyc new file mode 100644 index 0000000..f20e929 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/_wasm_jit.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/c2array.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/c2array.cpython-311.pyc new file mode 100644 index 0000000..873a43b Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/c2array.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/core.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/core.cpython-311.pyc new file mode 100644 index 0000000..c39dbc6 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/core.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/dict_store.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/dict_store.cpython-311.pyc new file mode 100644 index 0000000..26680a2 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/dict_store.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/dsl_kernel.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/dsl_kernel.cpython-311.pyc new file mode 100644 index 0000000..79ab3ea Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/dsl_kernel.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/embed_store.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/embed_store.cpython-311.pyc new file mode 100644 index 0000000..e1af435 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/embed_store.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/exceptions.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/exceptions.cpython-311.pyc new file mode 100644 index 0000000..45530ba Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/exceptions.cpython-311.pyc 
differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/fft.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/fft.cpython-311.pyc new file mode 100644 index 0000000..96143fa Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/fft.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/info.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/info.cpython-311.pyc new file mode 100644 index 0000000..ad6f083 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/info.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/lazyexpr.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/lazyexpr.cpython-311.pyc new file mode 100644 index 0000000..db33ba0 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/lazyexpr.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/linalg.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/linalg.cpython-311.pyc new file mode 100644 index 0000000..fd9c804 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/linalg.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/ndarray.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/ndarray.cpython-311.pyc new file mode 100644 index 0000000..2e7f375 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/ndarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/proxy.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/proxy.cpython-311.pyc new file mode 100644 index 0000000..e0e63ee Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/proxy.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/schunk.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/schunk.cpython-311.pyc new file mode 100644 index 0000000..1659aea Binary files /dev/null and 
b/venv/Lib/site-packages/blosc2/__pycache__/schunk.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/storage.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/storage.cpython-311.pyc new file mode 100644 index 0000000..867aced Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/storage.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/tree_store.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/tree_store.cpython-311.pyc new file mode 100644 index 0000000..5901a76 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/tree_store.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/utils.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000..32f35f0 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/utils.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/__pycache__/version.cpython-311.pyc b/venv/Lib/site-packages/blosc2/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000..785c5e1 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/__pycache__/version.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/blosc2/_wasm_jit.py b/venv/Lib/site-packages/blosc2/_wasm_jit.py new file mode 100644 index 0000000..54f0841 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/_wasm_jit.py @@ -0,0 +1,627 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from __future__ import annotations + +import os +from pathlib import Path + +_HELPERS_REGISTERED = False + +_REGISTER_HELPERS_JS = r""" +(() => { + const g = globalThis; + if (g.__blosc2_me_jit_helper_ptrs) { + return g.__blosc2_me_jit_helper_ptrs; + } + + const candidates = []; + const addCandidate = (name, obj) => { + if (!obj || (typeof obj !== "object" && typeof obj !== "function")) { + return; + } + candidates.push({ name, obj }); + }; + const addDerivedCandidates = (baseName, obj) => { + if (!obj || (typeof obj !== "object" && typeof obj !== "function")) { + return; + } + addCandidate(`${baseName}._module`, obj._module); + addCandidate(`${baseName}.module`, obj.module); + addCandidate(`${baseName}.Module`, obj.Module); + addCandidate(`${baseName}.asm`, obj.asm); + addCandidate(`${baseName}.wasmExports`, obj.wasmExports); + addCandidate(`${baseName}.wasm`, obj.wasm); + addCandidate(`${baseName}.__wasm`, obj.__wasm); + addCandidate(`${baseName}.pyodide`, obj.pyodide); + addCandidate(`${baseName}._api`, obj._api); + }; + + addCandidate("globalThis", g); + addCandidate("globalThis.Module", g.Module); + addCandidate("globalThis.__blosc2_pyodide_module", g.__blosc2_pyodide_module); + addCandidate("globalThis.__blosc2_pyodide_api", g.__blosc2_pyodide_api); + addCandidate("globalThis.pyodide", g.pyodide); + addCandidate("globalThis.pyodide._module", g.pyodide && g.pyodide._module); + addCandidate("globalThis.pyodide.module", g.pyodide && g.pyodide.module); + addCandidate("globalThis.pyodide.Module", g.pyodide && g.pyodide.Module); + addCandidate("globalThis.pyodide._api", g.pyodide && g.pyodide._api); + addCandidate("globalThis.pyodide._api._module", g.pyodide && g.pyodide._api && g.pyodide._api._module); + addCandidate("globalThis.pyodide._api.Module", g.pyodide && g.pyodide._api && g.pyodide._api.Module); + addDerivedCandidates("globalThis", g); + 
addDerivedCandidates("globalThis.pyodide", g.pyodide); + addDerivedCandidates("globalThis.__blosc2_pyodide_module", g.__blosc2_pyodide_module); + addDerivedCandidates("globalThis.__blosc2_pyodide_api", g.__blosc2_pyodide_api); + + const resolve = (name) => { + for (const cand of candidates) { + let value; + try { + value = cand.obj[name]; + } catch (_e) { + value = undefined; + } + if (value !== undefined && value !== null) { + if (typeof value === "function") { + return value.bind(cand.obj); + } + return value; + } + } + if (g[name] !== undefined && g[name] !== null) { + return g[name]; + } + return null; + }; + + const wasmExports = resolve("wasmExports") || resolve("exports"); + const asmObj = resolve("asm"); + + const isWasmMemory = (value) => + typeof WebAssembly !== "undefined" && + typeof WebAssembly.Memory !== "undefined" && + value instanceof WebAssembly.Memory; + const isWasmTable = (value) => + typeof WebAssembly !== "undefined" && + typeof WebAssembly.Table !== "undefined" && + value instanceof WebAssembly.Table; + const heapU8ForProbe = resolve("HEAPU8"); + const heapBufferForProbe = heapU8ForProbe && heapU8ForProbe.buffer ? heapU8ForProbe.buffer : null; + const heapBufferLenForProbe = + heapBufferForProbe && typeof heapBufferForProbe.byteLength === "number" + ? heapBufferForProbe.byteLength + : -1; + + const isMemoryLike = (value) => { + if (!value) { + return false; + } + if (isWasmMemory(value)) { + return true; + } + let buf = null; + try { + buf = value.buffer; + } catch (_e) { + buf = null; + } + if (!buf || typeof buf.byteLength !== "number") { + return false; + } + if (typeof value.grow !== "function") { + return false; + } + if (heapBufferForProbe && buf !== heapBufferForProbe) { + const bufLen = typeof buf.byteLength === "number" ? 
buf.byteLength : -1; + if (heapBufferLenForProbe > 0 && bufLen > 0 && bufLen < heapBufferLenForProbe) { + return false; + } + } + return true; + }; + + const isTableLike = (value) => { + if (!value) { + return false; + } + if (isWasmTable(value)) { + return true; + } + return ( + typeof value.get === "function" && + typeof value.grow === "function" && + typeof value.length === "number" + ); + }; + + const findMemoryOrTableByType = (wantMemory) => { + const isObj = (v) => v && (typeof v === "object" || typeof v === "function"); + const seen = new Set(); + const queue = []; + const maxDepth = 6; + const maxVisited = 5000; + + for (const cand of candidates) { + if (isObj(cand.obj)) { + queue.push({ value: cand.obj, depth: 0 }); + } + } + + while (queue.length > 0 && seen.size < maxVisited) { + const node = queue.shift(); + const obj = node.value; + const depth = node.depth; + if (!isObj(obj) || seen.has(obj)) { + continue; + } + seen.add(obj); + + if (wantMemory && isMemoryLike(obj)) { + return obj; + } + if (!wantMemory && isTableLike(obj)) { + return obj; + } + if (depth >= maxDepth) { + continue; + } + + let keys = []; + try { + keys = Object.getOwnPropertyNames(obj); + } catch (_e) { + keys = []; + } + let symKeys = []; + try { + symKeys = Object.getOwnPropertySymbols(obj); + } catch (_e) { + symKeys = []; + } + const allKeys = keys.concat(symKeys); + + for (const key of allKeys) { + let value; + try { + value = obj[key]; + } catch (_e) { + continue; + } + + if (wantMemory && isMemoryLike(value)) { + return value; + } + if (!wantMemory && isTableLike(value)) { + return value; + } + if (isObj(value)) { + if (wantMemory && isMemoryLike(value.memory)) { + return value.memory; + } + if (!wantMemory && isTableLike(value.__indirect_function_table)) { + return value.__indirect_function_table; + } + queue.push({ value, depth: depth + 1 }); + } + } + + let proto = null; + try { + proto = Object.getPrototypeOf(obj); + } catch (_e) { + proto = null; + } + if (isObj(proto)) { 
+ queue.push({ value: proto, depth: depth + 1 }); + } + } + + return null; + }; + + const captureMemoryViaGrowHook = () => { + if ( + typeof WebAssembly === "undefined" || + typeof WebAssembly.Memory === "undefined" || + !WebAssembly.Memory.prototype || + typeof WebAssembly.Memory.prototype.grow !== "function" + ) { + return null; + } + + const growMemory = resolve("growMemory"); + const resizeHeap = resolve("_emscripten_resize_heap"); + if (typeof growMemory !== "function" && typeof resizeHeap !== "function") { + return null; + } + + const heapU8 = resolve("HEAPU8"); + const currentBytes = + heapU8 && heapU8.buffer && typeof heapU8.buffer.byteLength === "number" + ? heapU8.buffer.byteLength + : 0; + if (currentBytes <= 0) { + return null; + } + const onePage = 64 * 1024; + let targetBytes = currentBytes + onePage; + const getHeapMax = resolve("getHeapMax"); + if (typeof getHeapMax === "function") { + try { + const maxBytes = getHeapMax(); + if (typeof maxBytes === "number" && maxBytes > 0) { + targetBytes = Math.min(targetBytes, maxBytes); + } + } catch (_e) { + /* ignore */ + } + } + if (targetBytes <= currentBytes) { + return null; + } + + let captured = null; + const originalGrow = WebAssembly.Memory.prototype.grow; + WebAssembly.Memory.prototype.grow = function patchedGrow(pages) { + captured = this; + return originalGrow.call(this, pages); + }; + + try { + if (typeof growMemory === "function") { + growMemory(targetBytes); + } else if (typeof resizeHeap === "function") { + resizeHeap(targetBytes); + } + } catch (_e) { + /* best effort only */ + } finally { + WebAssembly.Memory.prototype.grow = originalGrow; + } + + if (captured && isMemoryLike(captured)) { + return captured; + } + return null; + }; + + const deriveRuntimeFromAdjustedImports = () => { + for (const cand of candidates) { + const obj = cand.obj; + if (!obj || typeof obj.adjustWasmImports !== "function") { + continue; + } + try { + const importsObj = { env: {} }; + const adjustedMaybe = 
obj.adjustWasmImports(importsObj); + const adjusted = + adjustedMaybe && (typeof adjustedMaybe === "object" || typeof adjustedMaybe === "function") + ? adjustedMaybe + : importsObj; + const env = + (adjusted && adjusted.env) || + (importsObj && importsObj.env) || + null; + if (!env) { + continue; + } + const mem = + env.memory || + env.wasmMemory || + (adjusted && (adjusted.memory || adjusted.wasmMemory)) || + null; + const tbl = + env.__indirect_function_table || + env.wasmTable || + (adjusted && (adjusted.__indirect_function_table || adjusted.wasmTable)) || + null; + if (mem || tbl) { + return { memory: mem, table: tbl }; + } + } catch (_e) { + continue; + } + } + return null; + }; + + const adjustedRuntime = deriveRuntimeFromAdjustedImports(); + + const wasmMemory = + resolve("wasmMemory") || + resolve("memory") || + resolve("wasmMemoryObject") || + resolve("__wasmMemory") || + (asmObj && asmObj.memory) || + (asmObj && asmObj.wasmMemory) || + (wasmExports && wasmExports.memory) || + (adjustedRuntime && adjustedRuntime.memory) || + captureMemoryViaGrowHook() || + findMemoryOrTableByType(true) || + null; + const wasmTable = + resolve("wasmTable") || + resolve("__indirect_function_table") || + (asmObj && asmObj.__indirect_function_table) || + (asmObj && asmObj.wasmTable) || + (wasmExports && wasmExports.__indirect_function_table) || + (adjustedRuntime && adjustedRuntime.table) || + findMemoryOrTableByType(false) || + null; + const runtime = { + HEAPF32: resolve("HEAPF32"), + HEAPF64: resolve("HEAPF64"), + HEAPU8: heapU8ForProbe, + wasmMemory, + wasmTable, + addFunction: resolve("addFunction"), + removeFunction: resolve("removeFunction"), + stackSave: resolve("stackSave"), + stackAlloc: resolve("stackAlloc"), + stackRestore: resolve("stackRestore"), + lengthBytesUTF8: resolve("lengthBytesUTF8"), + stringToUTF8: resolve("stringToUTF8"), + err: resolve("err"), + }; + + const required = [ + "HEAPF32", + "HEAPF64", + "HEAPU8", + "wasmMemory", + "wasmTable", + 
"addFunction", + "removeFunction", + "stackSave", + "stackAlloc", + "stackRestore", + "lengthBytesUTF8", + "stringToUTF8", + ]; + const missing = required.filter((name) => !runtime[name]); + if (missing.length > 0) { + const aliasKeys = [ + "wasmMemory", + "memory", + "wasmExports", + "asm", + "__indirect_function_table", + "wasmTable", + "adjustWasmImports", + ]; + const keyRegex = /(mem|wasm|asm|module|heap)/i; + const diag = candidates.map((cand) => { + const have = required.filter((name) => { + try { + return !!cand.obj[name]; + } catch (_e) { + return false; + } + }); + const aliases = aliasKeys.filter((name) => { + try { + return cand.obj[name] !== undefined && cand.obj[name] !== null; + } catch (_e) { + return false; + } + }); + let ownKeys = []; + try { + ownKeys = Object.getOwnPropertyNames(cand.obj); + } catch (_e) { + ownKeys = []; + } + const interesting = ownKeys.filter((k) => keyRegex.test(k)).slice(0, 20); + return `${cand.name}=[${have.join(",")}],aliases=[${aliases.join(",")}],keys=[${interesting.join(",")}]`; + }).join(" | "); + return { + instantiatePtr: 0, + freePtr: 0, + error: `missing runtime members: ${missing.join(", ")}; candidates: ${diag}`, + }; + } + + if (typeof g._meJitInstantiate !== "function" || typeof g._meJitFreeFn !== "function") { + return { instantiatePtr: 0, freePtr: 0, error: "me_jit_glue exports unavailable" }; + } + + const refreshRuntimeViews = () => { + const updater = resolve("updateMemoryViews"); + if (typeof updater === "function") { + try { + updater(); + } catch (_e) { + /* best effort only */ + } + runtime.HEAPU8 = resolve("HEAPU8") || runtime.HEAPU8; + runtime.HEAPF32 = resolve("HEAPF32") || runtime.HEAPF32; + runtime.HEAPF64 = resolve("HEAPF64") || runtime.HEAPF64; + } + + const mem = runtime.wasmMemory; + const buffer = mem && mem.buffer ? 
def _trace_enabled() -> bool:
    """Report whether the ``ME_DSL_TRACE`` environment variable enables tracing.

    The variable is read on every call and compared case-insensitively
    against ``1``, ``true``, ``on`` and ``yes``; anything else (including an
    unset variable) means tracing is off.
    """
    flag = os.environ.get("ME_DSL_TRACE", "").lower()
    return flag in {"1", "true", "on", "yes"}


def _trace(message: str) -> None:
    """Print *message* with the ``[blosc2.wasm-jit]`` prefix when tracing is on."""
    if not _trace_enabled():
        return
    print(f"[blosc2.wasm-jit] {message}")


def _js_eval(js_mod, source: str):
    """Evaluate JavaScript *source* and return whatever the evaluator returns.

    ``js_mod.eval`` is used when that attribute exists and is not ``None``;
    otherwise the call goes through ``js_mod.globalThis.eval``.
    """
    direct_eval = getattr(js_mod, "eval", None)
    if direct_eval is None:
        return js_mod.globalThis.eval(source)
    return direct_eval(source)


def _load_glue_once(js_mod) -> bool:
    """Make sure the ``me_jit_glue.js`` exports are present on ``globalThis``.

    Returns ``True`` when both ``_meJitInstantiate`` and ``_meJitFreeFn`` are
    functions on ``globalThis``, either already or after evaluating the
    ``me_jit_glue.js`` file that sits next to this module. Returns ``False``
    when the file cannot be read, when evaluating it raises, or when the
    exports are still missing afterwards.
    """
    exports_probe = (
        "typeof globalThis._meJitInstantiate === 'function' && "
        "typeof globalThis._meJitFreeFn === 'function'"
    )
    # Nothing to do when the glue has already been evaluated.
    if bool(_js_eval(js_mod, exports_probe)):
        return True

    glue_path = Path(__file__).with_name("me_jit_glue.js")
    try:
        glue_source = glue_path.read_text(encoding="utf-8")
    except OSError as exc:
        _trace(f"could not read {glue_path.name}: {exc}")
        return False

    try:
        _js_eval(js_mod, glue_source)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"failed to evaluate {glue_path.name}: {exc}")
        return False

    # Re-run the probe: evaluating the file is only useful if it defined both exports.
    return bool(_js_eval(js_mod, exports_probe))


def _inject_pyodide_runtime_handles(js_mod) -> None:
    """Publish ``pyodide_js`` handles on ``js_mod.globalThis``.

    When ``pyodide_js`` can be imported, the first non-``None`` attribute among
    ``_module``, ``module`` and ``Module`` is stored as
    ``globalThis.__blosc2_pyodide_module``, and a non-``None`` ``_api``
    attribute is stored as ``globalThis.__blosc2_pyodide_api``. Does nothing
    when ``pyodide_js`` is not importable.
    """
    try:
        import pyodide_js
    except ImportError:
        return

    handles = (getattr(pyodide_js, attr, None) for attr in ("_module", "module", "Module"))
    module_handle = next((handle for handle in handles if handle is not None), None)
    if module_handle is not None:
        js_mod.globalThis.__blosc2_pyodide_module = module_handle
        _trace("captured pyodide_js module handle")

    api_handle = getattr(pyodide_js, "_api", None)
    if api_handle is not None:
        js_mod.globalThis.__blosc2_pyodide_api = api_handle
        _trace("captured pyodide_js API handle")


def _create_helper_ptrs(js_mod) -> tuple[int, int] | None:
    """Evaluate ``_REGISTER_HELPERS_JS`` and extract the two helper pointers.

    Returns ``(instantiate_ptr, free_ptr)`` built from the ``instantiatePtr``
    and ``freePtr`` attributes of the evaluation result. Returns ``None`` when
    the evaluation raises, when the attributes cannot be converted to ``int``,
    or when either pointer is zero (the result's ``error`` attribute, if set,
    is traced in that case).
    """
    try:
        outcome = _js_eval(js_mod, _REGISTER_HELPERS_JS)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"helper setup JS failed: {exc}")
        return None

    try:
        pointers = (int(outcome.instantiatePtr), int(outcome.freePtr))
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"unexpected helper setup result: {exc}")
        return None

    if all(pointers):
        return pointers

    # A zero pointer signals failure; surface the diagnostic the JS side attached.
    reported = getattr(outcome, "error", None)
    if reported:
        _trace(str(reported))
    return None


def init_wasm_jit_helpers() -> bool:
    """Register the WASM JIT helper pointers with the extension, once.

    Returns ``True`` when the helpers are registered (now or by an earlier
    call). Returns ``False`` when the ``js`` module cannot be imported, when
    ``blosc2_ext`` has no ``_register_wasm_jit_helpers``, when the glue script
    cannot be loaded, when the helper pointers cannot be created, or when the
    registration call raises.
    """
    global _HELPERS_REGISTERED
    if _HELPERS_REGISTERED:
        return True

    try:
        import js
    except ImportError:
        return False

    from . import blosc2_ext

    if not hasattr(blosc2_ext, "_register_wasm_jit_helpers"):
        _trace("extension does not expose _register_wasm_jit_helpers")
        return False

    _inject_pyodide_runtime_handles(js)
    if not _load_glue_once(js):
        _trace("me_jit_glue.js was not loaded")
        return False

    pointer_pair = _create_helper_ptrs(js)
    if pointer_pair is None:
        _trace("could not allocate addFunction helper pointers")
        return False

    instantiate_ptr, free_ptr = pointer_pair
    try:
        blosc2_ext._register_wasm_jit_helpers(instantiate_ptr, free_ptr)
    except Exception as exc:  # pragma: no cover - pyodide-specific error path
        _trace(f"C helper registration failed: {exc}")
        return False
    else:
        _HELPERS_REGISTERED = True
    _trace(f"registered wasm JIT helper pointers instantiate={instantiate_ptr} free={free_ptr}")
    return True
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +#cython: language_level=3 + +import os +import ast +import atexit +import pathlib + +import _ctypes + +import cython +from cpython cimport ( + Py_buffer, + PyBUF_SIMPLE, + PyBuffer_Release, + PyBytes_FromStringAndSize, + PyObject_GetBuffer, +) +from cpython.ref cimport Py_INCREF, Py_DECREF +from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New +from cython.operator cimport dereference +from libc.stdint cimport uintptr_t +from libc.stdlib cimport free, malloc, realloc, calloc +from libc.stdlib cimport abs as c_abs +from libc.string cimport memcpy, memset, strcpy, strdup, strlen +from libcpp cimport bool as c_bool + +from enum import Enum + +import numpy as np +from msgpack import packb, unpackb + +import blosc2 + +cimport numpy as np + +np.import_array() + + +cdef extern from "": + ctypedef signed char int8_t + ctypedef signed short int16_t + ctypedef signed int int32_t + ctypedef signed long int64_t + ctypedef unsigned char uint8_t + ctypedef unsigned short uint16_t + ctypedef unsigned int uint32_t + ctypedef unsigned long long uint64_t + +cdef extern from "": + int printf(const char *format, ...) 
nogil + +cdef extern from "blosc2.h": + + ctypedef enum: + BLOSC2_MAX_FILTERS + BLOSC2_DEFINED_FILTERS_START + BLOSC2_DEFINED_FILTERS_STOP + BLOSC2_GLOBAL_REGISTERED_FILTERS_START + BLOSC2_GLOBAL_REGISTERED_FILTERS_STOP + BLOSC2_GLOBAL_REGISTERED_FILTERS + BLOSC2_USER_REGISTERED_FILTERS_START + BLOSC2_USER_REGISTERED_FILTERS_STOP + BLOSC2_MAX_UDFILTERS + BLOSC2_MAX_METALAYERS + BLOSC2_MAX_VLMETALAYERS + BLOSC2_PREFILTER_INPUTS_MAX + BLOSC_MAX_CODECS + BLOSC_MIN_HEADER_LENGTH + BLOSC_EXTENDED_HEADER_LENGTH + BLOSC2_MAX_OVERHEAD + BLOSC2_MAX_BUFFERSIZE + BLOSC2_MAXBLOCKSIZE + BLOSC2_MAXTYPESIZE + BLOSC_MAX_TYPESIZE + BLOSC_MIN_BUFFERSIZE + + ctypedef enum: + BLOSC2_SPECIAL_ZERO + BLOSC2_SPECIAL_NAN + BLOSC2_SPECIAL_UNINIT + + ctypedef enum: + BLOSC2_VERSION_STRING + BLOSC2_VERSION_REVISION + BLOSC2_VERSION_DATE + + ctypedef enum: + BLOSC2_ERROR_SUCCESS + BLOSC2_ERROR_FAILURE + BLOSC2_ERROR_STREAM + BLOSC2_ERROR_DATA + BLOSC2_ERROR_MEMORY_ALLOC + BLOSC2_ERROR_READ_BUFFER + BLOSC2_ERROR_WRITE_BUFFER + BLOSC2_ERROR_CODEC_SUPPORT + BLOSC2_ERROR_CODEC_PARAM + BLOSC2_ERROR_CODEC_DICT + BLOSC2_ERROR_VERSION_SUPPORT + BLOSC2_ERROR_INVALID_HEADER + BLOSC2_ERROR_INVALID_PARAM + BLOSC2_ERROR_FILE_READ + BLOSC2_ERROR_FILE_WRITE + BLOSC2_ERROR_FILE_OPEN + BLOSC2_ERROR_NOT_FOUND + BLOSC2_ERROR_RUN_LENGTH + BLOSC2_ERROR_FILTER_PIPELINE + BLOSC2_ERROR_CHUNK_INSERT + BLOSC2_ERROR_CHUNK_APPEND + BLOSC2_ERROR_CHUNK_UPDATE + BLOSC2_ERROR_2GB_LIMIT + BLOSC2_ERROR_SCHUNK_COPY + BLOSC2_ERROR_FRAME_TYPE + BLOSC2_ERROR_FILE_TRUNCATE + BLOSC2_ERROR_THREAD_CREATE + BLOSC2_ERROR_POSTFILTER + BLOSC2_ERROR_FRAME_SPECIAL + BLOSC2_ERROR_SCHUNK_SPECIAL + BLOSC2_ERROR_PLUGIN_IO + BLOSC2_ERROR_FILE_REMOVE + + ctypedef enum: + BLOSC2_DEFINED_CODECS_START + BLOSC2_DEFINED_CODECS_STOP + BLOSC2_GLOBAL_REGISTERED_CODECS_START + BLOSC2_GLOBAL_REGISTERED_CODECS_STOP + BLOSC2_GLOBAL_REGISTERED_CODECS + BLOSC2_USER_REGISTERED_CODECS_START + BLOSC2_USER_REGISTERED_CODECS_STOP + + ctypedef enum: + 
BLOSC2_IO_FILESYSTEM + BLOSC2_IO_FILESYSTEM_MMAP + BLOSC_IO_LAST_BLOSC_DEFINED + BLOSC_IO_LAST_REGISTERED + + cdef int INT_MAX + + void blosc2_init() + void blosc2_destroy() + + int blosc1_compress(int clevel, int doshuffle, size_t typesize, + size_t nbytes, const void* src, void* dest, + size_t destsize) + + int blosc1_decompress(const void* src, void* dest, size_t destsize) + + int blosc1_getitem(const void* src, int start, int nitems, void* dest) + + int blosc2_getitem(const void* src, int32_t srcsize, int start, int nitems, + void* dest, int32_t destsize) + + ctypedef void(*blosc2_threads_callback)(void *callback_data, void (*dojob)(void *), int numjobs, + size_t jobdata_elsize, void *jobdata) + + void blosc2_set_threads_callback(blosc2_threads_callback callback, void *callback_data) + + int16_t blosc2_set_nthreads(int16_t nthreads) + + const char* blosc1_get_compressor() + + int blosc1_set_compressor(const char* compname) + + void blosc2_set_delta(int dodelta) + + int blosc2_compcode_to_compname(int compcode, const char** compname) + + int blosc2_compname_to_compcode(const char* compname) + + const char* blosc2_list_compressors() + + int blosc2_get_complib_info(const char* compname, char** complib, + char** version) + + int blosc2_free_resources() + + int blosc2_cbuffer_sizes(const void* cbuffer, int32_t* nbytes, + int32_t* cbytes, int32_t* blocksize) nogil + + int blosc1_cbuffer_validate(const void* cbuffer, size_t cbytes, size_t* nbytes) + + void blosc1_cbuffer_metainfo(const void* cbuffer, size_t* typesize, int* flags) + + void blosc1_cbuffer_versions(const void* cbuffer, int* version, int* versionlz) + + const char* blosc2_cbuffer_complib(const void* cbuffer) + + + ctypedef struct blosc2_context: + pass + + ctypedef struct blosc2_prefilter_params: + void* user_data + const uint8_t* input + uint8_t* output + int32_t output_size + int32_t output_typesize + int32_t output_offset + int64_t nchunk + int32_t nblock + int32_t tid + uint8_t* ttmp + size_t 
ttmp_nbytes + blosc2_context* ctx + c_bool output_is_disposable + + ctypedef struct blosc2_postfilter_params: + void *user_data + const uint8_t *input + uint8_t *output + int32_t size + int32_t typesize + int32_t offset + int64_t nchunk + int32_t nblock + int32_t tid + uint8_t *ttmp + size_t ttmp_nbytes + blosc2_context *ctx + + ctypedef int(*blosc2_prefilter_fn)(blosc2_prefilter_params* params) + + ctypedef int(*blosc2_postfilter_fn)(blosc2_postfilter_params *params) + + ctypedef struct blosc2_cparams: + uint8_t compcode + uint8_t compcode_meta + uint8_t clevel + int use_dict + int32_t typesize + int16_t nthreads + int32_t blocksize + int32_t splitmode + void *schunk + uint8_t filters[BLOSC2_MAX_FILTERS] + uint8_t filters_meta[BLOSC2_MAX_FILTERS] + blosc2_prefilter_fn prefilter + blosc2_prefilter_params* preparams + int tuner_id + void* tuner_params + c_bool instr_codec + void* codec_params + void* filter_params[BLOSC2_MAX_FILTERS] + + cdef const blosc2_cparams BLOSC2_CPARAMS_DEFAULTS + + ctypedef struct blosc2_dparams: + int16_t nthreads + void* schunk + blosc2_postfilter_fn postfilter + blosc2_postfilter_params *postparams + int32_t typesize + + cdef const blosc2_dparams BLOSC2_DPARAMS_DEFAULTS + + blosc2_context* blosc2_create_cctx(blosc2_cparams cparams) nogil + + blosc2_context* blosc2_create_dctx(blosc2_dparams dparams) nogil + + void blosc2_free_ctx(blosc2_context * context) nogil + + int blosc2_set_maskout(blosc2_context *ctx, c_bool *maskout, int nblocks) + + + int blosc2_compress(int clevel, int doshuffle, int32_t typesize, + const void * src, int32_t srcsize, void * dest, + int32_t destsize) nogil + + int blosc2_decompress(const void * src, int32_t srcsize, + void * dest, int32_t destsize) + + int blosc2_compress_ctx( + blosc2_context * context, const void * src, int32_t srcsize, void * dest, + int32_t destsize) nogil + + int blosc2_decompress_ctx(blosc2_context * context, const void * src, + int32_t srcsize, void * dest, int32_t destsize) nogil + + int 
blosc2_getitem_ctx(blosc2_context* context, const void* src, + int32_t srcsize, int start, int nitems, void* dest, + int32_t destsize) nogil + + + + ctypedef struct blosc2_storage: + c_bool contiguous + char* urlpath + blosc2_cparams* cparams + blosc2_dparams* dparams + blosc2_io *io + + cdef const blosc2_storage BLOSC2_STORAGE_DEFAULTS + + ctypedef struct blosc2_frame: + pass + + ctypedef struct blosc2_metalayer: + char* name + uint8_t* content + int32_t content_len + + + ctypedef struct blosc2_tuner: + void(*init)(void *config, blosc2_context*cctx, blosc2_context*dctx) + void (*next_blocksize)(blosc2_context *context) + void(*next_cparams)(blosc2_context *context) + void(*update)(blosc2_context *context, double ctime) + void (*free)(blosc2_context *context) + int id + char *name + + ctypedef struct blosc2_io: + uint8_t id + const char *name + void* params + + ctypedef struct blosc2_stdio_mmap: + const char* mode + int64_t initial_mapping_size + c_bool needs_free + + cdef const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS + + ctypedef struct blosc2_schunk: + uint8_t version + uint8_t compcode + uint8_t compcode_meta + uint8_t clevel + uint8_t splitmode + int32_t typesize + int32_t blocksize + int32_t chunksize + uint8_t filters[BLOSC2_MAX_FILTERS] + uint8_t filters_meta[BLOSC2_MAX_FILTERS] + int64_t nchunks + int64_t current_nchunk + int64_t nbytes + int64_t cbytes + uint8_t** data + size_t data_len + blosc2_storage* storage + blosc2_frame* frame + blosc2_context* cctx + blosc2_context* dctx + blosc2_metalayer *metalayers[BLOSC2_MAX_METALAYERS] + uint16_t nmetalayers + blosc2_metalayer *vlmetalayers[BLOSC2_MAX_VLMETALAYERS] + int16_t nvlmetalayers + int tuner_id + void *tuner_params + int8_t ndim + int64_t *blockshape + + blosc2_schunk *blosc2_schunk_new(blosc2_storage *storage) + blosc2_schunk *blosc2_schunk_copy(blosc2_schunk *schunk, blosc2_storage *storage) + blosc2_schunk *blosc2_schunk_from_buffer(uint8_t *cframe, int64_t len, c_bool copy) + blosc2_schunk 
*blosc2_schunk_open_offset(const char* urlpath, int64_t offset) + blosc2_schunk* blosc2_schunk_open_offset_udio(const char* urlpath, int64_t offset, const blosc2_io *udio) + + int64_t blosc2_schunk_to_buffer(blosc2_schunk* schunk, uint8_t** cframe, c_bool* needs_free) + void blosc2_schunk_avoid_cframe_free(blosc2_schunk *schunk, c_bool avoid_cframe_free) + int64_t blosc2_schunk_to_file(blosc2_schunk* schunk, const char* urlpath) + int64_t blosc2_schunk_free(blosc2_schunk *schunk) + int64_t blosc2_schunk_append_chunk(blosc2_schunk *schunk, uint8_t *chunk, c_bool copy) + int64_t blosc2_schunk_update_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t *chunk, c_bool copy) + int64_t blosc2_schunk_insert_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t *chunk, c_bool copy) + int64_t blosc2_schunk_delete_chunk(blosc2_schunk *schunk, int64_t nchunk) + int64_t blosc2_schunk_fill_special(blosc2_schunk *schunk, int64_t nitems, int special_value, + int32_t chunksize); + + int64_t blosc2_schunk_append_buffer(blosc2_schunk *schunk, void *src, int32_t nbytes) + int blosc2_schunk_decompress_chunk(blosc2_schunk *schunk, int64_t nchunk, void *dest, int32_t nbytes) + + int blosc2_schunk_get_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t ** chunk, + c_bool *needs_free) nogil + int blosc2_schunk_get_lazychunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t ** chunk, + c_bool *needs_free) nogil + int blosc2_schunk_get_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t stop, void *buffer) + int blosc2_schunk_set_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t stop, void *buffer) + int blosc2_schunk_get_cparams(blosc2_schunk *schunk, blosc2_cparams** cparams) + int blosc2_schunk_get_dparams(blosc2_schunk *schunk, blosc2_dparams** dparams) + int blosc2_schunk_reorder_offsets(blosc2_schunk *schunk, int64_t *offsets_order) + int64_t blosc2_schunk_frame_len(blosc2_schunk* schunk) + + int blosc2_chunk_repeatval(blosc2_cparams cparams, const int32_t nbytes, + void 
*dest, int32_t destsize, const void *repeatval) + + int blosc2_meta_exists(blosc2_schunk *schunk, const char *name) + int blosc2_meta_add(blosc2_schunk *schunk, const char *name, uint8_t *content, + int32_t content_len) + int blosc2_meta_update(blosc2_schunk *schunk, const char *name, uint8_t *content, + int32_t content_len) + int blosc2_meta_get(blosc2_schunk *schunk, const char *name, uint8_t **content, + int32_t *content_len) + int blosc2_vlmeta_exists(blosc2_schunk *schunk, const char *name) + int blosc2_vlmeta_add(blosc2_schunk *schunk, const char *name, + uint8_t *content, int32_t content_len, blosc2_cparams *cparams) + int blosc2_vlmeta_update(blosc2_schunk *schunk, const char *name, + uint8_t *content, int32_t content_len, blosc2_cparams *cparams) + int blosc2_vlmeta_get(blosc2_schunk *schunk, const char *name, + uint8_t **content, int32_t *content_len) + int blosc2_vlmeta_delete(blosc2_schunk *schunk, const char *name) + int blosc2_vlmeta_get_names(blosc2_schunk *schunk, char **names) + + + int blosc1_get_blocksize() + void blosc1_set_blocksize(size_t blocksize) + void blosc1_set_schunk(blosc2_schunk *schunk) + + int blosc2_remove_dir(const char *path) + int blosc2_remove_urlpath(const char *path) + + ctypedef int(*blosc2_codec_encoder_cb)(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, + uint8_t meta, blosc2_cparams *cparams, const void *chunk) + ctypedef int(*blosc2_codec_decoder_cb)(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, + uint8_t meta, blosc2_dparams *dparams, const void *chunk) + + ctypedef struct blosc2_codec: + uint8_t compcode + char* compname + uint8_t complib + uint8_t version + blosc2_codec_encoder_cb encoder + blosc2_codec_decoder_cb decoder + + int blosc2_register_codec(blosc2_codec *codec) + + ctypedef int(*blosc2_filter_forward_cb)(const uint8_t *, uint8_t *, int32_t, uint8_t, blosc2_cparams *, uint8_t) + ctypedef int(*blosc2_filter_backward_cb)(const uint8_t *, uint8_t *, 
int32_t, uint8_t, blosc2_dparams *, uint8_t) + + ctypedef struct blosc2_filter: + uint8_t id + char* name + blosc2_filter_forward_cb forward + blosc2_filter_backward_cb backward + + int blosc2_register_filter(blosc2_filter *filter) + + int blosc2_get_slice_nchunks(blosc2_schunk * schunk, int64_t *start, int64_t *stop, int64_t ** chunks_idx) + + +cdef extern from "b2nd.h": + ctypedef enum: + B2ND_MAX_DIM + B2ND_MAX_METALAYERS + B2ND_DEFAULT_DTYPE_FORMAT + + cdef struct chunk_cache_s: + uint8_t *data + int64_t nchunk + + ctypedef struct b2nd_array_t: + blosc2_schunk* sc + int64_t shape[B2ND_MAX_DIM] + int32_t chunkshape[B2ND_MAX_DIM] + int64_t extshape[B2ND_MAX_DIM] + int32_t blockshape[B2ND_MAX_DIM] + int64_t extchunkshape[B2ND_MAX_DIM] + int64_t nitems + int32_t chunknitems + int64_t extnitems + int32_t blocknitems + int64_t extchunknitems + int8_t ndim + chunk_cache_s chunk_cache + int64_t item_array_strides[B2ND_MAX_DIM] + int64_t item_chunk_strides[B2ND_MAX_DIM] + int64_t item_extchunk_strides[B2ND_MAX_DIM] + int64_t item_block_strides[B2ND_MAX_DIM] + int64_t block_chunk_strides[B2ND_MAX_DIM] + int64_t chunk_array_strides[B2ND_MAX_DIM] + char *dtype + int8_t dtype_format + + ctypedef struct b2nd_context_t: + pass + b2nd_context_t *b2nd_create_ctx(blosc2_storage *b2_storage, int8_t ndim, int64_t *shape, + int32_t *chunkshape, int32_t *blockshape, char *dtype, + int8_t dtype_format, blosc2_metalayer *metalayers, int32_t nmetalayers) + int b2nd_free_ctx(b2nd_context_t *ctx) + + int b2nd_uninit(b2nd_context_t *ctx, b2nd_array_t ** array) + + int b2nd_nans(b2nd_context_t * ctx, b2nd_array_t ** array) + + int b2nd_empty(b2nd_context_t *ctx, b2nd_array_t **array) + int b2nd_zeros(b2nd_context_t *ctx, b2nd_array_t **array) + int b2nd_full(b2nd_context_t *ctx, b2nd_array_t ** array, void *fill_value) + + int b2nd_free(b2nd_array_t *array) + int b2nd_get_slice_cbuffer(b2nd_array_t *array, + int64_t *start, int64_t *stop, + void *buffer, int64_t *buffershape, int64_t 
buffersize) + int b2nd_set_slice_cbuffer(void *buffer, int64_t *buffershape, int64_t buffersize, + int64_t *start, int64_t *stop, b2nd_array_t *array) + int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, b2nd_array_t *src, const int64_t *start, + const int64_t *stop) + int b2nd_from_cbuffer(b2nd_context_t *ctx, b2nd_array_t **array, void *buffer, int64_t buffersize) + int b2nd_to_cbuffer(b2nd_array_t *array, void *buffer, int64_t buffersize) + int b2nd_from_cframe(uint8_t *cframe, int64_t cframe_len, c_bool copy, b2nd_array_t ** array); + int b2nd_to_cframe(const b2nd_array_t *array, uint8_t ** cframe, int64_t *cframe_len, + c_bool *needs_free); + + int b2nd_squeeze(b2nd_array_t *array, b2nd_array_t **view) + int b2nd_squeeze_index(b2nd_array_t *array, b2nd_array_t **view, const c_bool *index) + int b2nd_resize(b2nd_array_t *array, const int64_t *new_shape, const int64_t *start) + int b2nd_copy(b2nd_context_t *ctx, b2nd_array_t *src, b2nd_array_t **array) + int b2nd_concatenate(b2nd_context_t *ctx, b2nd_array_t *src1, b2nd_array_t *src2, + int8_t axis, c_bool copy, b2nd_array_t **array) + int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t ** view, const c_bool *axis, const uint8_t final_dims) + int b2nd_get_orthogonal_selection(const b2nd_array_t *array, int64_t ** selection, + int64_t *selection_size, void *buffer, + int64_t *buffershape, int64_t buffersize) + int b2nd_set_orthogonal_selection(const b2nd_array_t *array, int64_t ** selection, + int64_t *selection_size, void *buffer, + int64_t *buffershape, int64_t buffersize) + int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array) + + void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_t i, int64_t *index) nogil + int b2nd_copy_buffer2(int8_t ndim, + int32_t itemsize, + const void *src, const int64_t *src_pad_shape, + const int64_t *src_start, const int64_t *src_stop, + void *dst, const int64_t *dst_pad_shape, + const int64_t *dst_start) + + +# miniexpr C API 
declarations +cdef extern from "miniexpr.h": + ctypedef enum me_dtype: + ME_AUTO, + ME_BOOL + ME_INT8 + ME_INT16 + ME_INT32 + ME_INT64 + ME_UINT8 + ME_UINT16 + ME_UINT32 + ME_UINT64 + ME_FLOAT32 + ME_FLOAT64 + ME_COMPLEX64 + ME_COMPLEX128 + ME_STRING + + # typedef struct me_variable + ctypedef struct me_variable: + const char *name + me_dtype dtype + const void *address + int type + void *context + size_t itemsize + + ctypedef struct me_expr: + int type + double value + const double *bound + const void *function + void *output + int nitems + me_dtype dtype + me_dtype input_dtype + void *bytecode + int ncode + void *parameters[1] + + int me_compile_nd_jit(const char *expression, const me_variable *variables, + int var_count, me_dtype dtype, int ndims, + const int64_t *shape, const int32_t *chunkshape, + const int32_t *blockshape, int jit_mode, + int *error, me_expr **out) + + ctypedef enum me_compile_status: + ME_COMPILE_SUCCESS + ME_COMPILE_ERR_OOM + ME_COMPILE_ERR_PARSE + ME_COMPILE_ERR_INVALID_ARG + ME_COMPILE_ERR_COMPLEX_UNSUPPORTED + ME_COMPILE_ERR_REDUCTION_INVALID + ME_COMPILE_ERR_VAR_MIXED + ME_COMPILE_ERR_VAR_UNSPECIFIED + ME_COMPILE_ERR_INVALID_ARG_TYPE + ME_COMPILE_ERR_MIXED_TYPE_NESTED + + ctypedef enum me_simd_ulp_mode: + ME_SIMD_ULP_DEFAULT + ME_SIMD_ULP_1 + ME_SIMD_ULP_3_5 + + ctypedef enum me_jit_mode: + ME_JIT_DEFAULT + ME_JIT_ON + ME_JIT_OFF + + ctypedef struct me_eval_params: + c_bool disable_simd + me_simd_ulp_mode simd_ulp_mode + me_jit_mode jit_mode + + int me_eval(const me_expr *expr, const void **vars_block, + int n_vars, void *output_block, int chunk_nitems, + const me_eval_params *params) nogil + + int me_eval_nd(const me_expr *expr, const void **vars_block, + int n_vars, void *output_block, int block_nitems, + int64_t nchunk, int64_t nblock, const me_eval_params *params) nogil + + int me_nd_valid_nitems(const me_expr *expr, int64_t nchunk, int64_t nblock, int64_t *valid_nitems) nogil + + void me_print(const me_expr *n) nogil + void 
me_free(me_expr *n) nogil + + ctypedef int (*me_wasm_jit_instantiate_helper)( + const unsigned char *wasm_bytes, + int wasm_len, + int bridge_lookup_fn_idx + ) + ctypedef void (*me_wasm_jit_free_helper)(int fn_idx) + void me_register_wasm_jit_helpers(me_wasm_jit_instantiate_helper instantiate_helper, + me_wasm_jit_free_helper free_helper) + + +cdef extern from "miniexpr_numpy.h": + me_dtype me_dtype_from_numpy(int numpy_type_num) + +cdef extern from "pythread.h": + ctypedef void* PyThread_type_lock + PyThread_type_lock PyThread_allocate_lock() nogil + int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) nogil + void PyThread_release_lock(PyThread_type_lock lock) nogil + void PyThread_free_lock(PyThread_type_lock lock) nogil + + +ctypedef struct user_filters_udata: + char* py_func + int input_cdtype + int output_cdtype + int32_t chunkshape + +ctypedef struct filler_udata: + char* py_func + uintptr_t inputs_id + int output_cdtype + int32_t chunkshape + +ctypedef struct udf_udata: + char* py_func + uintptr_t inputs_id + int output_cdtype + b2nd_array_t *array + int64_t chunks_in_array[B2ND_MAX_DIM] + int64_t blocks_in_chunk[B2ND_MAX_DIM] + +ctypedef struct me_udata: + b2nd_array_t** inputs + int ninputs + me_eval_params* eval_params + b2nd_array_t* array + void* aux_reduc_ptr + int64_t chunks_in_array[B2ND_MAX_DIM] + int64_t blocks_in_chunk[B2ND_MAX_DIM] + me_expr* miniexpr_handle + +MAX_TYPESIZE = BLOSC2_MAXTYPESIZE +MAX_BUFFERSIZE = BLOSC2_MAX_BUFFERSIZE +MAX_BLOCKSIZE = BLOSC2_MAXBLOCKSIZE +MAX_OVERHEAD = BLOSC2_MAX_OVERHEAD +MAX_DIM = B2ND_MAX_DIM +VERSION_STRING = (BLOSC2_VERSION_STRING).decode("utf-8") +VERSION_DATE = (BLOSC2_VERSION_DATE).decode("utf-8") +MIN_HEADER_LENGTH = BLOSC_MIN_HEADER_LENGTH +EXTENDED_HEADER_LENGTH = BLOSC_EXTENDED_HEADER_LENGTH +DEFINED_CODECS_STOP = BLOSC2_DEFINED_CODECS_STOP +GLOBAL_REGISTERED_CODECS_STOP = BLOSC2_GLOBAL_REGISTERED_CODECS_STOP +USER_REGISTERED_CODECS_STOP = BLOSC2_USER_REGISTERED_CODECS_STOP 
+DEFAULT_DTYPE_FORMAT = B2ND_DEFAULT_DTYPE_FORMAT + +cdef _check_comp_length(comp_name, comp_len): + if comp_len < BLOSC_MIN_HEADER_LENGTH: + raise ValueError(f"{comp_name} cannot be less than {BLOSC_MIN_HEADER_LENGTH} bytes") + + +blosc2_init() +cdef PyThread_type_lock chunk_cache_lock = PyThread_allocate_lock() +if chunk_cache_lock == NULL: + raise MemoryError("Could not allocate chunk cache lock") + +@atexit.register +def destroy(): + if chunk_cache_lock != NULL: + PyThread_free_lock(chunk_cache_lock) + blosc2_destroy() + + +def _register_wasm_jit_helpers(uintptr_t instantiate_ptr, uintptr_t free_ptr): + cdef me_wasm_jit_instantiate_helper instantiate_helper = ( + instantiate_ptr + ) + cdef me_wasm_jit_free_helper free_helper = free_ptr + me_register_wasm_jit_helpers(instantiate_helper, free_helper) + + +cdef inline me_dtype _me_dtype_from_numpy_dtype(dtype_obj): + dtype = np.dtype(dtype_obj) + cdef int itemsize = dtype.itemsize + kind = dtype.kind + if kind == "b": + return ME_BOOL + if kind == "i": + if itemsize == 1: + return ME_INT8 + if itemsize == 2: + return ME_INT16 + if itemsize == 4: + return ME_INT32 + if itemsize == 8: + return ME_INT64 + elif kind == "u": + if itemsize == 1: + return ME_UINT8 + if itemsize == 2: + return ME_UINT16 + if itemsize == 4: + return ME_UINT32 + if itemsize == 8: + return ME_UINT64 + elif kind == "f": + if itemsize == 4: + return ME_FLOAT32 + if itemsize == 8: + return ME_FLOAT64 + elif kind == "c": + if itemsize == 8: + return ME_COMPLEX64 + if itemsize == 16: + return ME_COMPLEX128 + elif kind == "U": + # miniexpr string variables use fixed-size UCS4 (numpy unicode) storage. 
+ if itemsize <= 0 or itemsize % 4 != 0: + raise TypeError( + f"miniexpr string operands require unicode dtype with UCS4 itemsize; got '{dtype}'" + ) + return ME_STRING + return -1 + + +cdef inline str _me_compile_status_name(int rc): + if rc == ME_COMPILE_SUCCESS: + return "ME_COMPILE_SUCCESS" + if rc == ME_COMPILE_ERR_OOM: + return "ME_COMPILE_ERR_OOM" + if rc == ME_COMPILE_ERR_PARSE: + return "ME_COMPILE_ERR_PARSE" + if rc == ME_COMPILE_ERR_INVALID_ARG: + return "ME_COMPILE_ERR_INVALID_ARG" + if rc == ME_COMPILE_ERR_COMPLEX_UNSUPPORTED: + return "ME_COMPILE_ERR_COMPLEX_UNSUPPORTED" + if rc == ME_COMPILE_ERR_REDUCTION_INVALID: + return "ME_COMPILE_ERR_REDUCTION_INVALID" + if rc == ME_COMPILE_ERR_VAR_MIXED: + return "ME_COMPILE_ERR_VAR_MIXED" + if rc == ME_COMPILE_ERR_VAR_UNSPECIFIED: + return "ME_COMPILE_ERR_VAR_UNSPECIFIED" + if rc == ME_COMPILE_ERR_INVALID_ARG_TYPE: + return "ME_COMPILE_ERR_INVALID_ARG_TYPE" + if rc == ME_COMPILE_ERR_MIXED_TYPE_NESTED: + return "ME_COMPILE_ERR_MIXED_TYPE_NESTED" + return "ME_COMPILE_ERR_UNKNOWN" + + +cdef inline str _me_compile_error_details(int rc, int error): + cdef str details = f"{_me_compile_status_name(rc)} ({rc})" + if rc == ME_COMPILE_ERR_PARSE and error > 0: + details += f", parse_error_pos={error}" + elif error != 0: + details += f", error_pos={error}" + return details + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def nearest_divisor(int64_t a, int64_t b, bint strict=False): + """Find the divisor of `a` that is closest to `b`. + + Parameters + ---------- + a : int + The number for which to find divisors. + b : int + The reference value to compare divisors against. + strict : bool, optional + If True, always use the downward search algorithm. + + Returns + ------- + int + The divisor of `a` that is closest to `b`. + + Notes + ----- + This is a *much* faster version than its Python counterpart. 
+ """ + cdef: + int64_t i, closest, min_diff, diff + bint found + + if a > 100_000 or strict: + # For large numbers or when strict=True, search downwards from b + i = b + while i > 0: + if a % i == 0: + return i + i -= 1 + return 1 # Fallback to 1, which is always a divisor + + # For smaller numbers, find the closest divisor + closest = 1 + min_diff = a # Initialize to a large value + found = False + + # Search for divisors up to sqrt(a) + i = 1 + while i * i <= a: + if a % i == 0: + # Check i as a divisor + diff = c_abs(i - b) + if diff < min_diff: + min_diff = diff + closest = i + found = True + + # Check a/i as a divisor + diff = c_abs(a // i - b) + if diff < min_diff: + min_diff = diff + closest = a // i + found = True + i += 1 + + return closest if found else 1 + + +def cbuffer_sizes(src): + cdef const uint8_t[:] typed_view_src + mem_view_src = memoryview(src) + typed_view_src = mem_view_src.cast('B') + _check_comp_length('src', typed_view_src.nbytes) + cdef int32_t nbytes + cdef int32_t cbytes + cdef int32_t blocksize + blosc2_cbuffer_sizes(&typed_view_src[0], &nbytes, &cbytes, &blocksize) + return nbytes, cbytes, blocksize + + +cpdef compress(src, int32_t typesize=8, int clevel=9, filter=blosc2.Filter.SHUFFLE, codec=blosc2.Codec.BLOSCLZ): + set_compressor(codec) + cdef int32_t len_src = len(src) + cdef Py_buffer buf + PyObject_GetBuffer(src, &buf, PyBUF_SIMPLE) + dest = bytes(buf.len + BLOSC2_MAX_OVERHEAD) + cdef int32_t len_dest = len(dest) + cdef int size + cdef int filter_ = filter.value if isinstance(filter, Enum) else 0 + if RELEASEGIL: + _dest = dest + with nogil: + size = blosc2_compress(clevel, filter_, typesize, buf.buf, buf.len, _dest, len_dest) + else: + size = blosc2_compress(clevel, filter_, typesize, buf.buf, buf.len, dest, len_dest) + PyBuffer_Release(&buf) + if size > 0: + return dest[:size] + else: + raise ValueError("Cannot compress") + + +def decompress(src, dst=None, as_bytearray=False): + cdef int32_t nbytes + cdef int32_t cbytes + cdef 
int32_t blocksize + cdef const uint8_t[:] typed_view_src + + mem_view_src = memoryview(src) + typed_view_src = mem_view_src.cast('B') + _check_comp_length('src', len(typed_view_src)) + blosc2_cbuffer_sizes(&typed_view_src[0], &nbytes, &cbytes, &blocksize) + cdef Py_buffer buf + if dst is not None: + PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE) + if buf.len == 0: + raise ValueError("The dst length must be greater than 0") + size = blosc1_decompress(&typed_view_src[0], buf.buf, buf.len) + PyBuffer_Release(&buf) + else: + dst = PyBytes_FromStringAndSize(NULL, nbytes) + if dst is None: + raise RuntimeError("Could not get a bytes object") + size = blosc1_decompress(&typed_view_src[0], dst, len(dst)) + if as_bytearray: + dst = bytearray(dst) + if size >= 0: + return dst + if size < 0: + raise RuntimeError("Cannot decompress") + + +def set_compressor(codec): + codec = codec.name.lower().encode("utf-8") + size = blosc1_set_compressor(codec) + if size == -1: + raise ValueError("The code is not available") + else: + return size + +def free_resources(): + rc = blosc2_free_resources() + if rc < 0: + raise ValueError("Could not free the resources") + +def set_nthreads(nthreads): + if nthreads > INT_MAX: + raise ValueError("nthreads must be less or equal than 2^31 - 1.") + rc = blosc2_set_nthreads(nthreads) + if rc < 0: + raise ValueError("nthreads must be a positive integer.") + else: + return rc + +def set_blocksize(size_t blocksize=0): + blosc1_set_blocksize(blocksize) + +def clib_info(codec): + cdef char* clib + cdef char* version + codec = codec.name.lower().encode("utf-8") + rc = blosc2_get_complib_info(codec, &clib, &version) + if rc >= 0: + return clib, version + else: + raise ValueError("The compression library is not supported.") + +def get_clib(bytesobj): + rc = blosc2_cbuffer_complib( bytesobj) + if rc == NULL: + raise ValueError("Cannot get the info for the compressor") + else: + return rc + +def get_compressor(): + return blosc1_get_compressor() + + +cdef c_bool 
RELEASEGIL = False + +def set_releasegil(c_bool gilstate): + global RELEASEGIL + oldstate = RELEASEGIL + RELEASEGIL = gilstate + return oldstate + +def get_blocksize(): + return blosc1_get_blocksize() + +cdef _check_cparams(blosc2_cparams *cparams): + if cparams.nthreads > 1: + if BLOSC2_USER_REGISTERED_CODECS_START <= cparams.compcode <= BLOSC2_USER_REGISTERED_CODECS_STOP\ + and cparams.compcode in blosc2.ucodecs_registry.keys(): + raise ValueError("Cannot use multi-threading with user defined Python codecs") + + ufilters = [BLOSC2_USER_REGISTERED_FILTERS_START <= filter <= BLOSC2_USER_REGISTERED_FILTERS_STOP + for filter in cparams.filters] + for i in range(len(ufilters)): + if ufilters[i] and cparams.filters[i] in blosc2.ufilters_registry.keys(): + raise ValueError("Cannot use multi-threading with user defined Python filters") + + if cparams.prefilter != NULL and cparams.prefilter != miniexpr_prefilter: + # Note: miniexpr_prefilter uses miniexpr C API which is thread-friendly, + raise ValueError("`nthreads` must be 1 when a prefilter is set") + +cdef _check_dparams(blosc2_dparams* dparams, blosc2_cparams* cparams=NULL): + if cparams == NULL: + return + if dparams.nthreads > 1: + if BLOSC2_USER_REGISTERED_CODECS_START <= cparams.compcode <= BLOSC2_USER_REGISTERED_CODECS_STOP\ + and cparams.compcode in blosc2.ucodecs_registry.keys(): + raise ValueError("Cannot use multi-threading with user defined Python codecs") + + ufilters = [BLOSC2_USER_REGISTERED_FILTERS_START <= filter <= BLOSC2_USER_REGISTERED_FILTERS_STOP + for filter in cparams.filters] + for i in range(len(ufilters)): + if ufilters[i] and cparams.filters[i] in blosc2.ufilters_registry.keys(): + raise ValueError("Cannot use multi-threading with user defined Python filters") + + if dparams.postfilter != NULL: + raise ValueError("`nthreads` must be 1 when a postfilter is set") + + +cdef create_cparams_from_kwargs(blosc2_cparams *cparams, kwargs): + if "compcode" in kwargs: + raise NameError("`compcode` has 
been renamed to `codec`. Please go update your code.") + if "shuffle" in kwargs: + raise NameError("`shuffle` has been substituted by `filters`. Please go update your code.") + codec = kwargs.get('codec', blosc2.cparams_dflts['codec']) + cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value + cparams.compcode_meta = kwargs.get('codec_meta', blosc2.cparams_dflts['codec_meta']) + cparams.clevel = kwargs.get('clevel', blosc2.cparams_dflts['clevel']) + cparams.use_dict = kwargs.get('use_dict', blosc2.cparams_dflts['use_dict']) + cparams.typesize = typesize = kwargs.get('typesize', blosc2.cparams_dflts['typesize']) + cparams.nthreads = kwargs.get('nthreads', blosc2.nthreads) + cparams.blocksize = kwargs.get('blocksize', blosc2.cparams_dflts['blocksize']) + splitmode = kwargs.get('splitmode', blosc2.cparams_dflts['splitmode']) + cparams.splitmode = splitmode.value + # TODO: support the commented ones in the future + #schunk_c = kwargs.get('schunk', blosc2.cparams_dflts['schunk']) + #cparams.schunk = schunk_c + cparams.schunk = NULL + for i in range(BLOSC2_MAX_FILTERS): + cparams.filters[i] = 0 + cparams.filters_meta[i] = 0 + + filters = kwargs.get('filters', blosc2.cparams_dflts['filters']) + if len(filters) > BLOSC2_MAX_FILTERS: + raise ValueError(f"filters list cannot exceed {BLOSC2_MAX_FILTERS}") + for i, filter in enumerate(filters): + cparams.filters[i] = filter.value if isinstance(filter, Enum) else filter + # Bytedelta does not work on typesize 1 + if cparams.filters[i] == blosc2.Filter.BYTEDELTA.value and typesize == 1: + cparams.filters[i] = 0 + + if "filters_meta" not in kwargs: + # If not specified, we can still assign a 0 list to it + filters_meta = [0] * len(filters) + else: + filters_meta = kwargs['filters_meta'] + if len(filters) != len(filters_meta): + raise ValueError("filters and filters_meta lists must have same length") + cdef int8_t meta_value + for i, meta in enumerate(filters_meta): + # We still may want to encode 
negative values + meta_value = meta if meta < 0 else meta + if meta_value == 0 and cparams.filters[i] == blosc2.Filter.BYTEDELTA.value: + # bytedelta typesize cannot be zero when using compress2 + cparams.filters_meta[i] = typesize + else: + cparams.filters_meta[i] = meta_value + + cparams.prefilter = NULL + cparams.preparams = NULL + tuner = kwargs.get('tuner', blosc2.cparams_dflts['tuner']) + cparams.tuner_id = tuner.value + cparams.tuner_params = NULL + cparams.instr_codec = False + cparams.codec_params = NULL + for i in range(len(filters)): + cparams.filter_params[i] = NULL + + _check_cparams(cparams) + + +def compress2(src, **kwargs): + cdef blosc2_cparams cparams + create_cparams_from_kwargs(&cparams, kwargs) + + cdef blosc2_context *cctx + cdef Py_buffer buf + PyObject_GetBuffer(src, &buf, PyBUF_SIMPLE) + cdef int size + cdef int32_t len_dest = (buf.len + BLOSC2_MAX_OVERHEAD) + dest = bytes(len_dest) + _dest = dest + cctx = blosc2_create_cctx(cparams) + if cctx == NULL: + raise RuntimeError("Could not create the compression context") + if RELEASEGIL: + with nogil: + size = blosc2_compress_ctx(cctx, buf.buf, buf.len, _dest, len_dest) + else: + size = blosc2_compress_ctx(cctx, buf.buf, buf.len, _dest, len_dest) + blosc2_free_ctx(cctx) + PyBuffer_Release(&buf) + if size < 0: + raise RuntimeError("Could not compress the data") + elif size == 0: + del dest + raise RuntimeError("The result could not fit ") + return dest[:size] + +cdef create_dparams_from_kwargs(blosc2_dparams *dparams, kwargs, blosc2_cparams* cparams=NULL): + dparams.nthreads = kwargs.get('nthreads', blosc2.nthreads) + dparams.schunk = NULL + dparams.postfilter = NULL + dparams.postparams = NULL + # TODO: support the next ones in the future + #dparams.schunk = kwargs.get('schunk', blosc2.dparams_dflts['schunk']) + #dparams.typesize = typesize = kwargs.get('typesize', blosc2.dparams_dflts['typesize']) + _check_dparams(dparams, cparams) + +def decompress2(src, dst=None, **kwargs): + cdef 
blosc2_dparams dparams + cdef char *dst_buf + cdef void *view + create_dparams_from_kwargs(&dparams, kwargs) + + cdef blosc2_context *dctx = blosc2_create_dctx(dparams) + if dctx == NULL: + raise RuntimeError("Could not create decompression context") + cdef const uint8_t[:] typed_view_src + mem_view_src = memoryview(src) + typed_view_src = mem_view_src.cast('B') + _check_comp_length('src', typed_view_src.nbytes) + cdef int32_t nbytes + cdef int32_t cbytes + cdef int32_t blocksize + blosc2_cbuffer_sizes(&typed_view_src[0], &nbytes, &cbytes, &blocksize) + cdef Py_buffer buf + if dst is not None: + PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE) + if buf.len == 0: + blosc2_free_ctx(dctx) + raise ValueError("The dst length must be greater than 0") + view = &typed_view_src[0] + if RELEASEGIL: + with nogil: + size = blosc2_decompress_ctx(dctx, view, cbytes, buf.buf, nbytes) + else: + size = blosc2_decompress_ctx(dctx, view, cbytes, buf.buf, nbytes) + blosc2_free_ctx(dctx) + PyBuffer_Release(&buf) + else: + dst = PyBytes_FromStringAndSize(NULL, nbytes) + if dst is None: + blosc2_free_ctx(dctx) + raise RuntimeError("Could not get a bytes object") + dst_buf = dst + view = &typed_view_src[0] + if RELEASEGIL: + with nogil: + size = blosc2_decompress_ctx(dctx, view, cbytes, dst_buf, nbytes) + else: + size = blosc2_decompress_ctx(dctx, view, cbytes, dst_buf, nbytes) + blosc2_free_ctx(dctx) + if size >= 0: + return dst + if size < 0: + raise ValueError("Error while decompressing, check the src data and/or the dparams") + + +cdef create_storage(blosc2_storage *storage, kwargs): + contiguous = kwargs.get('contiguous', blosc2.storage_dflts['contiguous']) + storage.contiguous = contiguous + urlpath = kwargs.get('urlpath', blosc2.storage_dflts['urlpath']) + if urlpath is None: + storage.urlpath = NULL + else: + storage.urlpath = urlpath + + create_cparams_from_kwargs(storage.cparams, kwargs.get('cparams', {})) + create_dparams_from_kwargs(storage.dparams, kwargs.get('dparams', {}), 
storage.cparams) + + cdef blosc2_io* io + cdef blosc2_stdio_mmap* mmap_file + mmap_mode = kwargs.get("mmap_mode") + initial_mapping_size = kwargs.get("initial_mapping_size") + if mmap_mode is not None: + if urlpath is None: + raise ValueError("urlpath must be set when using mmap_mode") + if not contiguous: + raise ValueError("Only contiguous storage is supported for memory-mapped files") + + # sizeof(BLOSC2_STDIO_MMAP_DEFAULTS) yields the size of the full struct as defined in the C header + mmap_file = malloc(sizeof(BLOSC2_STDIO_MMAP_DEFAULTS)) + memcpy(mmap_file, &BLOSC2_STDIO_MMAP_DEFAULTS, sizeof(BLOSC2_STDIO_MMAP_DEFAULTS)) + + # The storage for the bytes for the mmap_mode parameter need to be available even after this function + kwargs["_mmap_mode_bytes"] = kwargs["mmap_mode"].encode("utf-8") + mmap_file.mode = kwargs["_mmap_mode_bytes"] + mmap_file.needs_free = True + if initial_mapping_size is not None: + mmap_file.initial_mapping_size = initial_mapping_size + + io = malloc(sizeof(blosc2_io)) + io.id = BLOSC2_IO_FILESYSTEM_MMAP + io.params = mmap_file + storage.io = io + else: + storage.io = NULL + + +cdef get_chunk_repeatval(blosc2_cparams cparams, const int32_t nbytes, + void *dest, int32_t destsize, Py_buffer *repeatval): + if blosc2_chunk_repeatval(cparams, nbytes, dest, destsize, repeatval.buf) < 0: + free(dest) + PyBuffer_Release(repeatval) + raise RuntimeError("Problems when creating the repeated values chunk") + + +cdef class SChunk: + cdef blosc2_schunk *schunk + cdef c_bool _is_view + + def __init__(self, _schunk=None, chunksize=2 ** 24, data=None, **kwargs): + # hold on to a bytestring of urlpath for the lifetime of the instance + # because its value is referenced via a C-pointer + urlpath = kwargs.get("urlpath", None) + if urlpath is not None: + if isinstance(urlpath, pathlib.PurePath): + urlpath = str(urlpath) + self._urlpath = urlpath.encode() if isinstance(urlpath, str) else urlpath + kwargs["urlpath"] = self._urlpath + + self.mode = 
blosc2.Storage().mode if kwargs.get("mode", None) is None else kwargs.get("mode") + self.mmap_mode = kwargs.get("mmap_mode") + self.initial_mapping_size = kwargs.get("initial_mapping_size") + if self.mmap_mode is not None: + self.mode = mode_from_mmap_mode(self.mmap_mode) + if self.initial_mapping_size is not None: + if self.mmap_mode is None: + raise ValueError("initial_mapping_size can only be used with mmap_mode") + + if self.mmap_mode == "r": + raise ValueError("initial_mapping_size can only be used with writing modes (r+, w+, c)") + + # `_is_view` indicates if a free should be done on this instance + self._is_view = kwargs.get("_is_view", False) + + if _schunk is not None: + self.schunk = PyCapsule_GetPointer(_schunk, "blosc2_schunk*") + if self.mode == "w" and urlpath is not None: + blosc2.remove_urlpath(urlpath) + self.schunk = blosc2_schunk_new(self.schunk.storage) + return + + if kwargs is not None: + if self.mode == "w": + blosc2.remove_urlpath(urlpath) + elif self.mode == "r": + if urlpath is None: + raise ValueError("Cannot open the SChunk in reading mode (mode or mmap_mode is 'r') because you " + "did not specify a urlpath pointing to an existing file on-disk") + if not os.path.exists(urlpath): + raise ValueError("Cannot open the SChunk in reading mode (mode or mmap_mode is 'r') because the " + f"file {urlpath} does not exist. 
Please use a writing mode if you want to create " + "a new SChunk") + + cdef blosc2_storage storage + # Create space for cparams and dparams in the stack + cdef blosc2_cparams cparams + cdef blosc2_dparams dparams + storage.cparams = &cparams + storage.dparams = &dparams + if kwargs is None: + storage = BLOSC2_STORAGE_DEFAULTS + else: + create_storage(&storage, kwargs) + + if self.mode == "r": + offset = 0 + if self.mmap_mode is not None: + self.schunk = blosc2_schunk_open_offset_udio(storage.urlpath, offset, storage.io) + else: + self.schunk = blosc2_schunk_open_offset(storage.urlpath, offset) + + if kwargs is not None: + check_schunk_params(self.schunk, kwargs) + if schunk_is_ndarray(self.schunk): + raise ValueError("Cannot open an NDArray as a SChunk. Please use blosc2.open instead") + else: + self.schunk = blosc2_schunk_new(&storage) + + if self.schunk == NULL: + if self.mmap_mode is not None: + free(storage.io) + raise RuntimeError("Could not create the Schunk") + + # Add metalayers + meta = kwargs.get("meta") + if meta is not None: + for (name, content) in meta.items(): + name = name.encode("utf-8") if isinstance(name, str) else name + content = packb(content, default=encode_tuple, strict_types=True, use_bin_type=True) + _check_rc(blosc2_meta_add(self.schunk, name, content, len(content)), + "Error while adding the metalayers") + + if chunksize > INT_MAX: + raise ValueError("Maximum chunksize allowed is 2^31 - 1") + self.schunk.chunksize = chunksize + cdef const uint8_t[:] typed_view + cdef int64_t index + cdef Py_buffer buf + cdef uint8_t *buf_ptr + if data is not None and len(data) > 0: + PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) + buf_ptr = buf.buf + len_data = buf.len + nchunks = len_data // chunksize + 1 if len_data % chunksize != 0 else len_data // chunksize + len_chunk = chunksize + for i in range(nchunks): + if i == (nchunks - 1): + len_chunk = len_data - i * chunksize + index = i * chunksize + nchunks_ = blosc2_schunk_append_buffer(self.schunk, 
buf_ptr + index, len_chunk) + if nchunks_ != (i + 1): + PyBuffer_Release(&buf) + raise RuntimeError("An error occurred while appending the chunks") + PyBuffer_Release(&buf) + + @property + def c_schunk(self): + return self.schunk + + @property + def chunksize(self): + return self.schunk.chunksize + + @property + def blocksize(self): + return self.schunk.blocksize + + @property + def nchunks(self): + return self.schunk.nchunks + + @property + def nbytes(self): + return self.schunk.nbytes + + @property + def cbytes(self): + return self.schunk.cbytes + + @property + def typesize(self): + return self.schunk.typesize + + @property + def urlpath(self): + urlpath = self.schunk.storage.urlpath + return urlpath.decode() if urlpath != NULL else None + + @property + def contiguous(self): + return self.schunk.storage.contiguous + + def get_cparams(self): + if self.schunk.storage.cparams.compcode in blosc2.Codec._value2member_map_: + codec = blosc2.Codec(self.schunk.storage.cparams.compcode) + else: + # User codec + codec = self.schunk.storage.cparams.compcode + + filters = [0] * BLOSC2_MAX_FILTERS + filters_meta = [0] * BLOSC2_MAX_FILTERS + for i in range(BLOSC2_MAX_FILTERS): + if self.schunk.filters[i] in blosc2.Filter._value2member_map_: + filters[i] = blosc2.Filter(self.schunk.filters[i]) + else: + # User filter + filters[i] = self.schunk.filters[i] + filters_meta[i] = self.schunk.filters_meta[i] + + cparams = blosc2.CParams( + codec=codec, + codec_meta=self.schunk.storage.cparams.compcode_meta, + clevel=self.schunk.storage.cparams.clevel, + use_dict=bool(self.schunk.storage.cparams.use_dict), + typesize=self.schunk.storage.cparams.typesize, + nthreads=self.schunk.storage.cparams.nthreads, + blocksize=self.schunk.storage.cparams.blocksize, + splitmode=blosc2.SplitMode(self.schunk.storage.cparams.splitmode), + tuner=blosc2.Tuner(self.schunk.storage.cparams.tuner_id), + filters=filters, + filters_meta=filters_meta, + ) + + return cparams + + def update_cparams(self, 
new_cparams): + cdef blosc2_cparams* cparams = self.schunk.storage.cparams + codec = new_cparams.codec + cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value + cparams.compcode_meta = new_cparams.codec_meta + cparams.clevel = new_cparams.clevel + cparams.use_dict = new_cparams.use_dict + cparams.typesize = new_cparams.typesize + cparams.nthreads = new_cparams.nthreads + cparams.blocksize = new_cparams.blocksize + cparams.splitmode = new_cparams.splitmode.value + cparams.tuner_id = new_cparams.tuner.value + + filters = new_cparams.filters + for i, filter in enumerate(filters): + cparams.filters[i] = filter.value if isinstance(filter, Enum) else filter + for i in range(len(filters), BLOSC2_MAX_FILTERS): + cparams.filters[i] = 0 + + filters_meta = new_cparams.filters_meta + cdef int8_t meta_value + for i, meta in enumerate(filters_meta): + # We still may want to encode negative values + meta_value = meta if meta < 0 else meta + cparams.filters_meta[i] = meta_value + for i in range(len(filters_meta), BLOSC2_MAX_FILTERS): + cparams.filters_meta[i] = 0 + + _check_cparams(cparams) + + blosc2_free_ctx(self.schunk.cctx) + self.schunk.cctx = blosc2_create_cctx(dereference(self.schunk.storage.cparams)) + if self.schunk.cctx == NULL: + raise RuntimeError("Could not create compression context") + self.schunk.compcode = self.schunk.storage.cparams.compcode + self.schunk.compcode_meta = self.schunk.storage.cparams.compcode_meta + self.schunk.clevel = self.schunk.storage.cparams.clevel + self.schunk.splitmode = self.schunk.storage.cparams.splitmode + self.schunk.typesize = self.schunk.storage.cparams.typesize + self.schunk.blocksize = self.schunk.storage.cparams.blocksize + self.schunk.filters = self.schunk.storage.cparams.filters + self.schunk.filters_meta = self.schunk.storage.cparams.filters_meta + + def get_dparams(self): + return blosc2.DParams(nthreads=self.schunk.storage.dparams.nthreads) + + def update_dparams(self, new_dparams): + cdef 
blosc2_dparams* dparams = self.schunk.storage.dparams + dparams.nthreads = new_dparams.nthreads + + _check_dparams(dparams, self.schunk.storage.cparams) + + blosc2_free_ctx(self.schunk.dctx) + self.schunk.dctx = blosc2_create_dctx(dereference(self.schunk.storage.dparams)) + if self.schunk.dctx == NULL: + raise RuntimeError("Could not create decompression context") + + def append_data(self, data): + cdef Py_buffer buf + PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) + rc = blosc2_schunk_append_buffer(self.schunk, buf.buf, buf.len) + PyBuffer_Release(&buf) + if rc < 0: + raise RuntimeError("Could not append the buffer") + return rc + + def fill_special(self, nitems, special_value, value): + if value is None: + return blosc2_schunk_fill_special(self.schunk, nitems, special_value, self.chunksize) + + if nitems == 0: + return 0 + if nitems * self.typesize / self.chunksize > INT_MAX: + raise RuntimeError("nitems is too large. Try increasing the chunksize") + if self.nbytes > 0 or self.cbytes > 0: + raise RuntimeError("Filling with special values only works on empty SChunks") + # Get a void pointer to the value + array = np.array([value]) + if array.dtype.itemsize != self.typesize: + if isinstance(value, int): + dtype = np.dtype('i'+ str(self.typesize)) + elif isinstance(value, float): + dtype = np.dtype('f' + str(self.typesize)) + else: + raise ValueError("value size in bytes must match with typesize") + array = np.array([value], dtype=dtype) + cdef Py_buffer buf + PyObject_GetBuffer(array, &buf, PyBUF_SIMPLE) + # Create chunk with repeated values + nchunks = nitems // self.chunkshape + cdef blosc2_schunk *c_schunk = self.c_schunk + cdef blosc2_cparams *cparams = self.schunk.storage.cparams + chunksize = BLOSC_EXTENDED_HEADER_LENGTH + self.typesize + cdef void *chunk = malloc(chunksize) + get_chunk_repeatval(dereference(cparams), self.chunksize, chunk, chunksize, &buf) + + for i in range(nchunks): + if blosc2_schunk_append_chunk(self.schunk, chunk, True) < 0: + free(chunk) 
+ PyBuffer_Release(&buf) + raise RuntimeError("Error while appending the chunk") + # Create and append last chunk if it is smaller than chunkshape + remainder = nitems % self.chunkshape + rc = 0 + if remainder != 0: + get_chunk_repeatval(dereference(cparams), remainder * self.typesize, chunk, chunksize, &buf) + rc = blosc2_schunk_append_chunk(self.schunk, chunk, True) + free(chunk) + PyBuffer_Release(&buf) + if rc < 0: + raise RuntimeError("Error while appending the chunk") + + return self.nchunks + + def decompress_chunk(self, nchunk, dst=None): + cdef uint8_t *chunk + cdef c_bool needs_free + rc = blosc2_schunk_get_chunk(self.schunk, nchunk, &chunk, &needs_free) + + if rc < 0: + raise RuntimeError("Error while getting the chunk") + + cdef int32_t nbytes + cdef int32_t cbytes + cdef int32_t blocksize + blosc2_cbuffer_sizes(chunk, &nbytes, &cbytes, &blocksize) + if needs_free: + free(chunk) + + cdef Py_buffer buf + if dst is not None: + PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE) + if buf.len == 0: + raise ValueError("The dst length must be greater than 0") + size = blosc2_schunk_decompress_chunk(self.schunk, nchunk, buf.buf, buf.len) + PyBuffer_Release(&buf) + else: + dst = PyBytes_FromStringAndSize(NULL, nbytes) + if dst is None: + raise RuntimeError("Could not get a bytes object") + size = blosc2_schunk_decompress_chunk(self.schunk, nchunk, dst, nbytes) + if size >= 0: + return dst + + if size < 0: + raise RuntimeError(f"Error while decompressing the specified chunk, error code: {size}") + + def get_chunk(self, nchunk): + cdef uint8_t *chunk + cdef c_bool needs_free + cbytes = blosc2_schunk_get_chunk(self.schunk, nchunk, &chunk, &needs_free) + if cbytes < 0: + raise RuntimeError("Error while getting the chunk") + ret_chunk = PyBytes_FromStringAndSize(chunk, cbytes) + if needs_free: + free(chunk) + return ret_chunk + + def get_lazychunk(self, nchunk): + cdef uint8_t *chunk + cdef c_bool needs_free + cbytes = blosc2_schunk_get_lazychunk(self.schunk, nchunk, 
&chunk, &needs_free) + if cbytes < 0: + raise RuntimeError("Error while getting the lazychunk") + # The next does not always work (bug) + # cdef uint8_t is_lazy = chunk[BLOSC2_MAX_OVERHEAD - 1] & 0x08 + # Workaround + cdef uint8_t is_lazy = chunk[BLOSC2_MAX_OVERHEAD - 1] & 0x70 + if not is_lazy: + # Put a cap on the buffer size for the non-lazy chunk + cbytes = MAX_OVERHEAD + ret_chunk = PyBytes_FromStringAndSize(chunk, cbytes) + if needs_free: + free(chunk) + return ret_chunk + + def delete_chunk(self, nchunk): + rc = blosc2_schunk_delete_chunk(self.schunk, nchunk) + if rc < 0: + raise RuntimeError("Could not delete the desired chunk") + return rc + + def insert_chunk(self, nchunk, chunk): + cdef const uint8_t[:] typed_view_chunk + mem_view_chunk = memoryview(chunk) + typed_view_chunk = mem_view_chunk.cast('B') + _check_comp_length('chunk', len(typed_view_chunk)) + rc = blosc2_schunk_insert_chunk(self.schunk, nchunk, &typed_view_chunk[0], True) + if rc < 0: + raise RuntimeError("Could not insert the desired chunk") + return rc + + def insert_data(self, nchunk, data, copy): + cdef blosc2_context *cctx + cdef Py_buffer buf + PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) + cdef int size + cdef int32_t len_chunk = (buf.len + BLOSC2_MAX_OVERHEAD) + cdef uint8_t* chunk = malloc(len_chunk) + self.schunk.current_nchunk = nchunk # prefilter needs this value to be set + if RELEASEGIL: + with nogil: + # No need to create another cctx + size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk) + else: + size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk) + PyBuffer_Release(&buf) + if size < 0: + raise RuntimeError("Could not compress the data") + elif size == 0: + free(chunk) + raise RuntimeError("The result could not fit ") + + chunk = realloc(chunk, size) + _check_comp_length('chunk', size) + rc = blosc2_schunk_insert_chunk(self.schunk, nchunk, chunk, copy) + if copy: + free(chunk) + if rc < 0: + raise RuntimeError("Could 
def update_data(self, nchunk, data, copy):
    """Compress `data` and use it to replace chunk number `nchunk`.

    Parameters
    ----------
    nchunk : index of the chunk to replace.
    data : object exposing a simple buffer with the uncompressed bytes.
    copy : forwarded to `blosc2_schunk_update_chunk`; when true the
        temporary compressed chunk is freed here after the update.

    Returns the value returned by `blosc2_schunk_update_chunk`.

    Raises
    ------
    MemoryError
        If the temporary chunk cannot be allocated.
    RuntimeError
        If compression fails, the result does not fit, or the update fails.
    """
    cdef Py_buffer buf
    PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
    cdef int size
    cdef int32_t len_chunk = (buf.len + BLOSC2_MAX_OVERHEAD)
    cdef uint8_t* chunk = malloc(len_chunk)
    if chunk == NULL:
        PyBuffer_Release(&buf)
        raise MemoryError("Could not allocate memory for the compressed chunk")
    self.schunk.current_nchunk = nchunk  # prefilter needs this value to be set
    if RELEASEGIL:
        with nogil:
            size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk)
    else:
        size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk)

    PyBuffer_Release(&buf)
    if size <= 0:
        # Both failure modes must give the temporary chunk back; the
        # negative-size path used to leak it.
        free(chunk)
        if size < 0:
            raise RuntimeError("Could not compress the data")
        raise RuntimeError("The result could not fit ")

    # Shrink the allocation to the actual compressed size.
    chunk = realloc(chunk, size)
    _check_comp_length('chunk', size)
    rc = blosc2_schunk_update_chunk(self.schunk, nchunk, chunk, copy)
    if copy:
        free(chunk)
    if rc < 0:
        raise RuntimeError("Could not update the desired chunk")
    return rc
def get_slice(self, start=0, stop=None, out=None):
    """Read the items in ``[start, stop)`` from the super-chunk.

    `start` and `stop` are normalised with ``slice(start, stop, 1).indices``
    against the number of items (``nbytes // typesize``).  An empty range
    returns ``b''``.  When `out` is given the data is written into its
    buffer and nothing is returned; otherwise a new bytes object holding the
    slice is returned.

    Raises
    ------
    ValueError
        If `out` is smaller than the requested slice.
    RuntimeError
        If the bytes object cannot be created or the read fails.
    """
    cdef int64_t nitems = self.schunk.nbytes // self.schunk.typesize
    start, stop, _ = slice(start, stop, 1).indices(nitems)
    if start >= stop:
        return b''

    cdef Py_ssize_t nbytes = (stop - start) * self.schunk.typesize
    cdef Py_buffer buf
    if out is not None:
        PyObject_GetBuffer(out, &buf, PyBUF_SIMPLE)
        if buf.len < nbytes:
            # Release the acquired view before bailing out; otherwise the
            # exporter stays locked.
            PyBuffer_Release(&buf)
            raise ValueError("Not enough space for writing the slice in out")
        rc = blosc2_schunk_get_slice_buffer(self.schunk, start, stop, buf.buf)
        PyBuffer_Release(&buf)
    else:
        out = PyBytes_FromStringAndSize(NULL, nbytes)
        if out is None:
            raise RuntimeError("Could not get a bytes object")
        # NOTE(review): `out` is a Python bytes object passed where a C
        # buffer is expected; a pointer cast may be missing in this copy
        # of the file -- confirm.
        rc = blosc2_schunk_get_slice_buffer(self.schunk, start, stop, out)
        if rc >= 0:
            return out
    if rc < 0:
        raise RuntimeError("Error while getting the slice")
self.schunk.chunksize * self.schunk.nchunks - nitems * self.schunk.typesize) + else: + chunk_nbytes = (stop * self.schunk.typesize) % self.schunk.chunksize + data = malloc(chunk_nbytes) + rc = blosc2_schunk_decompress_chunk(self.schunk, self.schunk.nchunks - 1, data, chunk_nbytes) + if rc < 0: + free(data) + raise RuntimeError("Error while decompressing the chunk") + data_start = self.schunk.nbytes - (self.schunk.nchunks - 1) * self.schunk.chunksize + memcpy(data + data_start, buf_ptr + buf_pos, nbytes_copy) + chunk = malloc(chunk_nbytes + BLOSC2_MAX_OVERHEAD) + rc = blosc2_compress_ctx(self.schunk.cctx, data, chunk_nbytes, chunk, chunk_nbytes + BLOSC2_MAX_OVERHEAD) + free(data) + if rc < 0: + free(chunk) + raise RuntimeError("Error while compressing the data") + rc = blosc2_schunk_update_chunk(self.schunk, self.schunk.nchunks - 1, chunk, True) + free(chunk) + if rc < 0: + raise RuntimeError("Error while updating the chunk") + buf_pos += nbytes_copy + # Append data if needed + if buf_pos < buf.len: + nappends = int(stop * self.schunk.typesize / self.schunk.chunksize - self.schunk.nchunks) + if (stop * self.schunk.typesize) % self.schunk.chunksize != 0: + nappends += 1 + for i in range(nappends): + if (self.schunk.nchunks + 1) * self.schunk.chunksize <= stop * self.schunk.typesize: + chunksize = self.schunk.chunksize + else: + chunksize = (stop * self.schunk.typesize) % self.schunk.chunksize + rc = blosc2_schunk_append_buffer(self.schunk, buf_ptr + buf_pos, chunksize) + if rc < 0: + raise RuntimeError("Error while appending the chunk") + buf_pos += chunksize + else: + rc = blosc2_schunk_set_slice_buffer(self.schunk, start, stop, buf.buf) + PyBuffer_Release(&buf) + if rc < 0: + raise RuntimeError("Error while setting the slice") + + def to_cframe(self): + cdef c_bool needs_free + cdef uint8_t *cframe + cframe_len = blosc2_schunk_to_buffer(self.schunk, &cframe, &needs_free) + if cframe_len < 0: + raise RuntimeError("Error while getting the cframe") + out = 
PyBytes_FromStringAndSize(cframe, cframe_len) + if needs_free: + free(cframe) + + return out + + def _avoid_cframe_free(self, avoid_cframe_free): + blosc2_schunk_avoid_cframe_free(self.schunk, avoid_cframe_free) + + def _massage_key(self, start, stop, nitems): + if stop is None: + stop = nitems + elif stop < 0: + stop += nitems + if start is None: + start = 0 + elif start < 0: + start += nitems + if stop - start <= 0: + raise ValueError("`stop` mut be greater than `start`") + + return start, stop + + def _set_postfilter(self, func, dtype_input, dtype_output=None): + # Get user data + func_id = func.__name__ + blosc2.postfilter_funcs[func_id] = func + func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id + + dtype_output = dtype_input if dtype_output is None else dtype_output + dtype_input = np.dtype(dtype_input) + dtype_output = np.dtype(dtype_output) + if dtype_output.itemsize != dtype_input.itemsize: + del blosc2.postfilter_funcs[func_id] + raise ValueError("`dtype_input` and `dtype_output` must have the same size") + + # Set postfilter + cdef blosc2_dparams* dparams = self.schunk.storage.dparams + dparams.postfilter = general_postfilter + # Fill postparams + cdef blosc2_postfilter_params* postparams = malloc(sizeof(blosc2_postfilter_params)) + cdef user_filters_udata* postf_udata = malloc(sizeof(user_filters_udata)) + postf_udata.py_func = malloc(strlen(func_id) + 1) + strcpy(postf_udata.py_func, func_id) + postf_udata.input_cdtype = dtype_input.num + postf_udata.output_cdtype = dtype_output.num + postf_udata.chunkshape = self.schunk.chunksize // self.schunk.typesize + + postparams.user_data = postf_udata + dparams.postparams = postparams + _check_dparams(dparams, self.schunk.storage.cparams) + + blosc2_free_ctx(self.schunk.dctx) + self.schunk.dctx = blosc2_create_dctx(dereference(dparams)) + if self.schunk.dctx == NULL: + raise RuntimeError("Could not create decompression context") + + cpdef remove_postfilter(self, func_name, 
_new_ctx=True): + if func_name is not None: + del blosc2.postfilter_funcs[func_name] + + cdef user_filters_udata* udata = self.schunk.storage.dparams.postparams.user_data + free(udata.py_func) + free(self.schunk.storage.dparams.postparams.user_data) + free(self.schunk.storage.dparams.postparams) + self.schunk.storage.dparams.postparams = NULL + self.schunk.storage.dparams.postfilter = NULL + + blosc2_free_ctx(self.schunk.dctx) + if _new_ctx: + self.schunk.dctx = blosc2_create_dctx(dereference(self.schunk.storage.dparams)) + if self.schunk.dctx == NULL: + raise RuntimeError("Could not create decompression context") + else: + # Avoid creating new dctx when calling this from the __dealloc__ + self.schunk.dctx = NULL + + def _set_filler(self, func, inputs_id, dtype_output): + if self.schunk.storage.cparams.nthreads > 1: + raise AttributeError("compress `nthreads` must be 1 when assigning a prefilter") + + func_id = func.__name__ + blosc2.prefilter_funcs[func_id] = func + func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id + + # Set prefilter + cdef blosc2_cparams* cparams = self.schunk.storage.cparams + cparams.prefilter = general_filler + + cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) + cdef filler_udata* fill_udata = malloc(sizeof(filler_udata)) + fill_udata.py_func = malloc(strlen(func_id) + 1) + strcpy(fill_udata.py_func, func_id) + fill_udata.inputs_id = inputs_id + fill_udata.output_cdtype = np.dtype(dtype_output).num + fill_udata.chunkshape = self.schunk.chunksize // self.schunk.typesize + + preparams.user_data = fill_udata + cparams.preparams = preparams + _check_cparams(cparams) + + blosc2_free_ctx(self.schunk.cctx) + self.schunk.cctx = blosc2_create_cctx(dereference(cparams)) + if self.schunk.cctx == NULL: + raise RuntimeError("Could not create compression context") + + def _set_prefilter(self, func, dtype_input, dtype_output=None): + if self.schunk.storage.cparams.nthreads > 1: + raise 
AttributeError("compress `nthreads` must be 1 when assigning a prefilter") + func_id = func.__name__ + blosc2.prefilter_funcs[func_id] = func + func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id + + dtype_output = dtype_input if dtype_output is None else dtype_output + dtype_input = np.dtype(dtype_input) + dtype_output = np.dtype(dtype_output) + if dtype_output.itemsize != dtype_input.itemsize: + del blosc2.prefilter_funcs[func_id] + raise ValueError("`dtype_input` and `dtype_output` must have the same size") + + cdef blosc2_cparams* cparams = self.schunk.storage.cparams + cparams.prefilter = general_prefilter + cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) + cdef user_filters_udata* pref_udata = malloc(sizeof(user_filters_udata)) + pref_udata.py_func = malloc(strlen(func_id) + 1) + strcpy(pref_udata.py_func, func_id) + pref_udata.input_cdtype = dtype_input.num + pref_udata.output_cdtype = dtype_output.num + pref_udata.chunkshape = self.schunk.chunksize // self.schunk.typesize + + preparams.user_data = pref_udata + cparams.preparams = preparams + _check_cparams(cparams) + + if self.schunk.cctx != NULL: + # Freeing NULL context can lead to segmentation fault + blosc2_free_ctx(self.schunk.cctx) + self.schunk.cctx = blosc2_create_cctx(dereference(cparams)) + if self.schunk.cctx == NULL: + raise RuntimeError("Could not create compression context") + + cpdef remove_prefilter(self, func_name, _new_ctx=True): + cdef udf_udata* udf_data + cdef user_filters_udata* udata + + if func_name is not None and func_name in blosc2.prefilter_funcs: + del blosc2.prefilter_funcs[func_name] + + # Clean up the miniexpr handle if this is a miniexpr_prefilter + if self.schunk.storage.cparams.prefilter == miniexpr_prefilter: + if self.schunk.storage.cparams.preparams != NULL: + me_data = self.schunk.storage.cparams.preparams.user_data + if me_data != NULL: + if me_data.inputs != NULL: + for i in range(me_data.ninputs): + if 
me_data.inputs[i].chunk_cache.data != NULL: + free(me_data.inputs[i].chunk_cache.data) + me_data.inputs[i].chunk_cache.data = NULL + me_data.inputs[i].chunk_cache.nchunk = -1 + free(me_data.inputs) + if me_data.miniexpr_handle != NULL: # XXX do we really need the conditional? + me_free(me_data.miniexpr_handle) + if me_data.eval_params != NULL: + free(me_data.eval_params) + free(me_data) + elif self.schunk.storage.cparams.prefilter != NULL: + # From Python the preparams->udata with always have the field py_func + if self.schunk.storage.cparams.preparams != NULL: + udata = self.schunk.storage.cparams.preparams.user_data + if udata != NULL: + if udata.py_func != NULL: + free(udata.py_func) + free(udata) + + if self.schunk.storage.cparams.preparams != NULL: + free(self.schunk.storage.cparams.preparams) + self.schunk.storage.cparams.preparams = NULL + self.schunk.storage.cparams.prefilter = NULL + + if self.schunk.cctx != NULL: + # Freeing NULL context can lead to segmentation fault + blosc2_free_ctx(self.schunk.cctx) + if _new_ctx: + self.schunk.cctx = blosc2_create_cctx(dereference(self.schunk.storage.cparams)) + if self.schunk.cctx == NULL: + raise RuntimeError("Could not create compression context") + else: + # Avoid creating new cctx when calling this from the __dealloc__ + self.schunk.cctx = NULL + + def __dealloc__(self): + if self.schunk != NULL and not self._is_view: + # Free prefilters and postfilters params + if self.schunk.storage.cparams.prefilter != NULL: + self.remove_prefilter(func_name=None, _new_ctx=False) + if self.schunk.storage.dparams.postfilter != NULL: + self.remove_postfilter(func_name=None, _new_ctx=False) + + blosc2_schunk_free(self.schunk) + + +# postfilter +cdef int general_postfilter(blosc2_postfilter_params *params): + cdef user_filters_udata *udata = params.user_data + cdef int nd = 1 + cdef np.npy_intp dims = params.size // params.typesize + input = np.PyArray_SimpleNewFromData(nd, &dims, udata.input_cdtype, params.input) + output = 
np.PyArray_SimpleNewFromData(nd, &dims, udata.output_cdtype, params.output) + offset = params.nchunk * udata.chunkshape + params.offset // params.typesize + func_id = udata.py_func.decode("utf-8") + blosc2.postfilter_funcs[func_id](input, output, offset) + return 0 + + +# filler +cdef int general_filler(blosc2_prefilter_params *params): + cdef filler_udata *udata = params.user_data + cdef int nd = 1 + cdef np.npy_intp dims = params.output_size // params.output_typesize + + inputs_tuple = _ctypes.PyObj_FromPtr(udata.inputs_id) + + output = np.PyArray_SimpleNewFromData(nd, &dims, udata.output_cdtype, params.output) + offset = params.nchunk * udata.chunkshape + params.output_offset // params.output_typesize + + inputs = [] + for obj, dtype in inputs_tuple: + if isinstance(obj, blosc2.SChunk): + out = np.empty(dims, dtype=dtype) + obj.get_slice(start=offset, stop=offset + dims, out=out) + inputs.append(out) + elif isinstance(obj, np.ndarray): + inputs.append(obj[offset : offset + dims]) + elif isinstance(obj, (int, float, bool, complex)): + inputs.append(np.full(dims, obj, dtype=dtype)) + else: + raise ValueError("Unsupported operand") + + func_id = udata.py_func.decode("utf-8") + blosc2.prefilter_funcs[func_id](tuple(inputs), output, offset) + + return 0 + + +# Auxiliary function for miniexpr as a prefilter +# Only meant for (input and output) arrays that are blosc2.NDArray objects. 
+cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock, + c_bool is_postfilter, uint8_t *params_output, int32_t typesize) nogil: + # Declare all C variables at the beginning + cdef int64_t chunk_ndim[B2ND_MAX_DIM] + cdef int64_t block_ndim[B2ND_MAX_DIM] + cdef int64_t start_ndim[B2ND_MAX_DIM] + cdef int64_t stop_ndim[B2ND_MAX_DIM] + cdef int64_t buffershape[B2ND_MAX_DIM] + + cdef b2nd_array_t* ndarr + cdef int rc + cdef void** input_buffers = malloc(udata.ninputs * sizeof(uint8_t*)) + cdef float *buf + cdef uint8_t* src + cdef uint8_t* chunk + cdef c_bool needs_free + cdef int32_t chunk_nbytes, chunk_cbytes, block_nbytes + cdef int start, blocknitems, expected_blocknitems + cdef int64_t valid_nitems + cdef int32_t input_typesize + cdef blosc2_context* dctx + expected_blocknitems = -1 + valid_nitems = 0 + + cdef me_expr* miniexpr_handle = udata.miniexpr_handle + cdef void* aux_reduc_ptr + + if miniexpr_handle == NULL: + raise ValueError("miniexpr: handle not assigned") + if input_buffers == NULL: + raise MemoryError("miniexpr: cannot allocate input buffer table") + memset(input_buffers, 0, udata.ninputs * sizeof(uint8_t*)) + + # Query valid (unpadded) items for this block + rc = me_nd_valid_nitems(miniexpr_handle, nchunk, nblock, &valid_nitems) + if rc != 0: + raise RuntimeError(f"miniexpr: invalid block; error code: {rc}") + if valid_nitems <= 0: + # Nothing to compute for this block. + # For reductions, keep aux_reduc neutral values untouched. 
+ if udata.aux_reduc_ptr == NULL: + memset(params_output, 0, udata.array.blocknitems * typesize) + free(input_buffers) + return 0 + + for i in range(udata.ninputs): + ndarr = udata.inputs[i] + if ndarr.sc.storage.urlpath == NULL: + src = ndarr.sc.data[nchunk] + else: + # We need to get the chunk from disk/network + if ndarr.chunk_cache.nchunk != nchunk: + PyThread_acquire_lock(chunk_cache_lock, 1) + # We need to check again, as another thread may have updated the cache already + if ndarr.chunk_cache.nchunk != nchunk: + if ndarr.chunk_cache.data != NULL: + free(ndarr.chunk_cache.data) + ndarr.chunk_cache.data = NULL + rc = blosc2_schunk_get_chunk(ndarr.sc, nchunk, &chunk, &needs_free) + if rc < 0: + PyThread_release_lock(chunk_cache_lock) + raise ValueError("miniexpr: error getting chunk") + if not needs_free: + src = malloc(rc) + if src == NULL: + PyThread_release_lock(chunk_cache_lock) + raise MemoryError("miniexpr: cannot allocate chunk copy") + memcpy(src, chunk, rc) + else: + src = chunk + ndarr.chunk_cache.data = src + ndarr.chunk_cache.nchunk = nchunk + PyThread_release_lock(chunk_cache_lock) + src = ndarr.chunk_cache.data + rc = blosc2_cbuffer_sizes(src, &chunk_nbytes, &chunk_cbytes, &block_nbytes) + if rc < 0: + raise ValueError("miniexpr: error getting cbuffer sizes") + if block_nbytes <= 0: + raise ValueError("miniexpr: invalid block size") + input_buffers[i] = malloc(block_nbytes) + if input_buffers[i] == NULL: + raise MemoryError("miniexpr: cannot allocate input block buffer") + input_typesize = ndarr.sc.typesize + blocknitems = block_nbytes // input_typesize + if expected_blocknitems == -1: + expected_blocknitems = blocknitems + elif blocknitems != expected_blocknitems: + raise ValueError("miniexpr: inconsistent block element counts across inputs") + start = nblock * blocknitems + # This is needed for thread safety, but adds a pretty low overhead (< 400ns on a modern CPU) + # In the future, perhaps one can create a specific (serial) context just for + 
# blosc2_getitem_ctx, but this is probably never going to be necessary. + dctx = blosc2_create_dctx(BLOSC2_DPARAMS_DEFAULTS) + # Unsafe, but it works for special arrays (e.g. blosc2.ones), and can be used for profiling + # dctx = ndarr.sc.dctx + if valid_nitems > blocknitems: + raise ValueError("miniexpr: valid items exceed padded block size") + rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, blocknitems, + input_buffers[i], block_nbytes) + blosc2_free_ctx(dctx) + if rc < 0: + raise ValueError("miniexpr: error decompressing the chunk") + # For reduction operations, we need to track which block we're processing + # The linear_block_index should be based on the INPUT array structure, not the output array + # Get the first input array's chunk and block structure + cdef b2nd_array_t* first_input = udata.inputs[0] + cdef int nblocks_per_chunk = 1 + for i in range(first_input.ndim): + nblocks_per_chunk *= udata.blocks_in_chunk[i] + # Calculate the global linear block index: nchunk * blocks_per_chunk + nblock + # This works because blocks never span chunks (chunks are padded to block boundaries) + cdef int64_t linear_block_index = nchunk * nblocks_per_chunk + nblock + cdef uintptr_t offset_bytes = typesize * linear_block_index + + # Call thread-safe miniexpr C API + # NOTE: me_eval_nd expects the OUTPUT block size (in items), not the input block size. + # For element-wise operations with same dtypes, they're equal, but for type-changing + # operations (e.g., arccos(int32) -> float64), we must use the output's block item count. + cdef int output_blocknitems = udata.array.blocknitems + + if udata.aux_reduc_ptr == NULL: + aux_reduc_ptr = params_output + else: + # Reduction operation: evaluate only valid items into a single output element. + # NOTE: miniexpr handles scalar outputs in me_eval_nd without touching tail bytes. 
+ aux_reduc_ptr = ( udata.aux_reduc_ptr + offset_bytes) + rc = me_eval_nd(miniexpr_handle, input_buffers, udata.ninputs, + aux_reduc_ptr, output_blocknitems, nchunk, nblock, udata.eval_params) + if rc != 0: + raise RuntimeError(f"miniexpr: issues during evaluation; error code: {rc}") + + # Free resources + for i in range(udata.ninputs): + free(input_buffers[i]) + free(input_buffers) + + return 0 + + +# Aux function for prefilter and postfilter udf +cdef int aux_udf(udf_udata *udata, int64_t nchunk, int32_t nblock, + c_bool is_postfilter, uint8_t *params_output, int32_t typesize): + cdef int64_t chunk_ndim[B2ND_MAX_DIM] + blosc2_unidim_to_multidim(udata.array.ndim, udata.chunks_in_array, nchunk, chunk_ndim) + cdef int64_t block_ndim[B2ND_MAX_DIM] + blosc2_unidim_to_multidim(udata.array.ndim, udata.blocks_in_chunk, nblock, block_ndim) + cdef int64_t start_ndim[B2ND_MAX_DIM] + for i in range(udata.array.ndim): + start_ndim[i] = chunk_ndim[i] * udata.array.chunkshape[i] + block_ndim[i] * udata.array.blockshape[i] + + padding = False + blockshape = [] + for i in range(udata.array.ndim): + if start_ndim[i] + udata.array.blockshape[i] > udata.array.shape[i]: + padding = True + blockshape.append(udata.array.shape[i] - start_ndim[i]) + if blockshape[i] <= 0: + # This block contains only padding, skip it + return 0 + else: + blockshape.append(udata.array.blockshape[i]) + cdef np.npy_intp dims[B2ND_MAX_DIM] + for i in range(udata.array.ndim): + dims[i] = blockshape[i] + + if padding: + output = np.empty(blockshape, udata.array.dtype) + else: + output = np.PyArray_SimpleNewFromData(udata.array.ndim, dims, udata.output_cdtype, params_output) + + inputs_tuple = _ctypes.PyObj_FromPtr(udata.inputs_id) + inputs_slice = [] + # Get slice of each operand + l = [] + for i in range(udata.array.ndim): + l.append(slice(start_ndim[i], start_ndim[i] + blockshape[i])) + slices = tuple(l) + for obj in inputs_tuple: + if isinstance(obj, blosc2.NDArray | np.ndarray | blosc2.C2Array): + 
inputs_slice.append(obj[slices]) + elif np.isscalar(obj): + inputs_slice.append(obj) + else: + raise ValueError("Unsupported operand") + + # Call udf function + func_id = udata.py_func.decode("utf-8") + offset = tuple(start_ndim[i] for i in range(udata.array.ndim)) + if is_postfilter: + blosc2.postfilter_funcs[func_id](tuple(inputs_slice), output, offset) + else: + blosc2.prefilter_funcs[func_id](tuple(inputs_slice), output, offset) + + cdef int64_t start[B2ND_MAX_DIM] + cdef int64_t slice_shape[B2ND_MAX_DIM] + cdef int64_t blockshape_int64[B2ND_MAX_DIM] + cdef Py_buffer buf + if padding: + for i in range(udata.array.ndim): + start[i] = 0 + slice_shape[i] = blockshape[i] + blockshape_int64[i] = udata.array.blockshape[i] + PyObject_GetBuffer(output, &buf, PyBUF_SIMPLE) + rc = b2nd_copy_buffer2(udata.array.ndim, typesize, + buf.buf, slice_shape, start, slice_shape, + params_output, blockshape_int64, start) + PyBuffer_Release(&buf) + _check_rc(rc, "Could not copy the result into the buffer") + + return 0 + + +cdef int miniexpr_prefilter(blosc2_prefilter_params *params): + return aux_miniexpr( params.user_data, params.nchunk, params.nblock, False, + params.output, params.output_typesize) + + +cdef int general_udf_prefilter(blosc2_prefilter_params *params): + cdef udf_udata *udata = params.user_data + return aux_udf(udata, params.nchunk, params.nblock, False, params.output, params.output_typesize) + + +cdef int general_udf_postfilter(blosc2_postfilter_params *params): + cdef udf_udata *udata = params.user_data + return aux_udf(udata, params.nchunk, params.nblock, True, params.output, params.typesize) + + +def nelem_from_inputs(inputs_tuple, nelem=None): + for obj, dtype in inputs_tuple: + if isinstance(obj, blosc2.SChunk): + if nelem is not None and nelem != (obj.nbytes / obj.typesize): + raise ValueError("operands must have same nelems") + nelem = obj.nbytes / obj.typesize + elif isinstance(obj, np.ndarray): + if nelem is not None and nelem != obj.size: + raise 
def decode_tuple(obj):
    """Turn a ``["__tuple__", ...]`` marker list back into a tuple.

    Counterpart of `encode_tuple`: a sequence whose first element is the
    string ``"__tuple__"`` becomes a tuple of the remaining elements; any
    other sequence, including an empty one, is returned unchanged.
    """
    # An empty list has no marker to inspect; indexing it would raise.
    if len(obj) > 0 and obj[0] == "__tuple__":
        return tuple(obj[1:])
    return obj
def meta__getitem__(self, name):
    """Return the content of the metalayer called `name` as bytes.

    `name` may be str (encoded as UTF-8) or bytes.

    Raises
    ------
    KeyError
        If `blosc2_meta_get` reports an error for `name`.
    """
    cdef blosc2_schunk *schunk = self.c_schunk
    name = name.encode("utf-8") if isinstance(name, str) else name
    cdef uint8_t *content
    cdef int32_t content_len
    n = blosc2_meta_get(schunk, name, &content, &content_len)
    if n < 0:
        # On failure `content` and `content_len` were never assigned here;
        # using them below would read garbage and free an invalid pointer.
        raise KeyError(name)
    res = PyBytes_FromStringAndSize( content, content_len)
    # The content buffer is freed here once it has been copied into `res`.
    free(content)

    return res
def meta_keys(self):
    """Return the names of all metalayers, decoded as UTF-8 strings."""
    cdef blosc2_schunk *schunk = self.c_schunk
    nlayers = meta__len__(self)
    # Layers are listed in index order, same as they are stored.
    return [schunk.metalayers[idx].name.decode("utf-8") for idx in range(nlayers)]
def mode_from_mmap_mode(mmap_mode):
    """Translate an `mmap_mode` string into the access mode used internally.

    The user-supplied mode is ignored for mmap files; this fixed mapping is
    used instead.  Raises ValueError for an unknown `mmap_mode`.
    """
    translations = (
        ("r", "r"),
        ("r+", "a"),
        ("w+", "w"),
        # In terms of (internal) blosc, it is allowed to modify the file
        # contents; the actual file is opened in read-only mode.
        ("c", "a"),
    )
    for known_mmap_mode, access_mode in translations:
        if mmap_mode == known_mmap_mode:
            return access_mode
    raise ValueError(f"Invalid mmap_mode: {mmap_mode}")
kwargs.get("typesize", schunk.typesize) + if typesize != schunk.typesize: + raise ValueError("Cannot change typesize with this mode") + + +cdef schunk_is_ndarray(blosc2_schunk* schunk): + meta = "b2nd" + meta = meta.encode("utf-8") if isinstance(meta, str) else meta + return blosc2_meta_exists(schunk, meta) >= 0 + + +def schunk_from_cframe(cframe, copy=False): + cdef Py_buffer buf + PyObject_GetBuffer(cframe, &buf, PyBUF_SIMPLE) + cdef blosc2_schunk *schunk_ = blosc2_schunk_from_buffer(buf.buf, buf.len, copy) + if schunk_ == NULL: + raise RuntimeError("Could not get the schunk from the cframe") + schunk = blosc2.SChunk(_schunk=PyCapsule_New(schunk_, "blosc2_schunk*", NULL)) + PyBuffer_Release(&buf) + if not copy: + schunk._avoid_cframe_free(True) + return schunk + + +cdef int general_encoder(const uint8_t* input_buffer, int32_t input_len, + uint8_t* output_buffer, int32_t output_len, + uint8_t meta, + blosc2_cparams* cparams, const void* chunk): + cdef int nd = 1 + cdef np.npy_intp input_dims = input_len + cdef np.npy_intp output_dims = output_len + input = np.PyArray_SimpleNewFromData(nd, &input_dims, np.NPY_UINT8, input_buffer) + output = np.PyArray_SimpleNewFromData(nd, &output_dims, np.NPY_UINT8, output_buffer) + + cdef blosc2_schunk *sc = cparams.schunk + if sc != NULL: + schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) + else: + raise RuntimeError("Cannot apply user codec without an SChunk") + rc = blosc2.ucodecs_registry[cparams.compcode][1](input, output, meta, schunk) + if rc is None: + raise RuntimeError("encoder must return the number of compressed bytes") + + return rc + + +cdef int general_decoder(const uint8_t* input_buffer, int32_t input_len, + uint8_t* output_buffer, int32_t output_len, + uint8_t meta, + blosc2_dparams *dparams, const void* chunk): + cdef int nd = 1 + cdef np.npy_intp input_dims = input_len + cdef np.npy_intp output_dims = output_len + input = np.PyArray_SimpleNewFromData(nd, &input_dims, 
np.NPY_UINT8, input_buffer) + output = np.PyArray_SimpleNewFromData(nd, &output_dims, np.NPY_UINT8, output_buffer) + + cdef blosc2_schunk *sc = dparams.schunk + if sc != NULL: + schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) + else: + raise RuntimeError("Cannot apply user codec without an SChunk") + + rc = blosc2.ucodecs_registry[sc.compcode][2](input, output, meta, schunk) + if rc is None: + raise RuntimeError("decoder must return the number of decompressed bytes") + + return rc + + +def register_codec(codec_name, id, encoder=None, decoder=None, version=1): + if id < BLOSC2_USER_REGISTERED_CODECS_START or id > BLOSC2_USER_REGISTERED_CODECS_STOP: + raise ValueError("`id` must be between ", BLOSC2_USER_REGISTERED_CODECS_START, + " and ", BLOSC2_USER_REGISTERED_CODECS_STOP) + + if (encoder is None and decoder is not None) or (encoder is not None and decoder is None): + raise ValueError("both encoder and decoder must be given, or none") + + cdef blosc2_codec codec + codec.compcode = id + codec.version = version + codec.complib = id + codec_name_ = codec_name.encode() if isinstance(codec_name, str) else codec_name + codec.compname = malloc(strlen(codec_name_) + 1) + strcpy(codec.compname, codec_name_) + if encoder is None: + codec.encoder = NULL + else: + codec.encoder = general_encoder + if decoder is None: + codec.decoder = NULL + else: + codec.decoder = general_decoder + + rc = blosc2_register_codec(&codec) + if rc < 0: + raise RuntimeError("Error while registering codec") + + if encoder and decoder: + blosc2.ucodecs_registry[id] = (codec_name, encoder, decoder) + + +cdef int general_forward(const uint8_t* input_buffer, uint8_t* output_buffer, int32_t size, + uint8_t meta, blosc2_cparams* cparams, uint8_t id): + cdef int nd = 1 + cdef np.npy_intp dims = size + input = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, input_buffer) + output = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, output_buffer) + + cdef 
blosc2_schunk *sc = cparams.schunk + if sc != NULL: + schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) + else: + raise RuntimeError("Cannot apply user codec without an SChunk") + blosc2.ufilters_registry[id][0](input, output, meta, schunk) + + return BLOSC2_ERROR_SUCCESS + + +cdef int general_backward(const uint8_t* input_buffer, uint8_t* output_buffer, int32_t size, + uint8_t meta, blosc2_dparams* dparams, uint8_t id): + cdef int nd = 1 + cdef np.npy_intp dims = size + input = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, input_buffer) + output = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, output_buffer) + + cdef blosc2_schunk *sc = dparams.schunk + if sc != NULL: + schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) + else: + raise RuntimeError("Cannot apply user filter without an SChunk") + + blosc2.ufilters_registry[id][1](input, output, meta, schunk) + + return BLOSC2_ERROR_SUCCESS + + +def register_filter(id, forward, backward, filter_name): + if id < BLOSC2_USER_REGISTERED_FILTERS_START or id > BLOSC2_USER_REGISTERED_FILTERS_STOP: + raise ValueError("`id` must be between ", BLOSC2_USER_REGISTERED_FILTERS_START, + " and ", BLOSC2_USER_REGISTERED_FILTERS_STOP) + if (forward is None and backward is not None) or (forward is not None and backward is None): + raise ValueError("both encoder and decoder must be given, or none") + + cdef blosc2_filter filter + filter.id = id + if forward is None: + filter.forward = NULL + else: + filter.forward = general_forward + if backward is None: + filter.backward = NULL + else: + filter.backward = general_backward + if filter_name is None and not forward and not backward: + raise ValueError("You need to pass the filter name or the forward and backward functions") + if filter_name: + filter_name_ = filter_name.encode() if isinstance(filter_name, str) else filter_name + filter.name = malloc(strlen(filter_name_) + 1) + 
strcpy(filter.name, filter_name_) + + rc = blosc2_register_filter(&filter) + if rc < 0: + raise RuntimeError("Error while registering filter") + if forward and backward: + blosc2.ufilters_registry[id] = (forward, backward) + +cdef _check_rc(rc, message): + if rc < 0: + raise RuntimeError(message) + + +cdef class slice_flatter: + cdef long long ndim + cdef int done + cdef long long[:] shape + cdef long long[:] start + cdef long long[:] stop + cdef long long[:] strides + cdef long long[:] indices + cdef long long current_slice_start + cdef long long current_slice_end + cdef long long current_flat_idx # Track the current flat index + + def __cinit__(self, long long[:] start not None, long long[:] stop not None, long long[:] strides not None): + self.ndim = start.shape[0] + self.done = 0 + self.start = start + self.stop = stop + self.strides = strides + self.current_slice_start = -1 + self.current_slice_end = -1 + shape = tuple(stop[i] - start[i] for i in range(self.ndim)) + self.shape = np.array(shape, dtype=np.int64) + self.indices = np.zeros(self.ndim, dtype=np.int64) + # Initialize the flat index + self.current_flat_idx = 0 + for j in range(self.ndim): + self.current_flat_idx += self.start[j] * self.strides[j] + + def __iter__(self): + return self + + @cython.boundscheck(False) + @cython.wraparound(False) + def __next__(self): + cdef long long j, next_flat_idx + cdef int extended_slice = 0 + + # Check if we're done + if self.done: + if self.current_slice_start != -1: + result = slice(self.current_slice_start, self.current_slice_end + 1) + self.current_slice_start = -1 + return result + raise StopIteration + + # Initialize first slice point if needed + if self.current_slice_start == -1: + next_flat_idx = 0 + for j in range(self.ndim): + next_flat_idx += (self.start[j] + self.indices[j]) * self.strides[j] + self.current_slice_start = next_flat_idx + self.current_slice_end = next_flat_idx + self.current_flat_idx = next_flat_idx + self.incr_indices() + + # If we're 
done after the first element, return it + if self.done: + result = slice(self.current_slice_start, self.current_slice_end + 1) + self.current_slice_start = -1 + return result + + # Extend slice as long as indices remain contiguous + while not self.done: + # Calculate next flat index + next_flat_idx = 0 + for j in range(self.ndim): + next_flat_idx += (self.start[j] + self.indices[j]) * self.strides[j] + + # If indices are contiguous, extend current slice + if next_flat_idx == self.current_slice_end + 1: + self.current_slice_end = next_flat_idx + self.current_flat_idx = next_flat_idx + self.incr_indices() + extended_slice = 1 + else: + # Non-contiguous index found, return current slice + result = slice(self.current_slice_start, self.current_slice_end + 1) + self.current_slice_start = next_flat_idx + self.current_slice_end = next_flat_idx + self.current_flat_idx = next_flat_idx + self.incr_indices() + return result + + # If we've reached the end after extending the slice + if extended_slice: + result = slice(self.current_slice_start, self.current_slice_end + 1) + self.current_slice_start = -1 + return result + + # Should never reach here + raise StopIteration + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void incr_indices(self) nogil: + cdef long long i + for i in range(self.ndim - 1, -1, -1): + self.indices[i] += 1 + if self.indices[i] < self.shape[i]: + break + self.indices[i] = 0 + if i == 0: + self.done = 1 + + +cdef class NDArray: + cdef b2nd_array_t* array + + def __init__(self, array, base=None): + self._dtype = None + self.array = PyCapsule_GetPointer(array, "b2nd_array_t*") + self.base = base # add reference to base if NDArray is a view + + @property + def c_array(self): + return self.array + + @property + def shape(self) -> tuple[int]: + return tuple([self.array.shape[i] for i in range(self.array.ndim)]) + + @property + def ext_shape(self): + return tuple([self.array.extshape[i] for i in range(self.array.ndim)]) + + @property + def 
chunks(self): + return tuple([self.array.chunkshape[i] for i in range(self.array.ndim)]) + + @property + def ext_chunks(self): + return tuple([self.array.extchunkshape[i] for i in range(self.array.ndim)]) + + @property + def blocks(self): + return tuple([self.array.blockshape[i] for i in range(self.array.ndim)]) + + @property + def ndim(self): + return self.array.ndim + + @property + def size(self): + return self.array.nitems + + @property + def chunksize(self): + return self.array.chunknitems * self.array.sc.typesize + + @property + def dtype(self): + if self._dtype is not None: + return self._dtype + + # Not in cache yet + if self.array.dtype == NULL: + return np.dtype(f"S{self.array.sc.typesize}") + if self.array.dtype_format != B2ND_DEFAULT_DTYPE_FORMAT: + raise ValueError("Only NumPy dtypes are supported") + cdef char *bytes_dtype = self.array.dtype + str_dtype = bytes_dtype.decode("utf-8") + try: + dtype = np.dtype(str_dtype) + except (ValueError, TypeError): + dtype = np.dtype(ast.literal_eval(str_dtype)) + self._dtype = dtype + return dtype + + def get_slice_numpy(self, arr, key): + start, stop = key + + cdef int64_t[B2ND_MAX_DIM] start_, stop_ + cdef int64_t[B2ND_MAX_DIM] buffershape_ + for i in range(self.ndim): + start_[i] = start[i] + stop_[i] = stop[i] + buffershape_[i] = stop_[i] - start_[i] + + cdef Py_buffer view + PyObject_GetBuffer(arr, &view, PyBUF_SIMPLE) + _check_rc(b2nd_get_slice_cbuffer(self.array, start_, stop_, + view.buf, buffershape_, view.len), + "Error while getting the buffer") + PyBuffer_Release(&view) + + return arr + + def get_oindex_numpy(self, arr, key): + """ + Orthogonal indexing. Key is a tuple of lists of integer indices. 
+ """ + if len(key) != self.array.ndim: + raise ValueError(f"Key must have {self.array.ndim} dimensions, got {len(key)}.") + cdef int64_t[B2ND_MAX_DIM] buffershape_ + cdef int64_t** key_ + cdef int64_t buffersize_ = self.array.sc.typesize + cdef int64_t[B2ND_MAX_DIM] sel_size + + key_ = malloc(len(key) * sizeof(int64_t *)) + + for i in range(self.array.ndim): + buffershape_[i] = len(key[i]) + buffersize_ *= buffershape_[i] + sel_size[i] = len(key[i]) + key_[i] = malloc(sel_size[i] * sizeof(int64_t)) + for j in range(len(key[i])): + key_[i][j] = key[i][j] + + cdef Py_buffer buf + PyObject_GetBuffer(arr, &buf, PyBUF_SIMPLE) + + _check_rc(b2nd_get_orthogonal_selection(self.array, key_, sel_size, buf.buf, + buffershape_, buffersize_), "Error while getting orthogonal selection") + PyBuffer_Release(&buf) + for i in range(len(key)): + free(key_[i]) # Free the allocated memory for each key + free(key_) + return arr + + def set_oindex_numpy(self, key, arr): + """ + Orthogonal indexing. Set elements of self with arr using key. 
+ """ + if len(key) != self.array.ndim: + raise ValueError(f"Key must have {self.array.ndim} dimensions, got {len(key)}.") + cdef int64_t[B2ND_MAX_DIM] buffershape_ + cdef int64_t** key_ + cdef int64_t buffersize_ = self.array.sc.typesize + cdef int64_t[B2ND_MAX_DIM] sel_size + + key_ = malloc(len(key) * sizeof(int64_t *)) + + for i in range(self.array.ndim): + buffershape_[i] = len(key[i]) + buffersize_ *= buffershape_[i] + sel_size[i] = len(key[i]) + key_[i] = malloc(sel_size[i] * sizeof(int64_t)) + for j in range(len(key[i])): + key_[i][j] = key[i][j] + + cdef Py_buffer buf + PyObject_GetBuffer(arr, &buf, PyBUF_SIMPLE) + + _check_rc(b2nd_set_orthogonal_selection(self.array, key_, sel_size, buf.buf, + buffershape_, buffersize_), "Error while getting orthogonal selection") + PyBuffer_Release(&buf) + for i in range(len(key)): + free(key_[i]) # Free the allocated memory for each key + free(key_) + return arr + + + def get_slice(self, key, mask, **kwargs): + start, stop = key + shape = tuple(sp - st for sp, st in zip(stop, start)) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + if blocks and len(shape) != len(blocks): + for i in range(len(shape)): + if shape[i] == 1: + blocks.insert(i, 1) + if chunks and len(shape) != len(chunks): + for i in range(len(shape)): + if shape[i] == 1: + chunks.insert(i, 1) + chunks, blocks = blosc2.compute_chunks_blocks(shape, chunks, blocks, self.dtype) + + # shape will be overwritten by get_slice + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, + self.dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + ndim = self.ndim + cdef int64_t[B2ND_MAX_DIM] start_, stop_ + for i in range(ndim): + start_[i] = start[i] + stop_[i] = stop[i] + + cdef b2nd_array_t *array + _check_rc(b2nd_get_slice(ctx, &array, self.array, start_, stop_), + "Error while getting the slice") + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + + cdef c_bool 
mask_[B2ND_MAX_DIM] + for i in range(ndim): + mask_[i] = mask[i] + _check_rc(b2nd_squeeze_index(array, &array, mask_), "Error while squeezing sliced array") + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + + + return ndarray + + def set_slice(self, key, ndarray): + ndim = self.ndim + start, stop = key + cdef Py_buffer buf + PyObject_GetBuffer(ndarray, &buf, PyBUF_SIMPLE) + + cdef int64_t[B2ND_MAX_DIM] buffershape_, start_, stop_ + for i in range(ndim): + start_[i] = start[i] + stop_[i] = stop[i] + buffershape_[i] = stop[i] - start[i] + + _check_rc(b2nd_set_slice_cbuffer(buf.buf, buffershape_, buf.len, start_, stop_, self.array), + "Error while setting the slice") + PyBuffer_Release(&buf) + + return self + + def tobytes(self): + buffersize = self.size * self.array.sc.typesize + buffer = bytes(buffersize) + _check_rc(b2nd_to_cbuffer(self.array, buffer, buffersize), + "Error while filling the buffer") + + return buffer + + def to_cframe(self): + cdef c_bool needs_free + cdef uint8_t *cframe + cdef int64_t cframe_len; + cdef int rc; + rc = b2nd_to_cframe(self.array, &cframe, &cframe_len, &needs_free) + if rc < 0: + raise RuntimeError("Error while getting the cframe") + out = PyBytes_FromStringAndSize(cframe, cframe_len) + if needs_free: + free(cframe) + + return out + + def copy(self, dtype, **kwargs): + chunks = kwargs.pop("chunks", self.chunks) + blocks = kwargs.pop("blocks", self.blocks) + kwargs["contiguous"] = kwargs.get("contiguous", self.array.sc.storage.contiguous) + + chunks, blocks = blosc2.compute_chunks_blocks(self.shape, chunks, blocks, dtype, **kwargs) + cdef b2nd_context_t *ctx = create_b2nd_context(self.shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_copy(ctx, self.array, &array), + "Error while copying the array") + + ndarray = 
blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + + return ndarray + + def resize(self, new_shape): + cdef int64_t new_shape_[B2ND_MAX_DIM] + for i, s in enumerate(new_shape): + new_shape_[i] = s + _check_rc(b2nd_resize(self.array, new_shape_, NULL), + "Error while resizing the array") + + def as_ffi_ptr(self): + return PyCapsule_New(self.array, "b2nd_array_t*", NULL) + + cdef udf_udata *_fill_udf_udata(self, func_id, inputs): + cdef udf_udata *udata = malloc(sizeof(udf_udata)) + udata.py_func = malloc(strlen(func_id) + 1) + strcpy(udata.py_func, func_id) + udata.inputs_id = id(inputs) + udata.output_cdtype = np.dtype(self.dtype).num + udata.array = self.array + # Save these in udf_udata to avoid computing them for each block + for i in range(self.array.ndim): + udata.chunks_in_array[i] = udata.array.extshape[i] // udata.array.chunkshape[i] + udata.blocks_in_chunk[i] = udata.array.extchunkshape[i] // udata.array.blockshape[i] + + return udata + + cdef me_udata *_fill_me_udata(self, inputs, fp_accuracy, aux_reduc, jit=None): + cdef me_udata *udata = malloc(sizeof(me_udata)) + operands = list(inputs.values()) + ninputs = len(operands) + cdef b2nd_array_t** inputs_ = malloc(ninputs * sizeof(b2nd_array_t*)) + for i, operand in enumerate(operands): + inputs_[i] = operand.c_array + inputs_[i].chunk_cache.nchunk = -1 + inputs_[i].chunk_cache.data = NULL + udata.inputs = inputs_ + udata.ninputs = ninputs + cdef me_eval_params* eval_params = malloc(sizeof(me_eval_params)) + eval_params.disable_simd = False + eval_params.simd_ulp_mode = ME_SIMD_ULP_3_5 if fp_accuracy == blosc2.FPAccuracy.MEDIUM else ME_SIMD_ULP_1 + if jit is None: + eval_params.jit_mode = ME_JIT_DEFAULT + elif jit: + eval_params.jit_mode = ME_JIT_ON + else: + eval_params.jit_mode = ME_JIT_OFF + udata.eval_params = eval_params + udata.array = self.array + cdef void* 
aux_reduc_ptr = NULL + if aux_reduc is not None: + if not isinstance(aux_reduc, np.ndarray): + raise TypeError("aux_reduc must be a NumPy array") + aux_reduc_ptr = np.PyArray_DATA( aux_reduc) + udata.aux_reduc_ptr = aux_reduc_ptr + # Save these in udf_udata to avoid computing them for each block + for i in range(self.array.ndim): + udata.chunks_in_array[i] = udata.array.extshape[i] // udata.array.chunkshape[i] + udata.blocks_in_chunk[i] = udata.array.extchunkshape[i] // udata.array.blockshape[i] + + return udata + + def _set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None, jit=None): + # Set prefilter for miniexpr + cdef blosc2_cparams* cparams = self.array.sc.storage.cparams + cparams.prefilter = miniexpr_prefilter + + cdef int jit_mode = ME_JIT_DEFAULT + if jit is True: + jit_mode = ME_JIT_ON + elif jit is False: + jit_mode = ME_JIT_OFF + + cdef me_udata* udata = self._fill_me_udata(inputs, fp_accuracy, aux_reduc, jit=jit) + + # Get the compiled expression handle for multi-threading + cdef Py_ssize_t n = len(inputs) + cdef me_variable* variables = malloc(sizeof(me_variable) * n) + if variables == NULL: + raise MemoryError() + cdef me_variable *var + cdef np.dtype out_np_dtype = np.dtype(self.dtype) + cdef me_dtype me_output_dtype = _me_dtype_from_numpy_dtype(out_np_dtype) + if me_output_dtype < 0: + raise TypeError(f"miniexpr does not support output dtype: {out_np_dtype}") + + cdef np.dtype operand_dtype + for i, (k, v) in enumerate(inputs.items()): + var = &variables[i] + var_name = k.encode("utf-8") if isinstance(k, str) else k + var.name = malloc(strlen(var_name) + 1) + strcpy(var.name, var_name) + operand_dtype = np.dtype(v.dtype) + var.dtype = _me_dtype_from_numpy_dtype(operand_dtype) + if var.dtype < 0: + raise TypeError(f"miniexpr does not support operand dtype '{operand_dtype}' for input '{k}'") + var.address = NULL # chunked compile: addresses provided later + var.type = 0 # auto-set to ME_VARIABLE inside compiler + var.context = NULL + 
var.itemsize = v.dtype.itemsize if v.dtype.num == 19 else 0 # only store item type if string + + cdef int error = 0 + cdef bytes expression_bytes + cdef str expression_display + if isinstance(expression, str): + expression_display = expression + expression_bytes = (expression).encode("utf-8") + elif isinstance(expression, bytes): + expression_bytes = expression + expression_display = (expression).decode("utf-8", "replace") + else: + expression_display = str(expression) + expression_bytes = expression_display.encode("utf-8") + cdef me_dtype = me_output_dtype + cdef me_expr *out_expr + cdef int ndims = self.array.ndim + cdef int64_t* shape = &self.array.shape[0] + cdef int32_t* chunkshape = &self.array.chunkshape[0] + cdef int32_t* blockshape = &self.array.blockshape[0] + cdef int rc = me_compile_nd_jit(expression_bytes, variables, n, me_dtype, ndims, + shape, chunkshape, blockshape, jit_mode, + &error, &out_expr) + cdef str me_error_msg = _me_compile_error_details(rc, error) + if rc == ME_COMPILE_ERR_INVALID_ARG_TYPE: + raise TypeError(f"miniexpr does not support operand or output dtype: {expression_display}; details: {me_error_msg}") + if rc != ME_COMPILE_SUCCESS: + raise NotImplementedError(f"Cannot compile expression: {expression_display}; details: {me_error_msg}") + udata.miniexpr_handle = out_expr + + # Free resources + for i in range(len(inputs)): + free(variables[i].name) + free(variables) + + cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) + preparams.user_data = udata + preparams.output_is_disposable = False if aux_reduc is None else True + cparams.preparams = preparams + _check_cparams(cparams) + + if self.array.sc.cctx != NULL: + # Freeing NULL context can lead to segmentation fault + blosc2_free_ctx(self.array.sc.cctx) + self.array.sc.cctx = blosc2_create_cctx(dereference(cparams)) + if self.array.sc.cctx == NULL: + raise RuntimeError("Could not create compression context") + + def _set_pref_udf(self, func, 
inputs_id): + if self.array.sc.storage.cparams.nthreads > 1: + raise AttributeError("compress `nthreads` must be 1 when assigning a prefilter") + + func_id = func.__name__ + blosc2.prefilter_funcs[func_id] = func + func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id + + # Set prefilter + cdef blosc2_cparams* cparams = self.array.sc.storage.cparams + cparams.prefilter = general_udf_prefilter + + cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) + preparams.user_data = self._fill_udf_udata(func_id, inputs_id) + cparams.preparams = preparams + _check_cparams(cparams) + + blosc2_free_ctx(self.array.sc.cctx) + self.array.sc.cctx = blosc2_create_cctx(dereference(cparams)) + if self.array.sc.cctx == NULL: + raise RuntimeError("Could not create compression context") + + def _set_postf_udf(self, func, inputs_id): + if self.array.sc.storage.dparams.nthreads > 1: + raise AttributeError("decompress `nthreads` must be 1 when assigning a postfilter") + + func_id = func.__name__ + blosc2.postfilter_funcs[func_id] = func + func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id + + # Set postfilter + cdef blosc2_dparams *dparams = self.array.sc.storage.dparams + dparams.postfilter = general_udf_postfilter + # Fill postparams + cdef blosc2_postfilter_params *postparams = malloc( + sizeof(blosc2_postfilter_params)) + postparams.user_data = self._fill_udf_udata(func_id,inputs_id) + dparams.postparams = postparams + _check_dparams(dparams, self.array.sc.storage.cparams) + + if self.array.sc.dctx != NULL: + # Freeing NULL context can lead to segmentation fault + blosc2_free_ctx(self.array.sc.dctx) + self.array.sc.dctx = blosc2_create_dctx(dereference(dparams)) + if self.array.sc.dctx == NULL: + raise RuntimeError("Could not create decompression context") + + def __dealloc__(self): + if self.array != NULL: + _check_rc(b2nd_free(self.array), "Error while freeing the array") + + +cdef b2nd_context_t* 
create_b2nd_context(shape, chunks, blocks, dtype, kwargs): + if isinstance(dtype, list) and len(dtype) > 0 and isinstance(dtype[0], tuple): + # Extract just the field names and basic dtype info + fields = [] + for field in dtype: + name = field[0] + field_dtype = field[1] + + # Handle different field formats: + # 1. ('name', ('|S10', {'h5py_encoding': 'ascii'})) - h5py style + # 2. ('name', ' 0: + # h5py nested representation with metadata dict + field_dtype = field_dtype[0] + + # Check if we have shape information as third element + if len(field) > 2 and field[2] is not None: + # Include the shape information + fields.append((name, field_dtype, field[2])) + else: + fields.append((name, field_dtype)) + + dtype = np.dtype(fields) + else: + dtype = np.dtype(dtype) + + typesize = dtype.itemsize + if 'cparams' in kwargs: + kwargs['cparams']['typesize'] = typesize + else: + kwargs['cparams'] = {'typesize': typesize} # last filter is shuffle + if isinstance(dtype, np.dtypes.StrDType) or dtype == np.str_: + kwargs['cparams']['filters'] = [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE] + kwargs['cparams']['filters_meta'] = [0] * 5 + [4] # unicode char bytesize + if dtype.kind == 'V': + str_dtype = str(dtype) + else: + str_dtype = dtype.str + str_dtype = str_dtype.encode("utf-8") if isinstance(str_dtype, str) else str_dtype + + urlpath = kwargs.get("urlpath") + if 'contiguous' not in kwargs: + # Make contiguous true for disk, else sparse (for in-memory performance) + kwargs['contiguous'] = False if urlpath is None else True + + if urlpath is not None: + if isinstance(urlpath, pathlib.PurePath): + urlpath = str(urlpath) + _urlpath = urlpath.encode() if isinstance(urlpath, str) else urlpath + kwargs["urlpath"] = _urlpath + + if kwargs.get("mmap_mode") is not None: + kwargs["mode"] = mode_from_mmap_mode(kwargs["mmap_mode"]) + + mode = kwargs.get("mode", "a") + if kwargs is not None: + if mode == "w": + blosc2.remove_urlpath(urlpath) + elif mode == "r" and urlpath is not 
None: + raise ValueError("NDArray must already exist") + + # Create storage + cdef blosc2_storage storage + cdef blosc2_cparams *cparams = malloc(sizeof(blosc2_cparams)) + cdef blosc2_dparams *dparams = malloc(sizeof(blosc2_dparams)) + storage.cparams = cparams + storage.dparams = dparams + create_storage(&storage, kwargs) + + # Shapes + ndim = len(shape) + cdef int64_t[B2ND_MAX_DIM] shape_ + cdef int32_t[B2ND_MAX_DIM] chunkshape + cdef int32_t[B2ND_MAX_DIM] blockshape + for i in range(ndim): + chunkshape[i] = chunks[i] + blockshape[i] = blocks[i] + shape_[i] = shape[i] + + # Metalayers + meta = kwargs.get('meta', None) + cdef blosc2_metalayer[B2ND_MAX_METALAYERS] metalayers + + if meta is None: + return b2nd_create_ctx(&storage, len(shape), shape_, chunkshape, blockshape, str_dtype, + B2ND_DEFAULT_DTYPE_FORMAT, NULL, 0) + else: + nmetalayers = len(meta) + for i, (name, content) in enumerate(meta.items()): + name2 = name.encode("utf-8") if isinstance(name, str) else name # do a copy + metalayers[i].name = strdup(name2) + content = packb(content, default=encode_tuple, strict_types=True, use_bin_type=True) + metalayers[i].content = malloc(len(content)) + memcpy(metalayers[i].content, content, len(content)) + metalayers[i].content_len = len(content) + + return b2nd_create_ctx(&storage, len(shape), shape_, chunkshape, blockshape, str_dtype, + B2ND_DEFAULT_DTYPE_FORMAT, metalayers, nmetalayers) + + +def uninit(shape, chunks, blocks, dtype, **kwargs): + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_uninit(ctx, &array), "Could not build uninit array") + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + ndarray.schunk.mode = kwargs.get("mode", "a") + + return 
ndarray + + +def nans(shape, chunks, blocks, dtype, **kwargs): + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_nans(ctx, &array), "Could not build nans array") + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + ndarray.schunk.mode = kwargs.get("mode", "a") + + return ndarray + + +def empty(shape, chunks, blocks, dtype, **kwargs): + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_empty(ctx, &array), "Could not build empty array") + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + ndarray.schunk.mode = kwargs.get("mode", "a") + + return ndarray + + +def zeros(shape, chunks, blocks, dtype, **kwargs): + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_zeros(ctx, &array), "Could not build zeros array") + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + ndarray.schunk.mode = kwargs.get("mode", "a") + + return ndarray + + +def full(shape, chunks, blocks, fill_value, dtype, **kwargs): + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + 
dtype = np.dtype(dtype) + nparr = np.array([fill_value], dtype=dtype) + cdef Py_buffer val + PyObject_GetBuffer(nparr, &val, PyBUF_SIMPLE) + + cdef b2nd_array_t *array + _check_rc(b2nd_full(ctx, &array, val.buf), "Could not create full array") + PyBuffer_Release(&val) + + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + ndarray.schunk.mode = kwargs.get("mode", "a") + + return ndarray + + +def from_buffer(buf, shape, chunks, blocks, dtype, **kwargs): + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_from_cbuffer(ctx, &array, buf, len(buf)), + "Error while creating the NDArray") + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") + ndarray.schunk.mode = kwargs.get("mode", "a") + + return ndarray + + +def asarray(ndarray, chunks, blocks, **kwargs): + interface = ndarray.__array_interface__ + cdef Py_buffer buf + PyObject_GetBuffer(ndarray, &buf, PyBUF_SIMPLE) + + shape = interface["shape"] + dtype = interface["typestr"] + if dtype.startswith("|V") and "descr" in interface: + # Structured dtype + dtype = interface["descr"] + cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) + if ctx == NULL: + raise RuntimeError("Error while creating the context") + + cdef b2nd_array_t *array + _check_rc(b2nd_from_cbuffer(ctx, &array, buf.buf, buf.len), + "Error while creating the NDArray") + PyBuffer_Release(&buf) + ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), + _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) + _check_rc(b2nd_free_ctx(ctx), "Error 
def ndarray_from_cframe(cframe, copy=False):
    """
    Build an NDArray from a serialized contiguous frame (cframe).

    Parameters
    ----------
    cframe: bytes-like
        Object supporting the buffer protocol that holds the cframe.
    copy: bool
        Forwarded to ``b2nd_from_cframe``. When False, the super-chunk of the
        result is flagged with ``_avoid_cframe_free(True)``.

    Returns
    -------
    out: blosc2.NDArray
        Python wrapper around the array obtained from the cframe.

    Raises
    ------
    RuntimeError
        If ``b2nd_from_cframe`` reports an error (negative return code).
    """
    cdef Py_buffer buf
    cdef b2nd_array_t *array
    cdef int rc
    PyObject_GetBuffer(cframe, &buf, PyBUF_SIMPLE)
    try:
        rc = b2nd_from_cframe(buf.buf, buf.len, copy, &array)
        if rc < 0:
            raise RuntimeError("Could not get the NDArray from the cframe")
        ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL),
                                 _array=PyCapsule_New(array, "b2nd_array_t*", NULL))
    finally:
        # Always drop the buffer export, including on the error path;
        # otherwise the exporter stays locked after a failed call.
        PyBuffer_Release(&buf)

    if not copy:
        ndarray._schunk._avoid_cframe_free(True)
    return ndarray
def expand_dims(arr1: NDArray, axis_mask: list[bool], final_dims: int) -> blosc2.NDArray:
    """
    Add new dummy axis to NDArray object at specified dimension.

    Parameters
    ----------
    arr1: NDArray
        The array to build the expanded view from.
    axis_mask: list[bool]
        Mask whose first `final_dims` entries are copied into the C mask passed
        to ``b2nd_expand_dims``.
    final_dims: int
        Number of mask entries to use; must not exceed ``B2ND_MAX_DIM``.

    Returns
    -------
    out: blosc2.NDArray
        A new NDArray wrapping the view, with ``_base`` set so that the
        original array is kept referenced.

    Raises
    ------
    ValueError
        If `final_dims` is larger than ``B2ND_MAX_DIM``.
    """
    cdef b2nd_array_t *view
    cdef c_bool mask_[B2ND_MAX_DIM]
    # Guard the fixed-size C mask against being written past its end
    if final_dims > B2ND_MAX_DIM:
        raise ValueError(f"Cannot expand dimensions beyond {B2ND_MAX_DIM} dimensions")
    for i in range(final_dims):
        mask_[i] = axis_mask[i]
    _check_rc(b2nd_expand_dims(arr1.array, &view, mask_, final_dims),"Error while expanding the arrays")

    # create view with reference to arr1 to hold onto
    # (if arr1 already has a base, that base is reused instead of arr1 itself)
    new_base = arr1 if arr1.base is None else arr1.base
    return blosc2.NDArray(_schunk=PyCapsule_New(view.sc, "blosc2_schunk*", NULL),
                          _array=PyCapsule_New(view, "b2nd_array_t*", NULL), _base=new_base)
@contextmanager
def c2context(
    *,
    urlbase: (str | None) = None,
    username: (str | None) = None,
    password: (str | None) = None,
    auth_token: (str | None) = None,
) -> None:
    """
    Context manager that sets parameters in Caterva2 subscriber requests.

    A parameter not specified or set to ``None`` will inherit the value from the
    previous context manager, defaulting to an environment variable (see
    below) if supported by that parameter. Parameters set to an empty string
    will not be used in requests (without a default either).

    If the subscriber requires authorization for requests, you can either
    provide an `auth_token` (which you should have obtained previously from the
    subscriber), or both `username` and `password` to obtain the token by
    logging in to the subscriber. The token will be reused until it is explicitly
    reset or requested again in a later context manager invocation.

    Please note that this manager is reentrant but not safe for concurrent use.

    Parameters
    ----------
    urlbase : str | None
        The base URL to be used when a C2Array instance does not have a subscriber
        URL base set. If not specified, it defaults to the value of the
        ``BLOSC_C2URLBASE`` environment variable.
    username : str | None
        The username for logging in to the subscriber to obtain an authorization token.
        If not specified, it defaults to the value of the ``BLOSC_C2USERNAME`` environment variable.
    password : str | None
        The password for logging in to the subscriber to obtain an authorization token.
        If not specified, it defaults to the value of the ``BLOSC_C2PASSWORD`` environment variable.
    auth_token : str | None
        The authorization token to be used when a C2Array instance does not have an
        authorization token set.

    Yields
    ------
    out: None

    Raises
    ------
    ValueError
        If both an `auth_token` and a `username`/`password` are given.
    """
    global _subscriber_data

    # Perform login to get an authorization token.
    # Environment credentials are only consulted when no token was supplied.
    if not auth_token:
        username = username or os.environ.get("BLOSC_C2USERNAME")
        password = password or os.environ.get("BLOSC_C2PASSWORD")
    if username or password:
        if auth_token:
            raise ValueError("Either provide a username/password or an authorization token")
        auth_token = login(username, password, urlbase)

    # Snapshot the current settings so they can be restored on exit, which is
    # what makes nested invocations work.
    old_sub_data = _subscriber_data
    try:
        new_sub_data = old_sub_data.copy()  # inherit old values
        if urlbase is not None:
            new_sub_data["urlbase"] = urlbase
        elif old_sub_data["urlbase"] is None:
            # The variable may have gotten a value after program start.
            new_sub_data["urlbase"] = os.environ.get("BLOSC_C2URLBASE")
        if auth_token is not None:
            new_sub_data["auth_token"] = auth_token
        _subscriber_data = new_sub_data
        yield
    finally:
        _subscriber_data = old_sub_data
def slice_to_string(slice_):
    """Serialize an index expression into the string form sent to the server.

    Parameters
    ----------
    slice_ : int, slice, tuple of ints and slices, or None
        The index expression. ``None``, ``()`` and ``slice(None)`` all mean
        "everything" and produce an empty string.

    Returns
    -------
    out: str
        Comma-separated components, one per dimension: integers as-is and
        slices as ``"start:stop"`` with omitted bounds left empty.

    Raises
    ------
    IndexError
        If a slice has a step other than 1 (or None).
    """
    if slice_ is None or slice_ == () or slice_ == slice(None):
        return ""
    if not isinstance(slice_, tuple):
        slice_ = (slice_,)
    slice_parts = []
    for index in slice_:
        if isinstance(index, int):
            slice_parts.append(str(index))
        elif isinstance(index, slice):
            if index.step not in (1, None):
                raise IndexError("Only step=1 is supported")
            # A start of 0 and an omitted start select the same elements, so
            # both are rendered as an empty bound.
            start = index.start or ""
            # An explicit stop of 0 is NOT the same as an omitted stop (it
            # selects nothing), so only None may become an empty bound.
            stop = "" if index.stop is None else index.stop
            slice_parts.append(f"{start}:{stop}")
    return ", ".join(slice_parts)
Currently, it + will be sent as an HTTP cookie. + + Returns + ------- + out: C2Array + + Examples + -------- + >>> import blosc2 + >>> urlbase = "https://cat2.cloud/demo" + >>> path = "@public/examples/dir1/ds-3d.b2nd" + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> remote_array.shape + (3, 4, 5) + >>> remote_array.chunks + (2, 3, 4) + >>> remote_array.blocks + (2, 2, 2) + >>> remote_array.dtype + dtype('float32') + """ + if path.startswith("/"): + raise ValueError("The path should start with a root name, not a slash") + self.path = path + + if urlbase and not urlbase.endswith("/"): + urlbase += "/" + self.urlbase = urlbase + + self.auth_token = auth_token + + # Try to 'open' the remote path + try: + self.meta = info(self.path, self.urlbase, auth_token=self.auth_token) + except requests.HTTPError as err: + raise FileNotFoundError(f"Remote path not found: {path}.\nError was: {err}") from err + cparams = self.meta["schunk"]["cparams"] + # Remove "filters, meta" from cparams; this is an artifact from the server + cparams.pop("filters, meta", None) + self._cparams = blosc2.CParams(**cparams) + + def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray: + """ + Get a slice of the array (returning NumPy array). + + Parameters + ---------- + slice_ : int, slice, tuple of ints and slices, or None + The slice to fetch. + + Returns + ------- + out: numpy.ndarray + A numpy.ndarray containing the data slice. 
+ + Examples + -------- + >>> import blosc2 + >>> urlbase = "https://cat2.cloud/demo" + >>> path = "@public/examples/dir1/ds-2d.b2nd" + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> data_slice = remote_array[3:5, 1:4] + >>> data_slice.shape + (2, 3) + >>> data_slice[:] + array([[61, 62, 63], + [81, 82, 83]], dtype=uint16) + """ + slice_ = slice_to_string(slice_) + return fetch_data( + self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=False + ) + + def slice(self, slice_: int | slice | Sequence[slice]) -> blosc2.NDArray: + """ + Get a slice of the array (returning blosc2 NDArray array). + + Parameters + ---------- + slice_ : int, slice, tuple of ints and slices, or None + The slice to fetch. + + Returns + ------- + out: blosc2.NDArray + A blosc2.NDArray containing the data slice. + + Examples + -------- + >>> import blosc2 + >>> urlbase = "https://cat2.cloud/demo" + >>> path = "@public/examples/dir1/ds-2d.b2nd" + >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) + >>> data_slice = remote_array.slice((slice(3,5), slice(1,4))) + >>> data_slice.shape + (2, 3) + >>> type(data_slice) + blosc2.ndarray.NDArray + """ + slice_ = slice_to_string(slice_) + return fetch_data( + self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=True + ) + + def __len__(self) -> int: + """Returns the length of the first dimension of the array. + This is equivalent to ``self.shape[0]``. + """ + return self.shape[0] + + def get_chunk(self, nchunk: int) -> bytes: + """ + Get the compressed unidimensional chunk of a :ref:`C2Array`. + + Parameters + ---------- + nchunk: int + The index of the unidimensional chunk to retrieve. + + Returns + ------- + out: bytes + The requested compressed chunk. 
+ + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> urlbase = "https://cat2.cloud/demo" + >>> path = "@public/examples/dir1/ds-3d.b2nd" + >>> a = blosc2.C2Array(path, urlbase) + >>> # Get the compressed chunk from array 'a' for index 0 + >>> compressed_chunk = a.get_chunk(0) + >>> f"Size of chunk {0} from a: {len(compressed_chunk)} bytes" + Size of chunk 0 from a: 160 bytes + >>> # Decompress the chunk and convert it to a NumPy array + >>> decompressed_chunk = blosc2.decompress(compressed_chunk) + >>> np.frombuffer(decompressed_chunk, dtype=a.dtype) + array([ 0., 1., 5., 6., 20., 21., 25., 26., 2., 3., 7., 8., 22., + 23., 27., 28., 10., 11., 0., 0., 30., 31., 0., 0., 12., 13., + 0., 0., 32., 33., 0., 0.], dtype=float32) + """ + url = _sub_url(self.urlbase, f"api/chunk/{self.path}") + params = {"nchunk": nchunk} + response = _xget(url, params=params, auth_token=self.auth_token) + return response.content + + @property + def shape(self) -> tuple[int]: + """The shape of the remote array""" + return tuple(self.meta["shape"]) + + @property + def chunks(self) -> tuple[int]: + """The chunks of the remote array""" + return tuple(self.meta["chunks"]) + + @property + def blocks(self) -> tuple[int]: + """The blocks of the remote array""" + return tuple(self.meta["blocks"]) + + @property + def dtype(self) -> np.dtype: + """The dtype of the remote array""" + return np.dtype(self.meta["dtype"]) + + @property + def cparams(self) -> blosc2.CParams: + """The compression parameters of the remote array""" + return self._cparams + + @property + def nbytes(self) -> int: + """The number of bytes of the remote array""" + return self.meta["schunk"]["nbytes"] + + @property + def cbytes(self) -> int: + """The number of compressed bytes of the remote array""" + return self.meta["schunk"]["cbytes"] + + @property + def cratio(self) -> float: + """The compression ratio of the remote array""" + return self.meta["schunk"]["cratio"] + + # TODO: Add these to SChunk model in 
srv_utils and then access them here + # @property + # def dparams(self) -> float: + # """The dparams of the remote array""" + # return + # + # @property + # def meta(self) -> float: + # """The meta of the remote array""" + # return + + # TODO: This seems to cause problems for proxy sources (see tests/ndarray/test_proxy_c2array.py::test_open) + # @property + # def urlpath(self) -> str: + # """The URL path of the remote array""" + # return self.meta["schunk"]["urlpath"] + + @property + def vlmeta(self) -> dict: + """The variable-length metadata f the remote array""" + return self.meta["schunk"]["vlmeta"] + + @property + def info(self) -> InfoReporter: + """ + Print information about this remote array. + """ + return InfoReporter(self) + + @property + def info_items(self) -> list: + """A list of tuples with the information about the remote array. + Each tuple contains the name of the attribute and its value. + """ + items = [] + items += [("type", f"{self.__class__.__name__}")] + items += [("shape", self.shape)] + items += [("chunks", self.chunks)] + items += [("blocks", self.blocks)] + items += [("dtype", self.dtype)] + items += [("nbytes", self.nbytes)] + items += [("cbytes", self.cbytes)] + items += [("cratio", f"{self.cratio:.2f}")] + items += [("cparams", self.cparams)] + # items += [("dparams", self.dparams)] + return items + + # TODO: Access chunksize, size, ext_chunks, etc. + # @property + # def size(self) -> int: + # """The size (in bytes) for this container.""" + # return self.cbytes + # @property + # def chunksize(self) -> int: + # """NOT the same as `SChunk.chunksize ` + # in case :attr:`chunks` is not multiple in + # each dimension of :attr:`blocks` (or equivalently, if :attr:`chunks` is + # not the same as :attr:`ext_chunks`). 
class URLPath:
    def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None):
        """
        Describe the location of a remote data file (aka :ref:`C2Array <C2Array>`).

        Instances only record the given values; they are meant to be passed to
        the :func:`blosc2.open` function.

        The parameters are the same as for the :meth:`C2Array.__init__`.

        """
        # Stored verbatim: no validation or normalization happens here
        self.path, self.urlbase, self.auth_token = path, urlbase, auth_token
def _check_codec(codec):
    """Validate that `codec` is a member of :class:`blosc2.Codec`.

    Parameters
    ----------
    codec: blosc2.Codec
        The codec to validate.

    Raises
    ------
    ValueError
        If `codec` is not one of the :class:`blosc2.Codec` members.
    """
    if codec not in blosc2.Codec:
        # Build the list of valid names from the enumeration itself so that the
        # message can never reference a stale or undefined name.
        valid = [member.name for member in blosc2.Codec]
        raise ValueError(f"codec can only be one of: {valid}, not '{codec}'")
def decompress(
    src: object, dst: object | bytearray = None, as_bytearray: bool = False
) -> str | bytes | bytearray | None:
    """Decompresses a bytes-like compressed object.

    This is a thin wrapper: all the work is delegated to
    ``blosc2_ext.decompress``.

    Parameters
    ----------
    src: bytes-like object
        The data to be decompressed. Must be a bytes-like object
        that supports the Python Buffer Protocol, like bytes, bytearray,
        memoryview, or
        `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_.
    dst: NumPy object or bytearray
        The destination NumPy object or bytearray to fill,
        the length of which must be greater than 0.
        The user must ensure it has enough capacity to hold
        the decompressed data.
        Default is None, meaning that a new `bytes` or `bytearray` object
        is created, filled and returned.
    as_bytearray: bool (optional)
        If True, then return type will be a bytearray object
        instead of a bytes object.

    Returns
    -------
    out: str or bytes or bytearray
        If :paramref:`dst` is `None`, the decompressed data will be returned as a Python str or bytes object.
        If as_bytearray is True, the return type will be a bytearray object.

        If :paramref:`dst` is not `None`, the function will return `None` because the result
        will already be stored in :paramref:`dst`.

    Raises
    ------
    RuntimeError
        Raised if the compressed data is corrupted or the output buffer is not large enough.
        Also raised if a `bytes` object could not be obtained.
    TypeError
        Raised if :paramref:`src` does not support the Buffer Protocol.
    ValueError
        Raised if the length of :paramref:`src` is smaller than the minimum required length.
        Also raised if `dst` is not `None` and its length is 0.

    Examples
    --------
    >>> import array, sys
    >>> a = array.array('i', range(1000*1000))
    >>> a_bytesobj = a.tobytes()
    >>> c_bytesobj = blosc2.compress(a_bytesobj, typesize=4)
    >>> a_bytesobj2 = blosc2.decompress(c_bytesobj)
    >>> a_bytesobj == a_bytesobj2
    True
    >>> b"" == blosc2.decompress(blosc2.compress(b""))
    True
    >>> b"1"*7 == blosc2.decompress(blosc2.compress(b"1"*7))
    True
    >>> type(blosc2.decompress(blosc2.compress(b"1"*7),
    ...                        as_bytearray=True)) is bytearray
    True
    >>> import numpy as np
    >>> arr = np.arange(10)
    >>> comp_arr = blosc2.compress(arr)
    >>> dest = np.empty(arr.shape, arr.dtype)
    >>> blosc2.decompress(comp_arr, dst=dest)
    >>> np.array_equal(arr, dest)
    True
    """
    return blosc2_ext.decompress(src, dst, as_bytearray)
def unpack(packed_object: str | bytes, **kwargs: dict) -> object:
    """Restore an object previously compressed with :func:`~blosc2.pack`.

    The buffer is decompressed and the result is unpickled.

    Parameters
    ----------
    packed_object: str or bytes
        The packed object to be decompressed.
    kwargs: dict, optional
        Parameters that can be passed to the
        `pickle.loads API <https://docs.python.org/3/library/pickle.html#pickle.loads>`_

    Returns
    -------
    out: object
        The decompressed data in form of the original object.

    Raises
    ------
    TypeError
        If :paramref:`packed_object` is not of type bytes or string.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.arange(1e6)
    >>> parray = blosc2.pack(a)
    >>> len(parray) < a.size * a.itemsize
    True
    >>> a2 = blosc2.unpack(parray)
    >>> np.array_equal(a, a2)
    True
    >>> a = np.array(['å', 'ç', 'ø'])
    >>> parray = blosc2.pack(a)
    >>> a2 = blosc2.unpack(parray)
    >>> np.array_equal(a, a2)
    True
    """
    # SECURITY: unpickling can execute arbitrary code; only unpack buffers that
    # come from a trusted source.
    # Passing an empty kwargs mapping is the same as passing no options at all.
    return pickle.loads(decompress(packed_object), **kwargs)
def unpack_array(packed_array: str | bytes, **kwargs: dict) -> np.ndarray:
    """Restore a packed NumPy array.

    When extra `kwargs` are given and the restored array is a non-empty 1-dim
    sequence made only of ``bytes`` items, the items are decoded as UTF-8 and
    returned as a new array of strings.

    Parameters
    ----------
    packed_array: str or bytes
        The packed array to be restored.
    kwargs: dict, optional
        Parameters that can be passed to the
        `pickle.loads API <https://docs.python.org/3/library/pickle.html#pickle.loads>`_

    Returns
    -------
    out: ndarray
        The decompressed data in form of a NumPy array.

    Raises
    ------
    TypeError
        If :paramref:`packed_array` is not of type bytes or string.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.arange(1e6)
    >>> parray = blosc2.pack_array(a)
    >>> len(parray) < a.size*a.itemsize
    True
    >>> a2 = blosc2.unpack_array(parray)
    >>> np.array_equal(a, a2)
    True
    >>> a = np.array(['å', 'ç', 'ø'])
    >>> parray = blosc2.pack_array(a)
    >>> a2 = blosc2.unpack_array(parray)
    >>> np.array_equal(a, a2)
    True
    """
    # SECURITY: unpickling can execute arbitrary code; only unpack buffers that
    # come from a trusted source.
    pickled_array = decompress(packed_array)
    if not kwargs:
        return pickle.loads(pickled_array)

    arr = pickle.loads(pickled_array, **kwargs)
    items = arr.tolist()
    # 0-dim arrays yield a scalar here (not iterable), and an empty list would
    # vacuously pass the all() test and lose its dtype: leave both untouched.
    if isinstance(items, list) and items and all(isinstance(x, bytes) for x in items):
        arr = np.array([x.decode("utf-8") for x in items])
    return arr
+ If urlpath is provided, the number of bytes in file is returned instead. + + Examples + -------- + >>> import numpy as np + >>> a = np.arange(1e6) + >>> cframe = blosc2.pack_array2(a) + >>> len(cframe) < a.size * a.itemsize + True + + See also + -------- + :func:`~blosc2.unpack_array2` + :func:`~blosc2.save_array` + :func:`~blosc2.pack_tensor` + :func:`~blosc2.save_tensor` + """ + # May we raise a DeprecationWarning here in the future? + return pack_tensor(arr, chunksize, **kwargs) + + +def unpack_array2(cframe: bytes) -> np.ndarray: + """Unpack (decompress) a packed NumPy array from a cframe. + + Parameters + ---------- + cframe: bytes + The packed array to be restored. + + Returns + ------- + out: np.ndarray + The unpacked NumPy array. + + Raises + ------ + TypeError + If :paramref:`cframe` is not of type bytes, or not a cframe. + RunTimeError + If an error occurs during decompression. + + Examples + -------- + >>> import numpy as np + >>> a = np.arange(1e6) + >>> cframe = blosc2.pack_array2(a) + >>> len(cframe) < a.size*a.itemsize + True + >>> a2 = blosc2.unpack_array2(cframe) + >>> np.array_equal(a, a2) + True + + See also + -------- + :func:`~blosc2.pack_array2` + :func:`~blosc2.pack_tensor` + :func:`~blosc2.save_array` + :func:`~blosc2.save_tensor` + """ + # May we raise a DeprecationWarning here in the future? + return unpack_tensor(cframe) + + +def save_array(arr: np.ndarray, urlpath: str, chunksize: int | None = None, **kwargs: dict) -> int: + """Save a serialized NumPy array to a specified file path. + + Parameters + ---------- + arr: np.ndarray + The NumPy array to be saved. + + urlpath: str + The path for the file where the array will be saved. + + chunksize: int + The size (in bytes) for the chunks during compression. If not provided, + it is computed automatically. + + kwargs: dict, optional + These are the same as the kwargs in :func:`SChunk.__init__ `. + + Returns + ------- + out: int + The number of bytes of the saved array. 
+ + Examples + -------- + >>> import numpy as np + >>> a = np.arange(1e6) + >>> serial_size = blosc2.save_array(a, "test.bl2", mode="w") + >>> serial_size < a.size * a.itemsize + True + + See also + -------- + :func:`~blosc2.load_array` + :func:`~blosc2.pack_array2` + :func:`~blosc2.save_tensor` + :func:`~blosc2.open` + """ + # May we raise a DeprecationWarning here in the future? + return pack_tensor(arr, chunksize=chunksize, urlpath=urlpath, **kwargs) + + +def load_array(urlpath: str, dparams: dict | None = None) -> np.ndarray: + """Load a serialized NumPy array from a file. + + Parameters + ---------- + urlpath: str + The path to the file containing the serialized array. + dparams: dict, optional + A dictionary with the decompression parameters, which can + be used in the :func:`~blosc2.decompress2` function. + + Returns + ------- + out: np.ndarray + The deserialized NumPy array. + + Raises + ------ + TypeError + If :paramref:`urlpath` is not in cframe format + RunTimeError + If any other error is detected. + + Examples + -------- + >>> import numpy as np + >>> a = np.arange(1e6) + >>> serial_size = blosc2.save_array(a, "test.bl2", mode="w") + >>> serial_size < a.size * a.itemsize + True + >>> a2 = blosc2.load_array("test.bl2") + >>> np.array_equal(a, a2) + True + + See also + -------- + :func:`~blosc2.save_array` + :func:`~blosc2.load_tensor` + :func:`~blosc2.pack_array2` + :func:`~blosc2.pack_tensor` + """ + # May we raise a DeprecationWarning here in the future? + return load_tensor(urlpath, dparams=dparams) + + +def pack_tensor( + tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, chunksize: int | None = None, **kwargs: dict +) -> bytes | int: + """Pack (compress) a TensorFlow or PyTorch tensor or a NumPy array. + + Parameters + ---------- + tensor: tensorflow.Tensor, torch.Tensor, or np.ndarray. + The tensor or array to be packed. + + chunksize: int, optional + The size (in bytes) for the chunks during compression. 
If not provided, + it is computed automatically. + + kwargs: dict, optional + These are the same as the kwargs in :func:`SChunk.__init__ `. + + Returns + ------- + out: bytes | int + The serialized version (cframe) of the array. + If urlpath is provided, the number of bytes in file is returned instead. + + Notes + ----- + In case you pass a TensorFlow/PyTorch tensor, the tensor will be converted to a NumPy array + before being packed. The tensor will be restored to its original form when unpacked. + + Examples + -------- + >>> import numpy as np + >>> th = np.arange(1e6, dtype=np.float32) + >>> cframe = blosc2.pack_tensor(th) + >>> if not os.getenv("BTUNE_TRADEOFF"): + ... assert len(cframe) < th.size * th.itemsize + ... + + See also + -------- + :func:`~blosc2.unpack_tensor` + :func:`~blosc2.save_tensor` + """ + arr = np.asarray(tensor) + + schunk = blosc2.SChunk(chunksize=chunksize, data=arr, **kwargs) + + # Guess the kind of tensor / array + repr_tensor = repr(tensor) + if "tensor" in repr_tensor: + kind = "torch" + elif "Tensor" in repr_tensor: + kind = "tensorflow" + elif "array" in repr_tensor: + kind = "numpy" + else: + raise TypeError(f"Unrecognized tensor/array: {tensor!r}") + + # dtype encoding requires some care + dtype = arr.dtype.descr if arr.dtype.kind == "V" else arr.dtype.str + + schunk.vlmeta["__pack_tensor__"] = (kind, arr.shape, dtype) + + if schunk.urlpath is None: + return schunk.to_cframe() + else: + return os.stat(schunk.urlpath).st_size + + +def _unpack_tensor(schunk): + kind, shape, dtype = schunk.vlmeta["__pack_tensor__"] + out = np.empty(shape, dtype=dtype) + schunk.get_slice(out=out) + + if kind == "torch": + import torch + + th = torch.from_numpy(out) + elif kind == "tensorflow": + import tensorflow as tf + + th = tf.constant(out) + elif kind == "numpy": + th = out + else: + raise TypeError(f"Unrecognized tensor kind: {kind}") + return th + + +def unpack_tensor(cframe: bytes) -> tensorflow.Tensor | torch.Tensor | np.ndarray: + """Unpack 
(decompress) a packed TensorFlow or PyTorch tensor or a NumPy + array from a cframe. + + Parameters + ---------- + cframe: bytes + The packed tensor to be restored. + + Returns + ------- + out: tensorflow.Tensor, torch.Tensor, or np.ndarray + The unpacked TensorFlow or PyTorch tensor or NumPy array. + + Raises + ------ + TypeError + If :paramref:`cframe` is not of type bytes, or not a cframe. + RunTimeError + If an error occurs during decompression. + + Examples + -------- + >>> import os + >>> import numpy as np + >>> th = np.arange(1e3, dtype=np.float32) + >>> cframe = blosc2.pack_tensor(th) + >>> if not os.getenv("BTUNE_TRADEOFF"): + ... assert len(cframe) < th.size * th.itemsize + ... + >>> th2 = blosc2.unpack_tensor(cframe) + >>> a = np.asarray(th) + >>> a2 = np.asarray(th2) + >>> np.array_equal(a, a2) + True + + See also + -------- + :func:`~blosc2.pack_tensor` + :func:`~blosc2.save_tensor` + """ + schunk = blosc2.schunk_from_cframe(cframe, False) + return _unpack_tensor(schunk) + + +def save_tensor( + tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, + urlpath: str, + chunksize: int | None = None, + **kwargs: dict, +) -> int: + """Save a serialized PyTorch or TensorFlow tensor or NumPy array to + a specified file path. + + Parameters + ---------- + tensor: tensorflow.Tensor, torch.Tensor, or np.ndarray + The tensor or array to be saved. + + urlpath: str + The file path where the tensor or array will be saved. + + chunksize: int + The size (in bytes) for the chunks during compression. If not provided, + it is computed automatically. + + kwargs: dict, optional + These are the same as the kwargs in :func:`SChunk.__init__ `. + + Returns + ------- + out: int + The number of bytes of the saved tensor or array. + + Examples + -------- + >>> import numpy as np + >>> th = np.arange(1e6, dtype=np.float32) + >>> serial_size = blosc2.save_tensor(th, "test.bl2", mode="w") + >>> if not os.getenv("BTUNE_TRADEOFF"): + ... 
assert serial_size < th.size * th.itemsize + ... + + See also + -------- + :func:`~blosc2.load_tensor` + :func:`~blosc2.pack_tensor` + :func:`~blosc2.open` + """ + return pack_tensor(tensor, chunksize=chunksize, urlpath=urlpath, **kwargs) + + +def load_tensor(urlpath: str, dparams: dict | None = None) -> tensorflow.Tensor | torch.Tensor | np.ndarray: + """Load a serialized PyTorch or TensorFlow tensor or NumPy array from a file. + + Parameters + ---------- + urlpath: str + The path to the file where the tensor or array is stored. + + dparams: dict, optional + A dictionary with the decompression parameters, which are the same as those + used in the :func:`~blosc2.decompress2` function. + + Returns + ------- + out: tensor or ndarray + The unpacked PyTorch or TensorFlow tensor or NumPy array. + + Raises + ------ + TypeError + If :paramref:`urlpath` is not in cframe format + RunTimeError + If some other problem is detected. + + Examples + -------- + >>> import numpy as np + >>> th = np.arange(1e6, dtype=np.float32) + >>> size = blosc2.save_tensor(th, "test.bl2", mode="w") + >>> if not os.getenv("BTUNE_TRADEOFF"): + ... assert size < th.size * th.itemsize + ... + >>> th2 = blosc2.load_tensor("test.bl2") + >>> np.array_equal(th, th2) + True + + See also + -------- + :func:`~blosc2.save_tensor` + :func:`~blosc2.pack_tensor` + """ + schunk = blosc2.open(urlpath, dparams=dparams) + return _unpack_tensor(schunk) + + +def set_compressor(codec: blosc2.Codec) -> int: + """Set the compressor to be used. If this function is not + called, then :py:obj:`blosc2.Codec.BLOSCLZ ` will be used by default. + + Parameters + ---------- + codec: :class:`Codec` + The compressor to be used. + + Returns + ------- + out: int + The code for the compressor (>=0). + + Raises + ------ + ValueError + If the compressor is not recognized or is not supported. 
+ + Notes + ----- + The `compname` parameter in python-blosc API has been replaced by :paramref:`codec` , using `compname` + as parameter or a string as a :paramref:`codec` value will not work. + + See also + -------- + :func:`~blosc2.get_compressor` + :func:`~blosc2.compressor_list` + """ + return blosc2_ext.set_compressor(codec) + + +def free_resources() -> None: + """Free any temporary memory and thread resources. + + Returns + ------- + out: None + + Notes + ----- + Blosc maintain a pool of threads waiting for work as well as some + temporary space. You can use this function to release these + resources when you are not going to use Blosc for a long time. + + Examples + -------- + >>> blosc2.free_resources() + """ + blosc2_ext.free_resources() + + +def set_nthreads(nthreads: int) -> int: + """Set the number of threads to be used during Blosc operations. + + Parameters + ---------- + nthreads: int + The number of threads to be used during Blosc operations. + + Returns + ------- + out: int + The previous number of threads used. + + Raises + ------ + ValueError + If :paramref:`nthreads` is larger than the maximum number of threads Blosc can use. + If :paramref:`nthreads` is not a positive integer. + + Notes + ----- + The number of threads can also be set via the ``BLOSC_NTHREADS`` environment + variable (e.g., ``export BLOSC_NTHREADS=1``). Additionally, you may want to set + ``NUMEXPR_NUM_THREADS`` (e.g., ``export NUMEXPR_NUM_THREADS=1``) as well since + numexpr is used under the hood when performing some operations. Note that + this function only sets the number of threads used by Blosc, not the number + of threads used by numexpr. + + The maximum number of threads for Blosc is :math:`2^{31} - 1`. In some + cases, Blosc gets better results if you set the number of threads + to a value slightly below your number of cores + (via :func:`~blosc2.detect_number_of_cores`). 
+ + Examples + -------- + Set the number of threads to 2 and then to 1: + + >>> oldn = blosc2.set_nthreads(2) + >>> blosc2.set_nthreads(1) + 2 + + See also + -------- + :attr:`~blosc2.nthreads` + """ + if blosc2.IS_WASM: + # Keep API validation semantics while forcing single-thread execution. + if nthreads > 2**31 - 1: + raise ValueError("nthreads must be less or equal than 2^31 - 1.") + if nthreads < 1: + raise ValueError("nthreads must be a positive integer.") + nthreads = 1 + + rc = blosc2_ext.set_nthreads(nthreads) + blosc2.nthreads = nthreads + return rc + + +def compressor_list(plugins: bool = False) -> list: + """ + Returns a list of compressors (codecs) available in the C library. + + Parameters + ---------- + plugins: bool + Whether to include plugins or not. + + Returns + ------- + out: list + The list of codec names. + + See also + -------- + :func:`~blosc2.get_compressor` + :func:`~blosc2.set_compressor` + + """ + cap = blosc2.GLOBAL_REGISTERED_CODECS_STOP if plugins else blosc2.DEFINED_CODECS_STOP + return [key for key in blosc2.Codec if key.value <= cap] + + +def set_blocksize(blocksize: int = 0) -> None: + """ + Force the use of a specific blocksize. + + Parameters + ---------- + blocksize: int + The blocksize to use. If 0, an automatic blocksize will be used (the default). + + Returns + ------- + out: None + + Notes + ----- + This is a low-level function and is recommended for expert users only. + + Examples + -------- + >>> blosc2.set_blocksize(512) + >>> blosc2.set_blocksize(0) + """ + blosc2_ext.set_blocksize(blocksize) + + +def clib_info(codec: blosc2.Codec) -> tuple: + """Return information about the compression libraries in the C library. + + Parameters + ---------- + codec: :class:`Codec` + The compressor. + + Returns + ------- + out: tuple + The associated library name and version. 
+ + Notes + ----- + The `cname` parameter in python-blosc API has been replaced by :paramref:`codec` , using `cname` + as parameter or a string as a :paramref:`codec` value will not work. + """ + return blosc2_ext.clib_info(codec) + + +def get_clib(bytesobj: str | bytes) -> str: + """ + Return the name of the compression library for Blosc :paramref:`bytesobj` buffer. + + Parameters + ---------- + bytesobj: str or bytes + The compressed buffer. + + Returns + ------- + out: str + The name of the compression library. + """ + return blosc2_ext.get_clib(bytesobj).decode("utf-8") + + +def get_compressor() -> str: + """Get the current compressor used for compression. + + Returns + ------- + out: str + The name of the compressor. + + See also + -------- + :func:`~blosc2.set_compressor` + :func:`~blosc2.compressor_list` + + """ + return blosc2_ext.get_compressor().decode("utf-8") + + +def set_releasegil(gilstate: bool) -> bool: + """ + Set whether to release the Python global inter-lock (GIL) + during c-blosc compress and decompress operations or not. This defaults + to False. + + Parameters + ---------- + gilstate: bool + True to release the GIL, False to retain it. + + Returns + ------- + out: bool + The previous value of the Python global inter-lock (GIL) release state. + + Notes + ----- + Designed to be used with larger chunk sizes and a ThreadPool. There is a + small performance penalty with releasing the GIL that will more harshly + penalize small block sizes. + + Examples + -------- + >>> oldReleaseState = blosc2.set_releasegil(True) + """ + gilstate = bool(gilstate) + if blosc2.IS_WASM: + # wasm32 does not benefit from releasing the GIL and enabling this can + # lead to incorrect results in some code paths. 
+ global _wasm_releasegil_state + oldstate = _wasm_releasegil_state + _wasm_releasegil_state = gilstate + blosc2_ext.set_releasegil(False) + return oldstate + return blosc2_ext.set_releasegil(gilstate) + + +def detect_number_of_cores() -> int: + """Detect the number of cores in this system. + + Returns + ------- + out: int + The number of cores in this system. + """ + if "count" in blosc2.cpu_info: + return blosc2.cpu_info["count"] + return 1 # Default + + +# Dictionaries for the maps between compressor names and libs +codecs = compressor_list(plugins=True) +# Map for compression libraries and versions +clib_versions = {codec.name: clib_info(codec)[1].decode("utf-8") for codec in compressor_list(plugins=False)} + + +def os_release_pretty_name(): + for p in ("/etc/os-release", "/usr/lib/os-release"): + try: + with open(p) as f: + for line in f: + name, _, value = line.rstrip().partition("=") + if name == "PRETTY_NAME": + if len(value) >= 2 and value[0] in "\"'" and value[0] == value[-1]: + value = value[1:-1] + return value + except OSError: + pass + return None + + +def print_versions(): + """Print all the versions of software that python-blosc2 relies on.""" + print("-=" * 38) + print(f"python-blosc2 version: {blosc2.__version__}") + print(f"Blosc version: {blosc2.blosclib_version}") + print(f"Codecs available (including plugins): {', '.join([codec.name for codec in codecs])}") + print("Main codec library versions:") + for clib in sorted(clib_versions.keys()): + print(f" {clib}: {clib_versions[clib]}") + print(f"NumPy version: {np.__version__}") + if not blosc2.IS_WASM: + import numexpr + + print(f"numexpr version: {numexpr.__version__}") + print(f"requests version: {requests.__version__}") + print(f"Python version: {sys.version}") + (sysname, _nodename, release, version, machine, processor) = platform.uname() + print(f"Platform: {sysname}-{release}-{machine} ({version})") + if sysname == "Linux": + distro = os_release_pretty_name() + if distro: + print(f"Linux 
dist: {distro}") + if blosc2.IS_WASM: + processor = "wasm32" + if not processor: + processor = "not recognized" + print(f"Processor: {processor}") + print(f"Byte-ordering: {sys.byteorder}") + # Internal Blosc threading + print(f"Detected cores: {blosc2.ncores}") + print(f"Number of threads to use by default: {blosc2.nthreads}") + print("-=" * 38) + + +def apple_silicon_cache_size(cache_level: int) -> int | None: + """Get the data cache_level size in bytes for Apple Silicon in MacOS. + + Apple Silicon has two clusters, Performance (0) and Efficiency (1). + This function returns the data cache size for the Performance cluster. + Returns None if the cache size cannot be determined. + """ + libc = ctypes.CDLL(ctypes.util.find_library("c")) + size = ctypes.c_size_t() + if cache_level == 1: + # We are interested in the L1 *data* cache size + hwcachesize = "hw.perflevel0.l1dcachesize" + else: + hwcachesize = f"hw.perflevel0.l{cache_level}cachesize" + hwcachesize = hwcachesize.encode("ascii") + libc.sysctlbyname(hwcachesize, ctypes.byref(size), ctypes.byref(ctypes.c_size_t(8)), None, 0) + return size.value if size.value > 0 else None + + +def windows_cache_size(cache_level: int) -> int | None: + """Get the data cache size in bytes for Windows. + + Semantics: + - L1: data cache only + - L2/L3: unified cache (data + instruction), as no split exists + + Returns None if the cache size cannot be determined. 
+ """ + from ctypes import wintypes + + if cache_level not in (1, 2, 3): + return None + + # Windows constants + RelationCache = 2 + + # PROCESSOR_CACHE_TYPE enum values + CacheUnified = 0 + CacheData = 2 + + # Header structure to read Relationship and Size first + class PROCESSOR_INFO_HEADER(ctypes.Structure): + _fields_: ClassVar[list] = [ + ("Relationship", ctypes.c_int), + ("Size", ctypes.c_uint), + ] + + # Only the fields we need from CACHE_RELATIONSHIP (first 12 bytes) + class CACHE_RELATIONSHIP(ctypes.Structure): + _fields_: ClassVar[list] = [ + ("Level", ctypes.c_ubyte), + ("Associativity", ctypes.c_ubyte), + ("LineSize", ctypes.c_ushort), + ("CacheSize", ctypes.c_uint), + ("Type", ctypes.c_uint), + ] + + kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) + + size = wintypes.DWORD(0) + + # Query buffer size + kernel32.GetLogicalProcessorInformationEx( + RelationCache, + None, + ctypes.byref(size), + ) + + buffer = ctypes.create_string_buffer(size.value) + + # Retrieve cache info + kernel32.GetLogicalProcessorInformationEx( + RelationCache, + buffer, + ctypes.byref(size), + ) + + offset = 0 + header_size = ctypes.sizeof(PROCESSOR_INFO_HEADER) + + while offset < size.value: + # Read header to get Size for advancing offset + header = PROCESSOR_INFO_HEADER.from_buffer_copy(buffer[offset : offset + header_size]) + + if header.Relationship == RelationCache: + # Read cache info starting after the header + cache = CACHE_RELATIONSHIP.from_buffer_copy(buffer[offset + header_size :]) + + if cache.Level == cache_level and ( + (cache_level == 1 and cache.Type == CacheData) + or (cache_level > 1 and cache.Type == CacheUnified) + ): + return cache.CacheSize + + offset += header.Size + + return None + + +def get_cache_info(cache_level: int) -> tuple: + if cache_level == 0: + cache_level = "1d" + + try: + result = subprocess.run(["lscpu", "--json"], capture_output=True, check=True, text=True) + except (FileNotFoundError, subprocess.CalledProcessError) as err: + raise 
ValueError("lscpu not found or error running lscpu") from err + lscpu_info = json.loads(result.stdout) + for entry in lscpu_info["lscpu"]: + if entry["field"] == f"L{cache_level} cache:": + size_str, instances_str = entry["data"].split(" (") + size, units = size_str.split() + size = int(size) + if units == "KiB": + size *= 2**10 + elif units == "MiB": + size *= 2**20 + elif units == "GiB": + size *= 2**30 + else: + raise ValueError("Unrecognized unit when guessing cache units") + instances = int(instances_str.split()[0]) + return size, instances + + raise ValueError(f"L{cache_level} cache not found in lscpu output") + + +def linux_cache_size(cache_level: int) -> int | None: + """Get the data cache_level size in bytes for Linux. + + Returns None if the cache size cannot be determined. + """ + try: + # Try to read the cache size from sysfs + with open(f"/sys/devices/system/cpu/cpu0/cache/index{cache_level}/size") as f: + size = f.read() + if size.endswith("K\n"): + return int(size[:-2]) * 2**10 + elif size.endswith("M\n"): + return int(size[:-2]) * 2**20 + elif size.endswith("G\n"): + return int(size[:-2]) * 2**30 + except FileNotFoundError: + # Try with lscpu, if available. + try: + cache_size, cache_instances = get_cache_info(cache_level) + # cache_instances typically refers to the number of sockets, CCXs or cores, + # depending on the CPU and cache level. + # In general, dividing the cache size by the number of instances would bring + # best performance for private caches (L1 and L2). For shared caches (L3), + # this should be the case as well, but more experimentation is needed. + return cache_size // cache_instances + except (FileNotFoundError, ValueError): + pass + return None + + +def _available_cpus() -> int: + try: + # On Linux, this returns the number of CPUs available to the process, + # which may be less than os.cpu_count() due to CPU affinity settings. 
+ return len(os.sched_getaffinity(0)) + except AttributeError: + # os.sched_getaffinity is not available on all platforms + return os.cpu_count() or 1 + + +def _update_cache_sizes( + cpu_info: dict, cache_size_func: Callable[[int], int | None], levels: tuple[int, int, int] +) -> None: + """Update cpu_info with cache sizes from the given function. + + Args: + cpu_info: Dictionary to update with cache sizes. + cache_size_func: Function that takes a cache level and returns size or None. + levels: Tuple of (l1_level, l2_level, l3_level) to pass to cache_size_func. + """ + l1_level, l2_level, l3_level = levels + if (l1_data_cache_size := cache_size_func(l1_level)) is not None: + cpu_info["l1_data_cache_size"] = l1_data_cache_size + if (l2_cache_size := cache_size_func(l2_level)) is not None: + cpu_info["l2_cache_size"] = l2_cache_size + if (l3_cache_size := cache_size_func(l3_level)) is not None: + cpu_info["l3_cache_size"] = l3_cache_size + + +@lru_cache(maxsize=1) +def get_cpu_info(): + """ + Construct the result of cpuinfo.get_cpu_info(), without actually using + cpuinfo.get_cpu_info() since that function takes 1s to run and this method is ran + at import time. + """ + cpu_info = { + "count": _available_cpus(), + "l1_data_cache_size": 32 * 1024, + "l2_cache_size": 256 * 1024, + "l3_cache_size": 1024 * 1024, + } + + if blosc2.IS_WASM: + # Emscripten/wasm32 does not have access to CPU information. + # Return defaults. + return cpu_info + + if platform.system() == "Darwin": + _update_cache_sizes(cpu_info, apple_silicon_cache_size, (1, 2, 3)) + elif platform.system() == "Linux": + # Cache level 0 is typically the L1 data cache, and level 1 is the L1 instruction cache + _update_cache_sizes(cpu_info, linux_cache_size, (0, 2, 3)) + elif platform.system() == "Windows": + _update_cache_sizes(cpu_info, windows_cache_size, (1, 2, 3)) + + return cpu_info + + +def get_blocksize() -> int: + """Get the internal blocksize to be used during compression. 
+ + Returns + ------- + out: int + The size in bytes of the internal block size. + """ + return blosc2_ext.get_blocksize() + + +def get_cbuffer_sizes(src: object) -> tuple[(int, int, int)]: + """ + Get the sizes of a compressed `src` buffer. + + Parameters + ---------- + src: bytes-like object + A compressed buffer. Must be a bytes-like object + that supports the Python Buffer Protocol, such as bytes, + bytearray, memoryview, or numpy.ndarray. + + Returns + ------- + (nbytes, cbytes, blocksize): tuple + A tuple containing the number of bytes (`nbytes`), the compressed size in bytes + (`cbytes`) and the block size in bytes (`blocksize`) of the + `src` compressed buffer. + """ + return blosc2_ext.cbuffer_sizes(src) + + +# Compute a decent value for chunksize based on L3 and/or heuristics +def get_chunksize(blocksize, l3_minimum=4 * 2**20, l3_maximum=2**26, reduc_factor=4): + # Find a decent default when L3 cannot be detected by cpuinfo. + # `reduc_factor` means that the chunk will be divided by this factor + # 4 stems for 3 operands + 1 result, but some functions (e.g., linalg ones) may + # decide to use another one (e.g., 1 for matmul has proved to be better). + # Most of this is based mainly on heuristics and experimentation. + chunksize = blocksize + if blocksize * 32 < l3_maximum: + chunksize = blocksize * 32 + + # Refine with L2/L3 measurements (not always possible) + cpu_info = blosc2.cpu_info + if "l3_cache_size" in cpu_info: + l3_cache_size = cpu_info["l3_cache_size"] + # cpuinfo sometimes returns cache sizes as strings (like, + # "4096 KB"), so refuse the temptation to guess and use the + # value only when it is an actual int. + # Also, sometimes cpuinfo does not return a correct L3 size; + # so in general, enforcing L3 > L2 is a good sanity check. 
+ if isinstance(l3_cache_size, int) and l3_cache_size > 0: + l2_cache_size = cpu_info.get("l2_cache_size", "Not found") + if isinstance(l2_cache_size, int) and l3_cache_size > l2_cache_size: + chunksize = l3_cache_size + # When computing expressions, it is convenient to keep chunks for all operands + # in L3 cache (reduc_factor will account for this). + chunksize //= reduc_factor + + # Chunksize should be at least the size of L2 + l2_cache_size = cpu_info.get("l2_cache_size", "Not found") + if isinstance(l2_cache_size, int) and l2_cache_size > chunksize: + # Apple Silicon has a large L2 cache, and memory bandwidth is high, + # so we can use a larger chunksize based on L2 cache size. + chunksize = l2_cache_size * 4 + + # Ensure a minimum size + if chunksize < l3_minimum: + chunksize = l3_minimum + + # In Blosc2, the chunksize cannot be larger than MAX_BUFFERSIZE + if chunksize > blosc2.MAX_BUFFERSIZE: + chunksize = blosc2.MAX_BUFFERSIZE + + # chunksize can never be larger than blocksize + if chunksize < blocksize: + chunksize = blocksize + + return chunksize + + +def nearest_divisor(a, b, strict=False): + """Find the divisor of `a` that is closest to `b`. + + Parameters + ---------- + a : int + The number for which to find divisors. + b : int + The reference value to compare divisors against. + strict : bool, optional + If True, always use the downward search algorithm. + + Returns + ------- + int + The divisor of `a` that is closest to `b`. + + Notes + ----- + There is a version of this function in the Cython extension module + that is *way* faster. + """ + if a > 100_000 or strict: + # When `a` is largish, or we require `b` strictly less than `a`, + # use a (faster) algorithm that only goes downwards. + # This is quite brute force, and tried to optimize this, but I have not found a faster way. 
+ for i in range(b, 0, -1): + if a % i == 0: + return i + return 1 # Fallback to 1, which is always a divisor + + # When `a` is smallish, use a more general algorithm that can find forwards and backwards + # Get all divisors of `a`; use a generator to avoid creating a list + divisors = (i for i in range(1, a + 1) if a % i == 0) + # Find the divisor nearest to b + return min(divisors, key=lambda x: abs(x - b)) + + +# This could be a good alternative to nearest_divisor that deserves more testing +# Found at: https://gist.github.com/raphaelvallat/5d5af7205df720db53be4cc2ee7e7549 +def find_closest_divisor(n, m): + """Find the divisor of n closest to m""" + divisors = np.array([i for i in range(1, int(np.sqrt(n) + 1)) if n % i == 0]) + divisions = n // divisors + return divisions[np.argmin(np.abs(m - divisions))] + + +# Compute chunks and blocks partitions +def compute_partition(nitems, maxshape, minpart=None): + if 0 in maxshape: + raise ValueError("shapes with 0 dims are not supported") + if nitems == 0: + raise ValueError("zero-sized partitions are not supported") + + # Increase dims starting from the latest + max_items = nitems + if minpart is None: + minpart = [1] * len(maxshape) + partition = [1] * len(maxshape) + for i, (size, minsize) in enumerate(zip(reversed(maxshape), reversed(minpart), strict=True)): + if max_items <= 1: + break + rsize = max(size, minsize) + if rsize <= max_items: + # rsize = rsize if size % rsize == 0 else nearest_divisor(size, rsize) + rsize = rsize if size % rsize == 0 else blosc2_ext.nearest_divisor(size, rsize) + else: + rsize = max(max_items, minsize) + # new_rsize = rsize if size % rsize == 0 else nearest_divisor(size, rsize, strict=True) + new_rsize = rsize if size % rsize == 0 else blosc2_ext.nearest_divisor(size, rsize, strict=True) + # If the new rsize is not too far from the original rsize, use it + if rsize // 2 < new_rsize < rsize * 2: + rsize = new_rsize + partition[-(i + 1)] = rsize + max_items //= rsize + + return partition 
+ + +def compute_chunks_blocks( # noqa: C901 + shape: tuple | list, + chunks: tuple | list | None = None, + blocks: tuple | list | None = None, + dtype: np.dtype = np.uint8, + **kwargs: dict, +) -> tuple: + """ + Compute educated guesses for chunks and blocks of a :ref:`NDArray`. + + Parameters + ---------- + shape: tuple or list + The shape of the array. + chunks: tuple or list + The shape of the chunk. If None, a guess is computed based on cache sizes + and heuristics. + blocks: tuple or list + The shape of the block. If None, a guess is computed based on cache sizes + and heuristics. + dtype: np.dtype + The dtype of the array. Default is np.uint8. + kwargs: dict + Other keyword arguments supported by the + :obj:`SChunk.__init__ ` constructor. + + Returns + ------- + tuple + A (chunks, blocks) tuple containing the computed chunk and block sizes. + """ + + # Return an arbitrary value for chunks and blocks when shape has any 0 dim + if 0 in shape and chunks is None and blocks is None: + return shape, shape + + if blocks: + if not isinstance(blocks, tuple | list): + blocks = [blocks] + if len(blocks) != len(shape): + raise ValueError("blocks should have the same length than shape") + for block, dim in zip(blocks, shape, strict=True): + if block == 0 and dim != 0: + raise ValueError("blocks cannot contain 0 dimension if shape is not zero") + if chunks: + if not isinstance(chunks, tuple | list): + chunks = [chunks] + if len(chunks) != len(shape): + raise ValueError("chunks should have the same length than shape") + for chunk, dim in zip(chunks, shape, strict=True): + if chunk == 0 and dim != 0: + raise ValueError("chunks cannot contain 0 dimension if shape is not zero") + + if chunks is not None and blocks is not None: + for block, chunk in zip(blocks, chunks, strict=True): + if block > chunk: + raise ValueError("blocks cannot be greater than chunks") + return chunks, blocks + + cparams = kwargs.get("cparams") or blosc2.CParams() # just get defaults + if 
isinstance(cparams, blosc2.CParams): + cparams = asdict(cparams) + # Typesize in dtype always has preference over typesize in cparams + itemsize = cparams["typesize"] = np.dtype(dtype).itemsize + + if blocks is None: + # Get the default blocksize for the compression params + # Check if we need STUNE for lossy codecs/filters that have specific blocksize requirements + codec = cparams.get("codec") + filters = cparams.get("filters", None) + needs_stune = codec in ( + blosc2.Codec.ZFP_RATE, + blosc2.Codec.ZFP_PREC, + blosc2.Codec.ZFP_ACC, + blosc2.Codec.NDLZ, + ) or (filters and any(f in (blosc2.Filter.NDMEAN, blosc2.Filter.NDCELL) for f in filters)) + + if needs_stune: + # Lossy codecs need proper blocksize calculation via STUNE + # Using an 8 MB buffer should be enough for detecting the whole range of blocksizes + nitems = 2**23 // itemsize + # compress2 is used just to provide a hint on the blocksize + # However, it does not work well with filters that are not shuffle or bitshuffle, + # so let's get rid of them + if filters: + cparams2 = copy.deepcopy(cparams) + for i, filter in enumerate(filters): + if filter not in (blosc2.Filter.SHUFFLE, blosc2.Filter.BITSHUFFLE): + cparams2["filters"][i] = blosc2.Filter.NOFILTER + else: + cparams2 = cparams + # Force STUNE to get a hint on the blocksize + aux_tuner = cparams2.get("tuner", blosc2.Tuner.STUNE) + cparams2["tuner"] = blosc2.Tuner.STUNE + src = blosc2.compress2(np.zeros(nitems, dtype=f"V{itemsize}"), **cparams2) + _, _, blocksize = blosc2.get_cbuffer_sizes(src) + cparams2["tuner"] = aux_tuner + else: + # We disable internal STUNE path for regular codecs as it is a bit costly, specially for small arrays. + # The heuristic below should be good enough in general. 
+ blocksize = 32 * 1024 + # Minimum blocksize calculation + min_blocksize = blocksize + if platform.machine() == "x86_64": + # For modern Intel/AMD archs, experiments say to split the cache among the operands + min_blocksize = blosc2.cpu_info["l2_cache_size"] // 4 + if blosc2.cpu_info["l2_cache_size"] >= 2**21: + # Incidentally, some modern Intel CPUs have a larger L2 cache (2 MB) and they + # prefer smaller blocks. This is somewhat heuristic, but it seems to work well. + min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4 + # New experiments say that using the 4x of the L1 size is even better + # But let's avoid this because it does not work well for AMD archs + # min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4 + elif platform.system() == "Darwin" and "arm" in platform.machine(): + # For Apple Silicon, experiments say we can use 4x the L1 size + # min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4 + # However, let's adjust for several operands in cache, so let's use just L1 + min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 1 + elif "l1_data_cache_size" in blosc2.cpu_info and isinstance( + blosc2.cpu_info["l1_data_cache_size"], int + ): + # For other archs, we don't have hints; be conservative and use 1x the L1 size + min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 1 + + if blocksize < min_blocksize: + blocksize = min_blocksize + + # Fix for #364 + if blocksize < itemsize: + blocksize = itemsize + else: + blocksize = math.prod(blocks) * itemsize + + # Check limits for blocksize + if blocksize > blosc2.MAX_BLOCKSIZE: + raise ValueError("blocksize is too large: it cannot exceed MAX_BLOCKSIZE (~512MB)") + + # Now that a sensible blocksize has been computed, let's compute the blocks + if chunks is None: + maxshape = shape + else: + maxshape = chunks + blocks = compute_partition(blocksize // itemsize, maxshape) + + # Finally, the chunks + if chunks is None: + blocksize = math.prod(blocks) * itemsize + reduc_factor = 
kwargs.get("_chunksize_reduc_factor", 4) + chunksize = get_chunksize(blocksize, reduc_factor=reduc_factor) + # Make chunksize to be a multiple of the blocksize. This allows for: + # 1. Avoid unnecessary padding in chunks + # 2. Avoid exceeding the maximum buffer size (see #392) + if chunksize % blocksize != 0: + chunksize = chunksize // blocksize * blocksize + chunks = compute_partition(chunksize // itemsize, shape, blocks) + + return tuple(chunks), tuple(blocks) + + +def compress2(src: object, **kwargs: dict) -> str | bytes: + """Compress the given :paramref:`src` buffer with the specified + compression parameters. + + Parameters + ---------- + src: bytes-like object + The buffer to compress. Must support the buffer interface. + + kwargs: dict, optional + Compression parameters. The default values are in :class:`blosc2.CParams`. + Supported keyword arguments: + + cparams: :class:`blosc2.CParams` or dict + All the compression parameters to use, provided as + a :class:`blosc2.CParams` instance or dictionary. + others: Any + If `cparams` is not provided, all the parameters of a :class:`blosc2.CParams` + can be passed as keyword arguments. + + Returns + ------- + out: str or bytes + The compressed data as a Python str or bytes object. + + Raises + ------ + RuntimeError + If the data cannot be compressed into `dst`. + If an internal error occurs, likely due to an + invalid parameter. + + Notes + ----- + This function only can deal with data < 2 GB. If you want to compress + larger buffers, you should use the :class:`~blosc2.SChunk` class or, if you want to save + large arrays/tensors, the :func:`~blosc2.pack_tensor` function can be handier. 
+ + Examples + -------- + >>> import numpy as np + >>> data = np.arange(1e6, dtype=np.float32) + >>> cparams = blosc2.CParams() + >>> compressed_data = blosc2.compress2(data, cparams=cparams) + >>> print(f"Compressed data length: {len(compressed_data)} bytes") + Compressed data length: 14129 bytes + + See also + -------- + :func:`~blosc2.decompress2` + :func:`~blosc2.pack_tensor` + :class:`~blosc2.SChunk` + """ + if kwargs is not None and "cparams" in kwargs: + if len(kwargs) > 1: + raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams") + if isinstance(kwargs.get("cparams"), blosc2.CParams): + kwargs = asdict(kwargs.get("cparams")) + else: + kwargs = kwargs.get("cparams") + if kwargs is None: + kwargs = {} + if blosc2.IS_WASM and kwargs.get("nthreads", 1) != 1: + kwargs = kwargs.copy() + kwargs["nthreads"] = 1 + + return blosc2_ext.compress2(src, **kwargs) + + +def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) -> str | bytes: + """Decompress the given :paramref:`src` buffer with the specified decompression params. + + Parameters + ---------- + src: bytes-like object + The data to be decompressed. Must support the buffer interface, such as bytes, + bytearray, memoryview, or numpy.ndarray. + dst: NumPy object or bytearray, optional + The destination NumPy object or bytearray to fill. The length + must be greater than 0. The user must ensure + it has enough capacity for the decompressed + data. Default is `None`, meaning a new bytes object + is created, filled and returned. + + kwargs: dict, optional + Decompression parameters. The default values are in :class:`blosc2.DParams`. + Supported keyword arguments: + + dparams: :class:`blosc2.DParams` or dict + All the decompression parameters to use, provided as + a :class:`blosc2.DParams` instance or dict. + others: Any + If `dparams` is not provided, all the parameters of a :class:`blosc2.DParams` + can be passed as keyword arguments. 
+ + Returns + ------- + out: str or bytes + The decompressed data as a Python str or bytes object if + :paramref:`dst` is `None`. Otherwise, it will return `None` because the result + will already be in :paramref:`dst`. + + Raises + ------ + RuntimeError + If the data cannot be compressed into :paramref:`dst`. + If an internal error occurs, likely due to an invalid parameter + If :paramref:`dst` is `None` and a bytes object could not be created to store the result. + TypeError + If :paramref:`src` does not support the Buffer Protocol. + ValueError + If the length of :paramref:`src` is smaller than the minimum. + If :paramref:`dst` is not None and its length is 0. + """ + if kwargs is not None and "dparams" in kwargs: + if len(kwargs) > 1: + raise AttributeError("Cannot pass both dparams and other kwargs already included in DParams") + if isinstance(kwargs.get("dparams"), blosc2.DParams): + kwargs = asdict(kwargs.get("dparams")) + else: + kwargs = kwargs.get("dparams") + if kwargs is None: + kwargs = {} + if blosc2.IS_WASM and kwargs.get("nthreads", 1) != 1: + kwargs = kwargs.copy() + kwargs["nthreads"] = 1 + + return blosc2_ext.decompress2(src, dst, **kwargs) + + +# Directory utilities +def remove_urlpath(path: str) -> None: + """Permanently remove the file or the directory specified by :paramref:`path`. + This function is used during the tests of a persistent SChunk to remove it. + + Parameters + ---------- + path: str + The path of the directory or file. + + Returns + ------- + out: None + """ + if path is not None: + if isinstance(path, pathlib.PurePath): + path = str(path) + path = path.encode("utf-8") if isinstance(path, str) else path + blosc2_ext.remove_urlpath(path) + + +def schunk_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.SChunk: + """Create a :ref:`SChunk ` instance from a contiguous frame buffer. + + Parameters + ---------- + cframe: bytes or str + The bytes object containing the in-memory cframe. 
+ copy: bool + Whether to internally make a copy. If `False`, + the user is responsible for keeping a reference to `cframe`. + Default is `False`. + + Returns + ------- + out: :ref:`SChunk ` + A new :ref:`SChunk ` containing the data passed. + + See Also + -------- + :func:`~blosc2.schunk.SChunk.to_cframe` + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> nchunks = 4 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> serialized_schunk = schunk.to_cframe() + >>> print(f"Serialized SChunk length: {len(serialized_schunk)} bytes") + Serialized SChunk length: 14129 bytes + >>> deserialized_schunk = blosc2.schunk_from_cframe(serialized_schunk) + >>> start = 1000 + >>> stop = 1005 + >>> sl_bytes = deserialized_schunk[start:stop] + >>> sl = np.frombuffer(sl_bytes, dtype=np.int32) + >>> print("Slice from deserialized SChunk:", sl) + Slice from deserialized SChunk: [1000 1001 1002 1003 1004] + >>> expected_slice = data[start:stop] + >>> print("Expected slice:", expected_slice) + Expected slice: [1000 1001 1002 1003 1004] + """ + return blosc2_ext.schunk_from_cframe(cframe, copy) + + +def ndarray_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.NDArray: + """Create a :ref:`NDArray ` instance from a contiguous frame buffer. + + Parameters + ---------- + cframe: bytes or str + The bytes object containing the in-memory cframe. + copy: bool + Whether to internally make a copy. If `False`, + the user is responsible for keeping a reference to `cframe`. + Default is `False`. + + Returns + ------- + out: :ref:`NDArray ` + A new :ref:`NDArray ` containing the data passed. 
+ + See Also + -------- + :func:`~blosc2.NDArray.to_cframe` + """ + return blosc2_ext.ndarray_from_cframe(cframe, copy) + + +def from_cframe( + cframe: bytes | str, copy: bool = True +) -> blosc2.EmbedStore | blosc2.NDArray | blosc2.SChunk: + """Create a :ref:`EmbedStore `, :ref:`NDArray ` or :ref:`SChunk ` instance + from a contiguous frame buffer. + + Parameters + ---------- + cframe: bytes or str + The bytes object containing the in-memory cframe. + copy: bool + Whether to internally make a copy. If `False`, + the user is responsible for keeping a reference to `cframe`. + Default is `True`, which is safer. If you need to save + time/memory, you can set it to `False`, but then you must + ensure that the `cframe` is not garbage collected while the + returned object is still in use. + + Returns + ------- + out: :ref:`EmbedStore `, :ref:`NDArray ` or :ref:`SChunk ` + A new instance of the appropriate type containing the data passed. + + See Also + -------- + :func:`~blosc2.EmbedStore.from_cframe` + :func:`~blosc2.NDArray.from_cframe` + :func:`~blosc2.schunk.SChunk.from_cframe` + """ + # Retrieve the SChunk; not doing a copy is cheap + schunk = schunk_from_cframe(cframe, copy=False) + # Check the metalayer to determine the type + if "b2embed" in schunk.meta: + return blosc2.estore_from_cframe(cframe, copy=copy) + if "b2nd" in schunk.meta: + return ndarray_from_cframe(cframe, copy=copy) + return schunk_from_cframe(cframe, copy=copy) + + +def register_codec( + codec_name: str, + id: int, + encoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] | None = None, + decoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] | None = None, + version: int = 1, +) -> None: + """Register a user defined codec. + + Parameters + ---------- + codec_name: str + Name of the codec. + id: int + Codec id, which must be between 160 and 255 (inclusive). 
+ encoder: Python function or None + A Python function that receives an input to compress as a ndarray of dtype uint8, + an output to fill the compressed buffer in as a ndarray of dtype uint8, the codec meta + and the `SChunk` instance. It must return the size of the compressed buffer in bytes. + If None, the codec name indicates a dynamic plugin that must be installed. + decoder: Python function or None + A Python function that receives an input to decompress as a ndarray of dtype uint8, + an output to fill the decompressed buffer in as a ndarray of dtype uint8, the codec meta + and the `SChunk` instance. It must return the size of the decompressed buffer in bytes. + If None, then the codec name indicates a dynamic plugin which must be installed. + version: int + The codec version. Default is 1. + + Returns + ------- + out: None + + Notes + ----- + * Cannot use multi-threading when using a user-defined codec. + + * User-defined codecs can only be used inside an `SChunk` instance. + + * Both encoder and decoder functions must be given (for a Python codec), or none (for + a dynamic plugin). + + See Also + -------- + :func:`register_filter` + + Examples + -------- + .. 
code-block:: python + + # Define encoder and decoder functions + def encoder(input, output, meta, schunk): + # Check whether the data is an arange + step = int(input[1] - input[0]) + res = input[1:] - input[:-1] + if np.min(res) == np.max(res): + output[0:4] = input[0:4] # start + n = step.to_bytes(4, sys.byteorder) + output[4:8] = [n[i] for i in range(4)] + return 8 + else: + # Not compressible, tell Blosc2 to do a memcpy + return 0 + + + def decoder1(input, output, meta, schunk): + # For decoding we only have to worry about the arange case + # (other cases are handled by Blosc2) + output[:] = [input[0] + i * input[1] for i in range(output.size)] + + return output.size + + + # Register codec + codec_name = "codec1" + id = 180 + blosc2.register_codec(codec_name, id, encoder, decoder) + """ + if id in blosc2.ucodecs_registry: + raise ValueError("Id already in use") + blosc2_ext.register_codec(codec_name, id, encoder, decoder, version) + + +def register_filter( + id: int, + forward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] | None = None, + backward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] | None = None, + name: str | None = None, +) -> None: + """Register a user-defined filter. + + Parameters + ---------- + id: int + Filter id, must be between 160 and 255 (inclusive). + forward: Python function + Function to apply the filter. Receives an input ndarray of dtype uint8, an output ndarray + of dtype uint8, the filter meta and the corresponding `SChunk` instance. + If None, the filter name indicates a dynamic plugin which must be installed. + backward: Python function + Function to reverse the filter. Receives an input ndarray of dtype uint8, an output ndarray + of dtype uint8, the filter meta and the `SChunk` instance. + If None then the filter name indicates a dynamic plugin which must be installed. + name: str + The filter name. 
+ If both `forward`and `backward` are None, this parameter must be passed to correctly + load the dynamic filter. + Returns + ------- + out: None + + Notes + ----- + * Multi-threading cannot be used with a user-defined filter. + + * User-defined filters can only be used inside an `SChunk` instance. + + See Also + -------- + :func:`register_codec` + + Examples + -------- + .. code-block:: python + + # Define forward and backward functions + def forward(input, output, meta, schunk): + nd_input = input.view(dtype) + nd_output = output.view(dtype) + + nd_output[:] = nd_input + 1 + + + def backward(input, output, meta, schunk): + nd_input = input.view(dtype) + nd_output = output.view(dtype) + + nd_output[:] = nd_input - 1 + + + # Register filter + id = 160 + blosc2.register_filter(id, forward, backward) + """ + if id in blosc2.ufilters_registry: + raise ValueError("Id already in use") + blosc2_ext.register_filter(id, forward, backward, name) diff --git a/venv/Lib/site-packages/blosc2/dict_store.py b/venv/Lib/site-packages/blosc2/dict_store.py new file mode 100644 index 0000000..9ac3cf4 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/dict_store.py @@ -0,0 +1,547 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +import os +import shutil +import tempfile +import zipfile +from collections.abc import Iterator, Set +from typing import Any + +import numpy as np + +import blosc2 +from blosc2.c2array import C2Array +from blosc2.embed_store import EmbedStore +from blosc2.schunk import SChunk + + +class DictStore: + """ + Directory-based storage for compressed data using Blosc2. + + Manages arrays in a directory (.b2d) or zip (.b2z) format. + + Supports the following types: + + - blosc2.NDArray: n-dimensional arrays. 
When persisted externally they + are stored as .b2nd files. + - blosc2.SChunk: super-chunks. When persisted externally they are stored + as .b2f files. + - blosc2.C2Array: columnar containers. These are always kept inside the + embedded store (never externalized). + - numpy.ndarray: converted to blosc2.NDArray on assignment. + + Parameters + ---------- + localpath : str + Local path for the directory (".b2d") or file (".b2z"); other extensions + are not supported. If a directory is specified, it will be treated as + a Blosc2 directory format (B2DIR). If a file is specified, it + will be treated as a Blosc2 zip format (B2ZIP). + mode : str, optional + File mode ('r', 'w', 'a'). Default is 'a'. + mmap_mode : str or None, optional + Memory mapping mode for read access. For now, only ``"r"`` is supported, + and only when ``mode="r"``. Default is None. + tmpdir : str or None, optional + Temporary directory to use when working with ".b2z" files. If None, + a system temporary directory will be managed. Default is None. + cparams : dict or None, optional + Compression parameters for the internal embed store. + If None, the default Blosc2 parameters are used. + dparams : dict or None, optional + Decompression parameters for the internal embed store. + If None, the default Blosc2 parameters are used. + storage : blosc2.Storage or None, optional + Storage properties for the internal embed store. + If None, the default Blosc2 storage properties are used. + threshold : int or None, optional + Threshold (in bytes of uncompressed data) under which values are kept + in the embedded store. If None, in-memory arrays are stored in the + embedded store and on-disk arrays are stored as separate files. + C2Array objects will always be stored in the embedded store, + regardless of their size. 
+ + Examples + -------- + >>> dstore = DictStore(localpath="my_dstore.b2z", mode="w") + >>> dstore["/node1"] = np.array([1, 2, 3]) # goes to embed store + >>> dstore["/node2"] = blosc2.ones(2) # goes to embed store + >>> arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w") + >>> dstore["/dir1/node3"] = arr_external # external file in dir1 (.b2nd) + >>> schunk = blosc2.SChunk(chunksize=32) + >>> schunk.append_data(b"abcd") + 4 + >>> dstore["/dir1/schunk1"] = schunk # externalized as .b2f if above threshold + >>> dstore.to_b2z() # persist to the zip file; external files are copied in + >>> print(sorted(dstore.keys())) + ['/dir1/node3', '/dir1/schunk1', '/node1', '/node2'] + >>> print(dstore["/node1"][:])) + array([1, 2, 3]) + + Notes + ----- + - External persistence uses the following file extensions: + .b2nd for NDArray and .b2f for SChunk. + """ + + def __init__( + self, + localpath: os.PathLike[Any] | str | bytes, + mode: str = "a", + tmpdir: str | None = None, + cparams: blosc2.CParams | None = None, + dparams: blosc2.DParams | None = None, + storage: blosc2.Storage | None = None, + threshold: int | None = 2**13, + *, + mmap_mode: str | None = None, + _storage_meta: dict | None = None, + ): + """ + See :class:`DictStore` for full documentation of parameters. 
+ """ + self.localpath = localpath if isinstance(localpath, (str, bytes)) else str(localpath) + if not self.localpath.endswith((".b2z", ".b2d")): + raise ValueError(f"localpath must have a .b2z or .b2d extension; you passed: {self.localpath}") + if mode not in ("r", "w", "a"): + raise ValueError("For DictStore containers, mode must be 'r', 'w', or 'a'") + if mmap_mode not in (None, "r"): + raise ValueError("For DictStore containers, mmap_mode must be None or 'r'") + if mmap_mode == "r" and mode != "r": + raise ValueError("For DictStore containers, mmap_mode='r' requires mode='r'") + + self.mode = mode + self.mmap_mode = mmap_mode + self.threshold = threshold + self.cparams = cparams or blosc2.CParams() + self.dparams = dparams or blosc2.DParams() + self.storage = storage or blosc2.Storage() + + if _storage_meta: + self.storage.meta = _storage_meta + else: + # Mark this storage as a b2dict object + self.storage.meta = {"b2dict": {"version": 1}} + + self.offsets = {} + self.map_tree = {} + self._temp_dir_obj = None + + self._setup_paths_and_dirs(tmpdir) + + if self.mode == "r": + self._init_read_mode(self.dparams) + else: + self._init_write_append_mode(self.cparams, self.dparams, storage) + + def _setup_paths_and_dirs(self, tmpdir: str | None): + """Set up working directories and paths.""" + self.is_zip_store = self.localpath.endswith(".b2z") + if self.is_zip_store: + if tmpdir is None: + self._temp_dir_obj = tempfile.TemporaryDirectory() + self.working_dir = self._temp_dir_obj.name + else: + self.working_dir = tmpdir + os.makedirs(tmpdir, exist_ok=True) + self.b2z_path = self.localpath + else: # .b2d + self.working_dir = self.localpath + if self.mode in ("w", "a"): + os.makedirs(self.working_dir, exist_ok=True) + self.b2z_path = self.localpath[:-4] + ".b2z" + + self.estore_path = os.path.join(self.working_dir, "embed.b2e") + + def _init_read_mode(self, dparams: blosc2.DParams | None = None): + """Initialize store in read mode.""" + if not 
os.path.exists(self.localpath): + raise FileNotFoundError(f"dir/zip file {self.localpath} does not exist.") + + if self.is_zip_store: + self.offsets = self._get_zip_offsets() + if "embed.b2e" not in self.offsets: + raise FileNotFoundError("Embed file embed.b2e not found in store.") + estore_offset = self.offsets["embed.b2e"]["offset"] + schunk = blosc2.blosc2_ext.open( + self.b2z_path, + mode="r", + offset=estore_offset, + mmap_mode=self.mmap_mode, + dparams=dparams, + ) + for filepath in self.offsets: + if filepath.endswith((".b2nd", ".b2f")): + key = "/" + filepath[: -5 if filepath.endswith(".b2nd") else -4] + self.map_tree[key] = filepath + else: # .b2d + if not os.path.isdir(self.localpath): + raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.") + schunk = blosc2.blosc2_ext.open( + self.estore_path, + mode="r", + offset=0, + mmap_mode=self.mmap_mode, + dparams=dparams, + ) + self._update_map_tree() + + self._estore = EmbedStore(_from_schunk=schunk) + self.storage.meta = self._estore.storage.meta + + def _init_write_append_mode( + self, + cparams: blosc2.CParams | None, + dparams: blosc2.DParams | None, + storage: blosc2.Storage | None, + ): + """Initialize store in write/append mode.""" + if self.mode == "a" and os.path.exists(self.localpath): + if self.is_zip_store: + with zipfile.ZipFile(self.localpath, "r") as zf: + zf.extractall(self.working_dir) + elif not os.path.isdir(self.working_dir): + raise FileNotFoundError(f"Directory {self.working_dir} does not exist for reading.") + + self._estore = EmbedStore( + urlpath=self.estore_path, + mode=self.mode, + cparams=cparams, + dparams=dparams, + storage=storage, + meta=self.storage.meta, + ) + self._update_map_tree() + + def _update_map_tree(self): + # Build map_tree from .b2nd and .b2f files in working dir + for root, _, files in os.walk(self.working_dir): + for file in files: + filepath = os.path.join(root, file) + if filepath.endswith((".b2nd", ".b2f")): + # Convert filename to 
key: remove extension and ensure starts with / + rel_path = os.path.relpath(filepath, self.working_dir) + # Normalize path separators to forward slashes for cross-platform consistency + rel_path = rel_path.replace(os.sep, "/") + if rel_path.endswith(".b2nd"): + key = rel_path[:-5] + elif rel_path.endswith(".b2f"): + key = rel_path[:-4] + else: + continue + if not key.startswith("/"): + key = "/" + key + self.map_tree[key] = rel_path + + @property + def estore(self) -> EmbedStore: + """Access the underlying EmbedStore.""" + return self._estore + + def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None: + """Add a node to the DictStore.""" + if isinstance(value, np.ndarray): + value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams) + # C2Array should always go to embed store; let estore handle it directly + if isinstance(value, C2Array): + self._estore[key] = value + return + exceeds_threshold = self.threshold is not None and value.nbytes >= self.threshold + # Consider both NDArray and SChunk external files (have urlpath) + external_file = isinstance(value, (blosc2.NDArray, SChunk)) and getattr(value, "urlpath", None) + if exceeds_threshold or (external_file and self.threshold is None): + # Choose extension based on type + ext = ".b2f" if isinstance(value, SChunk) else ".b2nd" + # Convert key to a proper file path within the tree directory + rel_key = key.lstrip("/") + dest_path = os.path.join(self.working_dir, rel_key + ext) + + # Ensure the parent directory exists + parent_dir = os.path.dirname(dest_path) + if parent_dir and not os.path.exists(parent_dir): + os.makedirs(parent_dir, exist_ok=True) + + # Save the value to the destination path + if not external_file: + if hasattr(value, "save"): + value.save(urlpath=dest_path) + else: + # An SChunk does not have a save() method + with open(dest_path, "wb") as f: + f.write(value.to_cframe()) + else: + # This should be faster than using value.save() ? 
+ shutil.copy2(value.urlpath, dest_path) + + # Store relative path from tree directory + rel_path = os.path.relpath(dest_path, self.working_dir) + # Normalize to forward slashes + rel_path = rel_path.replace(os.sep, "/") + self.map_tree[key] = rel_path + else: + if external_file: + # Embed a copy by using cframe + value = blosc2.from_cframe(value.to_cframe()) + self._estore[key] = value + + def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array: + """Retrieve a node from the DictStore.""" + # Check map_tree first + if key in self.map_tree: + filepath = self.map_tree[key] + if filepath in self.offsets: + offset = self.offsets[filepath]["offset"] + return blosc2.blosc2_ext.open( + self.b2z_path, + mode="r", + offset=offset, + mmap_mode=self.mmap_mode, + dparams=self.dparams, + ) + else: + urlpath = os.path.join(self.working_dir, filepath) + if os.path.exists(urlpath): + return blosc2.open( + urlpath, + mode="r" if self.mode == "r" else "a", + mmap_mode=self.mmap_mode if self.mode == "r" else None, + dparams=self.dparams, + ) + else: + raise KeyError(f"File for key '{key}' not found in offsets or temporary directory.") + + # Fall back to EmbedStore + return self._estore[key] + + def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | C2Array | Any: + """Retrieve a node, or default if not found.""" + try: + return self[key] + except KeyError: + return default + + def __delitem__(self, key: str) -> None: + """Remove a node from the DictStore.""" + if key in self.map_tree: + # Remove from map_tree and delete the external file + filepath = self.map_tree[key] + del self.map_tree[key] + + # Delete the physical file if it exists + full_path = os.path.join(self.working_dir, filepath) + if os.path.exists(full_path): + os.remove(full_path) + elif key in self._estore: + del self._estore[key] + else: + raise KeyError(f"Key '{key}' not found") + + def __contains__(self, key: str) -> bool: + """Check if a key exists.""" + return key in self.map_tree 
def __len__(self) -> int:
    """Return the number of distinct nodes in the store.

    A key present in both ``map_tree`` and the embedded store is counted
    once, so the result agrees with ``__iter__`` and ``keys()``.
    """
    embedded_only = sum(1 for key in self._estore if key not in self.map_tree)
    return len(self.map_tree) + embedded_only


def __iter__(self) -> Iterator[str]:
    """Iterate over keys: ``map_tree`` keys first, then embedded-only keys."""
    yield from self.map_tree.keys()
    for key in self._estore:
        if key not in self.map_tree:
            yield key


def keys(self) -> Set[str]:
    """Return the union of ``map_tree`` keys and embedded-store keys."""
    return self.map_tree.keys() | self._estore.keys()


def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
    """Iterate over all values, in the same order as :meth:`items`."""
    # Delegate to items() so the node-opening logic lives in one place.
    for _key, value in self.items():
        yield value


def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
    """Iterate over ``(key, value)`` pairs.

    Keys found in ``map_tree`` take precedence over the embedded store.
    In a zip store, a mapped file without a recorded offset is skipped.
    """
    all_keys = set(self.map_tree.keys()) | set(self._estore.keys())

    for key in all_keys:
        if key in self.map_tree:
            filepath = self.map_tree[key]
            if self.is_zip_store:
                if filepath in self.offsets:
                    # Open the member in place inside the archive.
                    yield (
                        key,
                        blosc2.blosc2_ext.open(
                            self.b2z_path,
                            mode="r",
                            offset=self.offsets[filepath]["offset"],
                            mmap_mode=self.mmap_mode,
                            dparams=self.dparams,
                        ),
                    )
            else:
                read_only = self.mode == "r"
                yield (
                    key,
                    blosc2.open(
                        os.path.join(self.working_dir, filepath),
                        mode="r" if read_only else "a",
                        mmap_mode=self.mmap_mode if read_only else None,
                        dparams=self.dparams,
                    ),
                )
        elif key in self._estore:
            yield key, self._estore[key]


def to_b2z(self, overwrite=False, filename=None) -> os.PathLike[Any] | str:
    """
    Serialize zip store contents to the b2z file.

    Parameters
    ----------
    overwrite : bool, optional
        If True, overwrite the existing b2z file if it exists. Default is False.
    filename : str, optional
        If provided, use this filename instead of the default b2z file path.

    Returns
    -------
    filename : str
        The absolute path to the created b2z file.

    Raises
    ------
    ValueError
        If the store was opened in read mode, or the target path does not
        end in ``.b2z``.
    FileExistsError
        If the target exists and ``overwrite`` is False.
    """
    if self.mode == "r":
        raise ValueError("Cannot call to_b2z() on a DictStore opened in read mode.")

    # Resolve the target once; every later step must use it so that an
    # explicit ``filename`` is the file actually written and returned.
    b2z_path = os.fspath(self.b2z_path if filename is None else filename)
    if not b2z_path.endswith(".b2z"):
        raise ValueError("b2z_path must have a .b2z extension")

    if os.path.exists(b2z_path) and not overwrite:
        raise FileExistsError(f"'{b2z_path}' already exists. Use overwrite=True to overwrite.")

    # Gather all files except estore_path
    estore_abspath = os.path.abspath(self.estore_path)
    filepaths = []
    for root, _, files in os.walk(self.working_dir):
        for file in files:
            filepath = os.path.join(root, file)
            if os.path.abspath(filepath) != estore_abspath:
                filepaths.append(filepath)

    # Sort filepaths by file size from largest to smallest
    filepaths.sort(key=os.path.getsize, reverse=True)

    with zipfile.ZipFile(b2z_path, "w", zipfile.ZIP_STORED) as zf:
        # Write all files (except estore_path) first (sorted by size)
        for filepath in filepaths:
            zf.write(filepath, os.path.relpath(filepath, self.working_dir))
        # Write estore last
        if os.path.exists(self.estore_path):
            zf.write(self.estore_path, os.path.relpath(self.estore_path, self.working_dir))
    return os.path.abspath(b2z_path)


def _get_zip_offsets(self) -> dict[str, dict[str, int]]:
    """Record the data offset and length of every member of the zip archive.

    The result is stored in ``self.offsets`` (reset first) and returned.
    """
    self.offsets = {}  # Reset offsets
    with open(self.b2z_path, "rb") as f, zipfile.ZipFile(f) as zf:
        for info in zf.infolist():
            # info.header_offset points to the local file header; the data
            # starts after its 30 fixed bytes plus the variable-length
            # filename and extra fields whose sizes are stored at 26..30.
            f.seek(info.header_offset)
            local_header = f.read(30)
            filename_len = int.from_bytes(local_header[26:28], "little")
            extra_len = int.from_bytes(local_header[28:30], "little")
            data_offset = info.header_offset + 30 + filename_len + extra_len
            self.offsets[info.filename] = {"offset": data_offset, "length": info.file_size}
    return self.offsets


def close(self) -> None:
    """Persist changes and clean up.

    A zip store opened in ``"w"`` or ``"a"`` mode is serialized to its b2z
    file. The temporary directory (if this object created one) is removed
    even when serialization raises; the exception still propagates.
    """
    # Repack estore
    # TODO: for some reason this is not working
    # if self.mode != "r":
    #     cframe = self._estore.to_cframe()
    #     with open(self._estore.urlpath, "wb") as f:
    #         f.write(cframe)
    try:
        if self.is_zip_store and self.mode in ("w", "a"):
            # Serialize to b2z file
            self.to_b2z(overwrite=True)
    finally:
        # Clean up temporary directory if we created it
        if self._temp_dir_obj is not None:
            self._temp_dir_obj.cleanup()


def __enter__(self):
    """Context manager enter."""
    return self


def __exit__(self, exc_type, exc_val, exc_tb):
    """Context manager exit: close the store and let exceptions propagate."""
    self.close()
    # No need to handle exceptions, just close the DictStore
    return False


if __name__ == "__main__":
    # Example usage
    localpath = "example_dstore.b2z"
    with DictStore(localpath, mode="w") as dstore:
        dstore["/node1"] = np.array([1, 2, 3])
        dstore["/node2"] = blosc2.ones(2)

        # Make /node3 an external file
        arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w")
        dstore["/dir1/node3"] = arr_external

        print("DictStore keys:", list(dstore.keys()))
        print("Node1 data:", dstore["/node1"][:])
        print("Node2 data:", dstore["/node2"][:])
        print("Node3 data (external):", dstore["/dir1/node3"][:])

        del dstore["/node1"]
        print("After deletion, keys:", list(dstore.keys()))

    # Open the stored zip file
    with DictStore(localpath, mode="r") as dstore_opened:
        print("Opened dstore keys:", list(dstore_opened.keys()))
        for key, value in dstore_opened.items():
            if isinstance(value, blosc2.NDArray):
                print(
                    f"Key: {key}, Shape: {value.shape}, Values: {value[:10] if len(value) > 3 else value[:]}"
                )

#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from __future__ import annotations + +import ast +import contextlib +import inspect +import os +import textwrap +import tokenize +from io import StringIO +from typing import ClassVar + +_PRINT_DSL_KERNEL = os.environ.get("PRINT_DSL_KERNEL", "").strip().lower() +_PRINT_DSL_KERNEL = _PRINT_DSL_KERNEL not in ("", "0", "false", "no", "off") +_DSL_USAGE_DOC_URL = "https://github.com/Blosc/python-blosc2/blob/main/doc/getting_started/dsl_syntax.md" + + +class DSLSyntaxError(ValueError): + """Raised when a @dsl_kernel function uses unsupported DSL syntax.""" + + +def _normalize_miniexpr_scalar(value): + # NumPy scalar-like values expose .item(); plain Python scalars do not. + if hasattr(value, "item") and callable(value.item): + with contextlib.suppress(Exception): + value = value.item() + if isinstance(value, bool): + return int(value) + if isinstance(value, int | float | str): + return value + raise TypeError("Unsupported scalar type for miniexpr specialization") + + +def _line_starts(text: str) -> list[int]: + starts = [0] + for i, ch in enumerate(text): + if ch == "\n": + starts.append(i + 1) + return starts + + +def _to_abs(line_starts: list[int], line: int, col: int) -> int: + return line_starts[line - 1] + col + + +def _find_def_signature_span(text: str): + tokens = list(tokenize.generate_tokens(StringIO(text).readline)) + for i, tok in enumerate(tokens): + if tok.type != tokenize.NAME or tok.string != "def": + continue + lparen = None + rparen = None + colon = None + depth = 0 + for j in range(i + 1, len(tokens)): + t = tokens[j] + if lparen is None: + if t.type == tokenize.OP and t.string == "(": + lparen = t + depth = 1 + continue + if t.type == tokenize.OP and t.string == "(": + depth += 1 + continue + if t.type == tokenize.OP and t.string == ")": + depth -= 1 + if depth == 0: + rparen = t + continue + if rparen is not None and t.type == 
tokenize.OP and t.string == ":": + colon = t + break + if lparen is not None and rparen is not None: + return lparen, rparen, colon + return None, None, None + + +def _remove_scalar_params_preserving_source(text: str, scalar_replacements: dict[str, int | float]): + if not scalar_replacements: + return text, 0 + + lparen, rparen, colon = _find_def_signature_span(text) + if lparen is None or rparen is None: + return text, 0 + + try: + tree = ast.parse(text) + except Exception: + return text, 0 + + func = next((n for n in tree.body if isinstance(n, ast.FunctionDef)), None) + if func is None: + return text, 0 + + kept = [a.arg for a in (func.args.posonlyargs + func.args.args) if a.arg not in scalar_replacements] + line_starts = _line_starts(text) + pstart = _to_abs(line_starts, lparen.end[0], lparen.end[1]) + pend = _to_abs(line_starts, rparen.start[0], rparen.start[1]) + updated = f"{text[:pstart]}{', '.join(kept)}{text[pend:]}" + body_start = 0 + if colon is not None: + # Signature shrink can move ':' to an earlier column, so recompute + # on the rewritten text to avoid skipping first-line body tokens. 
+ _, _, updated_colon = _find_def_signature_span(updated) + if updated_colon is not None: + body_start = _to_abs(_line_starts(updated), updated_colon.end[0], updated_colon.end[1]) + return updated, body_start + + +def _replace_scalar_names_preserving_source( + text: str, scalar_replacements: dict[str, int | float], body_start: int +): + if not scalar_replacements: + return text + + line_starts = _line_starts(text) + tokens = list(tokenize.generate_tokens(StringIO(text).readline)) + significant = { + tokenize.NAME, + tokenize.NUMBER, + tokenize.STRING, + tokenize.OP, + tokenize.INDENT, + tokenize.DEDENT, + } + assign_ops = {"=", "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", "<<=", ">>=", ":="} + edits = [] + for i, tok in enumerate(tokens): + if tok.type != tokenize.NAME or tok.string not in scalar_replacements: + continue + start_abs = _to_abs(line_starts, tok.start[0], tok.start[1]) + if start_abs < body_start: + continue + + prev_sig = None + for j in range(i - 1, -1, -1): + if tokens[j].type in significant: + prev_sig = tokens[j] + break + if prev_sig is not None and prev_sig.type == tokenize.OP and prev_sig.string == ".": + continue + + next_sig = None + for j in range(i + 1, len(tokens)): + if tokens[j].type in significant: + next_sig = tokens[j] + break + if next_sig is not None and next_sig.type == tokenize.OP and next_sig.string in assign_ops: + continue + + end_abs = _to_abs(line_starts, tok.end[0], tok.end[1]) + edits.append((start_abs, end_abs, repr(scalar_replacements[tok.string]))) + + if not edits: + return text + + out = text + for start, end, repl in sorted(edits, key=lambda e: e[0], reverse=True): + out = f"{out[:start]}{repl}{out[end:]}" + return out + + +def _fold_numeric_cast_calls_preserving_source(text: str, body_start: int): # noqa: C901 + """Fold float() and int() calls into literals. + + miniexpr parses DSL function calls in a restricted way, and scalar specialization can + produce calls like float(200) that fail to parse. 
Fold those into literals while + preserving source formatting/comments elsewhere. + """ + try: + tree = ast.parse(text) + except Exception: + return text + + line_starts = _line_starts(text) + edits = [] + + def _numeric_literal_value(node): + if isinstance(node, ast.Constant) and isinstance(node.value, int | float | bool): + return node.value + if ( + isinstance(node, ast.UnaryOp) + and isinstance(node.op, ast.UAdd | ast.USub) + and isinstance(node.operand, ast.Constant) + and isinstance(node.operand.value, int | float | bool) + ): + value = node.operand.value + return value if isinstance(node.op, ast.UAdd) else -value + return None + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if node.keywords or len(node.args) != 1: + continue + if not isinstance(node.func, ast.Name) or node.func.id not in {"float", "int"}: + continue + + arg = node.args[0] + value = _numeric_literal_value(arg) + if value is None: + continue + + start_abs = _to_abs(line_starts, node.lineno, node.col_offset) + if start_abs < body_start: + continue + end_abs = _to_abs(line_starts, node.end_lineno, node.end_col_offset) + + if node.func.id == "float": + repl = repr(float(value)) + else: + repl = repr(int(value)) + edits.append((start_abs, end_abs, repl)) + + if not edits: + return text + + out = text + for start, end, repl in sorted(edits, key=lambda e: e[0], reverse=True): + out = f"{out[:start]}{repl}{out[end:]}" + return out + + +def specialize_miniexpr_inputs(expr_string: str, operands: dict): + """Inline scalar operands as constants for miniexpr compilation.""" + scalar_replacements = {} + array_operands = {} + for name, value in operands.items(): + if hasattr(value, "shape") and value.shape == (): + scalar_replacements[name] = _normalize_miniexpr_scalar(value[()]) + continue + if isinstance(value, int | float | bool | str) or (hasattr(value, "item") and callable(value.item)): + try: + scalar_replacements[name] = _normalize_miniexpr_scalar(value) + continue + 
except TypeError: + pass + array_operands[name] = value + + if not scalar_replacements: + return expr_string, operands + + rewritten, body_start = _remove_scalar_params_preserving_source(expr_string, scalar_replacements) + rewritten = _replace_scalar_names_preserving_source(rewritten, scalar_replacements, body_start) + rewritten = _fold_numeric_cast_calls_preserving_source(rewritten, body_start) + return rewritten, array_operands + + +def specialize_dsl_miniexpr_inputs(expr_string: str, operands: dict): + """Backward-compatible alias for DSL-specific callers.""" + return specialize_miniexpr_inputs(expr_string, operands) + + +class _DSLValidator: + _binop_map: ClassVar[dict[type[ast.operator], str]] = { + ast.Add: "+", + ast.Sub: "-", + ast.Mult: "*", + ast.Div: "/", + ast.FloorDiv: "//", + ast.Mod: "%", + ast.Pow: "**", + ast.BitAnd: "&", + ast.BitOr: "|", + ast.BitXor: "^", + ast.LShift: "<<", + ast.RShift: ">>", + } + _cmp_map: ClassVar[dict[type[ast.cmpop], str]] = { + ast.Eq: "==", + ast.NotEq: "!=", + ast.Lt: "<", + ast.LtE: "<=", + ast.Gt: ">", + ast.GtE: ">=", + } + + def __init__(self, source: str, line_base: int = 0): + self._source = source + self._line_base = line_base + + def validate(self, func_node: ast.FunctionDef): + self._args(func_node) + if not func_node.body: + self._err(func_node, "DSL kernel must have a body") + for stmt in func_node.body: + self._stmt(stmt) + + def _err(self, node: ast.AST, msg: str, *, line: int | None = None, col: int | None = None): + if line is None: + line = getattr(node, "lineno", 0) + if col is None: + col = getattr(node, "col_offset", 0) + 1 + line -= self._line_base + location = f"{msg} at line {line}, column {col}" + dump = self._format_source_with_pointer(line, col) + raise DSLSyntaxError(f"{location}\n\nDSL kernel source:\n{dump}\n\nSee: {_DSL_USAGE_DOC_URL}") + + def _format_source_with_pointer(self, line: int, col: int) -> str: + lines = self._source.splitlines() + if not lines: + return "" + width = 
len(str(len(lines))) + out = [] + for lineno, text in enumerate(lines, start=1): + out.append(f"{lineno:>{width}} | {text}") + if lineno == line: + pointer = " " * max(col - 1, 0) + out.append(f"{' ' * width} | {pointer}^") + return "\n".join(out) + + def _args(self, func_node: ast.FunctionDef): + args = func_node.args + if args.vararg or args.kwarg or args.kwonlyargs: + self._err(args, "DSL kernel does not support *args/**kwargs/kwonly args") + if args.defaults or args.kw_defaults: + self._err(args, "DSL kernel does not support default arguments") + + def _stmt(self, node: ast.stmt): # noqa: C901 + if isinstance(node, ast.Assign): + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + self._err(node, "Only simple assignments are supported in DSL kernels") + self._expr(node.value) + return + if isinstance(node, ast.AugAssign): + if not isinstance(node.target, ast.Name): + self._err(node, "Only simple augmented assignments are supported") + self._binop(node.op) + self._expr(node.value) + return + if isinstance(node, ast.Return): + if node.value is None: + self._err(node, "DSL kernel return must have a value") + self._expr(node.value) + return + if isinstance(node, ast.Expr): + self._expr(node.value) + return + if isinstance(node, ast.If): + self._expr(node.test) + if not node.body: + self._err(node, "Empty if blocks are not supported in DSL kernels") + for stmt in node.body: + self._stmt(stmt) + for stmt in node.orelse: + self._stmt(stmt) + return + if isinstance(node, ast.For): + if node.orelse: + self._err(node, "for/else is not supported in DSL kernels") + if not isinstance(node.target, ast.Name): + self._err(node, "DSL for-loop target must be a simple name") + if not isinstance(node.iter, ast.Call): + self._err(node, "DSL for-loop must iterate over range()") + func_name = self._call_name(node.iter.func) + if func_name != "range": + self._err(node, "DSL for-loop must iterate over range()") + if node.iter.keywords or not (1 <= 
len(node.iter.args) <= 3): + self._err(node, "DSL range() must take 1 to 3 positional arguments") + for arg in node.iter.args: + self._expr(arg) + if not node.body: + self._err(node, "Empty for-loop bodies are not supported in DSL kernels") + for stmt in node.body: + self._stmt(stmt) + return + if isinstance(node, ast.While): + if node.orelse: + self._err(node, "while/else is not supported in DSL kernels") + self._expr(node.test) + if not node.body: + self._err(node, "Empty while-loop bodies are not supported in DSL kernels") + for stmt in node.body: + self._stmt(stmt) + return + if isinstance(node, ast.Break | ast.Continue): + return + self._err(node, f"Unsupported DSL statement: {type(node).__name__}") + + def _expr(self, node: ast.AST): # noqa: C901 + if isinstance(node, ast.Name): + return + if isinstance(node, ast.Constant): + val = node.value + if isinstance(val, bool | int | float | str): + return + self._err(node, "Unsupported constant in DSL expression") + if isinstance(node, ast.UnaryOp): + if isinstance(node.op, ast.UAdd | ast.USub | ast.Not): + self._expr(node.operand) + return + self._err(node, "Unsupported unary operator in DSL expression") + if isinstance(node, ast.BinOp): + self._binop(node.op) + self._expr(node.left) + self._expr(node.right) + return + if isinstance(node, ast.BoolOp): + for value in node.values: + self._expr(value) + return + if isinstance(node, ast.Compare): + if len(node.ops) != 1 or len(node.comparators) != 1: + self._err(node, "Chained comparisons are not supported in DSL") + self._cmpop(node.ops[0]) + self._expr(node.left) + self._expr(node.comparators[0]) + return + if isinstance(node, ast.Call): + self._call_name(node.func) + if node.keywords: + self._err(node, "Keyword arguments are not supported in DSL calls") + for arg in node.args: + self._expr(arg) + return + if isinstance(node, ast.IfExp): + seg = ast.get_source_segment(self._source, node) + col = getattr(node, "col_offset", 0) + 1 + if seg is not None: + rel = 
class DSLKernel:
    """Wrap a Python function and optionally extract a miniexpr DSL kernel from it."""

    def __init__(self, func):
        """Inspect *func*'s signature and try to extract its DSL source.

        After construction ``dsl_source``/``input_names`` hold the extracted
        kernel (or ``None``), and ``dsl_error`` holds the ``DSLSyntaxError``
        raised by validation (or ``None``). Any other extraction failure is
        swallowed and simply leaves ``dsl_source`` as ``None``.
        """
        self.func = func
        self.__name__ = getattr(func, "__name__", self.__class__.__name__)
        self.__qualname__ = getattr(func, "__qualname__", self.__name__)
        self.__doc__ = getattr(func, "__doc__", None)
        try:
            sig = inspect.signature(func)
        except (TypeError, ValueError):
            sig = None
        self._sig = sig
        self._sig_has_varargs = False
        self._sig_npositional = None
        self._legacy_udf_signature = False
        if sig is not None:
            params = list(sig.parameters.values())
            positional_params = [p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)]
            self._sig_has_varargs = any(p.kind == p.VAR_POSITIONAL for p in params)
            self._sig_npositional = len(positional_params)
            # Preserve support for classic lazyudf signature: (inputs_tuple, output, offset)
            if not self._sig_has_varargs and len(positional_params) == 3:
                p2 = positional_params[1].name.lower()
                p3 = positional_params[2].name.lower()
                self._legacy_udf_signature = p2 in {"output", "out"} and p3 == "offset"
        self.dsl_source = None
        self.input_names = None
        self.dsl_error = None
        try:
            dsl_source, input_names = self._extract_dsl(func)
        except DSLSyntaxError as e:
            dsl_source = None
            input_names = None
            self.dsl_error = e
        except Exception:
            # Best effort: e.g. source not available; the wrapped function
            # can still be called directly.
            dsl_source = None
            input_names = None
        self.dsl_source = dsl_source
        self.input_names = input_names

    def _extract_dsl(self, func):
        """Return ``(dsl_source, input_names)`` for *func*.

        Raises ``DSLSyntaxError`` on unsupported syntax and ``ValueError``
        when no function definition can be found.
        """
        source = inspect.getsource(func)
        source = textwrap.dedent(source)
        tree = ast.parse(source)
        func_node = None
        for node in tree.body:
            if isinstance(node, ast.FunctionDef) and node.name == func.__name__:
                func_node = node
                break
        if func_node is None:
            # Fall back to the first function definition of any name.
            for node in tree.body:
                if isinstance(node, ast.FunctionDef):
                    func_node = node
                    break
        if func_node is None:
            raise ValueError("No function definition found for DSL extraction")

        dsl_source = self._slice_function_source(source, func_node)
        dsl_tree = ast.parse(dsl_source)
        dsl_func = next((node for node in dsl_tree.body if isinstance(node, ast.FunctionDef)), None)
        if dsl_func is None:
            raise ValueError("No function definition found in sliced DSL source")
        _DSLValidator(dsl_source).validate(dsl_func)
        input_names = self._input_names_from_signature(dsl_func)
        if _PRINT_DSL_KERNEL:
            func_name = getattr(func, "__name__", "")
            print(f"[DSLKernel:{func_name}] dsl_source (full):")
            print(dsl_source)
        return dsl_source, input_names

    @staticmethod
    def _slice_function_source(source: str, func_node: ast.FunctionDef) -> str:
        """Return the lines of *source* from the ``def`` line to the end of the function."""
        lines = source.splitlines()
        start = func_node.lineno - 1
        end_lineno = getattr(func_node, "end_lineno", None)
        end = len(lines) if end_lineno is None else end_lineno
        return "\n".join(lines[start:end])

    @staticmethod
    def _input_names_from_signature(func_node: ast.FunctionDef) -> list[str]:
        """Return the positional parameter names, rejecting unsupported kinds."""
        args = func_node.args
        if args.vararg or args.kwarg or args.kwonlyargs:
            raise ValueError("DSL kernel does not support *args/**kwargs/kwonly args")
        if args.defaults or args.kw_defaults:
            raise ValueError("DSL kernel does not support default arguments")
        return [a.arg for a in (args.posonlyargs + args.args)]

    def __call__(self, inputs_tuple, output, offset=None):
        """Invoke the wrapped function using the calling convention it accepts.

        Raises the stored ``dsl_error`` if validation failed. Legacy
        ``(inputs, output, offset)`` functions are called as-is; otherwise
        the inputs are unpacked (with *offset* appended when the function
        takes one extra positional) and the result is written to
        ``output[...]``.
        """
        if self.dsl_error is not None:
            raise self.dsl_error
        if self._legacy_udf_signature:
            return self.func(inputs_tuple, output, offset)

        n_inputs = len(inputs_tuple)
        if self._sig is not None and (
            self._sig_npositional in (n_inputs, n_inputs + 1) or self._sig_has_varargs
        ):
            if self._sig_npositional == n_inputs + 1:
                result = self.func(*inputs_tuple, offset)
            else:
                result = self.func(*inputs_tuple)
            output[...] = result
            return None

        # Unknown signature: try the legacy convention first.
        try:
            return self.func(inputs_tuple, output, offset)
        except TypeError:
            result = self.func(*inputs_tuple)
            output[...] = result
            return None


def dsl_kernel(func):
    """Decorator to wrap a function in a DSLKernel."""

    return DSLKernel(func)


def validate_dsl(func):
    """Validate a DSL kernel function without executing it.

    Parameters
    ----------
    func
        A Python callable or :class:`DSLKernel`.

    Returns
    -------
    dict
        A dictionary with:
        - ``valid`` (bool): whether the DSL is valid
        - ``dsl_source`` (str | None): extracted DSL source when valid
        - ``input_names`` (list[str] | None): input signature names when valid
        - ``error`` (str | None): user-facing error message when invalid
    """

    kernel = func if isinstance(func, DSLKernel) else DSLKernel(func)
    err = kernel.dsl_error
    if err is not None:
        error = str(err)
    elif kernel.dsl_source is None:
        # Extraction failed for a non-syntax reason; without a source there
        # is nothing that could have been validated.
        error = "DSL source could not be extracted from the function"
    else:
        error = None
    return {
        "valid": error is None,
        "dsl_source": kernel.dsl_source,
        "input_names": kernel.input_names,
        "error": error,
    }
+ + Returns + ------- + dict + A dictionary with: + - ``valid`` (bool): whether the DSL is valid + - ``dsl_source`` (str | None): extracted DSL source when valid + - ``input_names`` (list[str] | None): input signature names when valid + - ``error`` (str | None): user-facing error message when invalid + """ + + kernel = func if isinstance(func, DSLKernel) else DSLKernel(func) + err = kernel.dsl_error + return { + "valid": err is None, + "dsl_source": kernel.dsl_source, + "input_names": kernel.input_names, + "error": None if err is None else str(err), + } + + +class _DSLBuilder: + _binop_map: ClassVar[dict[type[ast.operator], str]] = { + ast.Add: "+", + ast.Sub: "-", + ast.Mult: "*", + ast.Div: "/", + ast.FloorDiv: "//", + ast.Mod: "%", + ast.Pow: "**", + ast.BitAnd: "&", + ast.BitOr: "|", + ast.BitXor: "^", + ast.LShift: "<<", + ast.RShift: ">>", + } + + _cmp_map: ClassVar[dict[type[ast.cmpop], str]] = { + ast.Eq: "==", + ast.NotEq: "!=", + ast.Lt: "<", + ast.LtE: "<=", + ast.Gt: ">", + ast.GtE: ">=", + } + + def __init__(self): + self._lines = [] + + def build(self, func_node: ast.FunctionDef): + input_names = self._args(func_node.args) + self._emit(f"def {func_node.name}({', '.join(input_names)}):", 0) + if not func_node.body: + raise ValueError("DSL kernel must have a body") + for stmt in func_node.body: + self._stmt(stmt, 4) + return "\n".join(self._lines), input_names + + def _emit(self, line: str, indent: int): + self._lines.append(" " * indent + line) + + def _args(self, args: ast.arguments): + if args.vararg or args.kwarg or args.kwonlyargs: + raise ValueError("DSL kernel does not support *args/**kwargs/kwonly args") + if args.defaults or args.kw_defaults: + raise ValueError("DSL kernel does not support default arguments") + names = [a.arg for a in (args.posonlyargs + args.args)] + if not names: + raise ValueError("DSL kernel must accept at least one argument") + return names + + def _stmt(self, node: ast.stmt, indent: int): + if isinstance(node, 
ast.Assign): + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + raise ValueError("Only simple assignments are supported in DSL kernels") + target = node.targets[0].id + value = self._expr(node.value) + self._emit(f"{target} = {value}", indent) + return + if isinstance(node, ast.AugAssign): + if not isinstance(node.target, ast.Name): + raise ValueError("Only simple augmented assignments are supported") + target = node.target.id + op = self._binop(node.op) + value = self._expr(node.value) + self._emit(f"{target} = {target} {op} {value}", indent) + return + if isinstance(node, ast.Return): + if node.value is None: + raise ValueError("DSL kernel return must have a value") + value = self._expr(node.value) + self._emit(f"return {value}", indent) + return + if isinstance(node, ast.Expr): + value = self._expr(node.value) + self._emit(value, indent) + return + if isinstance(node, ast.If): + self._if_stmt(node, indent) + return + if isinstance(node, ast.For): + self._for_stmt(node, indent) + return + if isinstance(node, ast.While): + self._while_stmt(node, indent) + return + if isinstance(node, ast.Break): + self._emit("break", indent) + return + if isinstance(node, ast.Continue): + self._emit("continue", indent) + return + raise ValueError(f"Unsupported DSL statement: {type(node).__name__}") + + def _stmt_block(self, body, indent: int): + if not body: + raise ValueError("Empty blocks are not supported in DSL kernels") + i = 0 + while i < len(body): + stmt = body[i] + if ( + isinstance(stmt, ast.If) + and not stmt.orelse + and self._block_terminates(stmt.body) + and i + 1 < len(body) + and isinstance(body[i + 1], ast.If) + ): + merged = ast.If(test=stmt.test, body=stmt.body, orelse=[body[i + 1]]) + self._if_stmt(merged, indent) + i += 2 + continue + self._stmt(stmt, indent) + i += 1 + + def _block_terminates(self, body) -> bool: + if not body: + return False + return self._stmt_terminates(body[-1]) + + def _stmt_terminates(self, node: ast.stmt) -> 
bool: + if isinstance(node, ast.Return | ast.Break | ast.Continue): + return True + if isinstance(node, ast.If) and node.orelse: + return self._block_terminates(node.body) and self._block_terminates(node.orelse) + return False + + def _if_stmt(self, node: ast.If, indent: int): + current = node + first = True + while True: + prefix = "if" if first else "elif" + cond = self._expr(current.test) + self._emit(f"{prefix} {cond}:", indent) + self._stmt_block(current.body, indent + 4) + first = False + if current.orelse and len(current.orelse) == 1 and isinstance(current.orelse[0], ast.If): + current = current.orelse[0] + continue + break + if current.orelse: + self._emit("else:", indent) + self._stmt_block(current.orelse, indent + 4) + + def _for_stmt(self, node: ast.For, indent: int): + if node.orelse: + raise ValueError("for/else is not supported in DSL kernels") + if not isinstance(node.target, ast.Name): + raise ValueError("DSL for-loop target must be a simple name") + if not isinstance(node.iter, ast.Call): + raise ValueError("DSL for-loop must iterate over range()") + func_name = self._call_name(node.iter.func) + if func_name != "range": + raise ValueError("DSL for-loop must iterate over range()") + if node.iter.keywords or len(node.iter.args) != 1: + raise ValueError("DSL range() must take a single argument") + limit = self._expr(node.iter.args[0]) + self._emit(f"for {node.target.id} in range({limit}):", indent) + self._stmt_block(node.body, indent + 4) + + def _while_stmt(self, node: ast.While, indent: int): + if node.orelse: + raise ValueError("while/else is not supported in DSL kernels") + cond = self._expr(node.test) + self._emit(f"while {cond}:", indent) + self._stmt_block(node.body, indent + 4) + + def _expr(self, node: ast.AST) -> str: # noqa: C901 + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Constant): + val = node.value + if isinstance(val, bool): + return "1" if val else "0" + if isinstance(val, int | float): + return 
repr(val) + raise ValueError("Unsupported constant in DSL expression") + if isinstance(node, ast.UnaryOp): + if isinstance(node.op, ast.UAdd): + return f"+{self._expr(node.operand)}" + if isinstance(node.op, ast.USub): + return f"-{self._expr(node.operand)}" + if isinstance(node.op, ast.Not): + return f"!{self._expr(node.operand)}" + raise ValueError("Unsupported unary operator in DSL expression") + if isinstance(node, ast.BinOp): + left = self._expr(node.left) + right = self._expr(node.right) + op = self._binop(node.op) + return f"({left} {op} {right})" + if isinstance(node, ast.BoolOp): + op = "&" if isinstance(node.op, ast.And) else "|" + values = [self._expr(v) for v in node.values] + expr = values[0] + for val in values[1:]: + expr = f"({expr} {op} {val})" + return expr + if isinstance(node, ast.Compare): + if len(node.ops) != 1 or len(node.comparators) != 1: + raise ValueError("Chained comparisons are not supported in DSL") + left = self._expr(node.left) + right = self._expr(node.comparators[0]) + op = self._cmpop(node.ops[0]) + return f"({left} {op} {right})" + if isinstance(node, ast.Call): + func_name = self._call_name(node.func) + if node.keywords: + raise ValueError("Keyword arguments are not supported in DSL calls") + args = ", ".join(self._expr(a) for a in node.args) + return f"{func_name}({args})" + if isinstance(node, ast.IfExp): + cond = self._expr(node.test) + body = self._expr(node.body) + orelse = self._expr(node.orelse) + return f"where({cond}, {body}, {orelse})" + raise ValueError(f"Unsupported DSL expression: {type(node).__name__}") + + def _call_name(self, node: ast.AST) -> str: + if isinstance(node, ast.Name): + return node.id + if ( + isinstance(node, ast.Attribute) + and isinstance(node.value, ast.Name) + and node.value.id in {"np", "numpy", "math"} + ): + return node.attr + raise ValueError("Unsupported call target in DSL") + + def _binop(self, op: ast.operator) -> str: + for k, v in self._binop_map.items(): + if isinstance(op, k): + 
return v + raise ValueError("Unsupported binary operator in DSL") + + def _cmpop(self, op: ast.cmpop) -> str: + for k, v in self._cmp_map.items(): + if isinstance(op, k): + return v + raise ValueError("Unsupported comparison in DSL") + + +class _DSLReducer: + _binop_map: ClassVar[dict[type[ast.operator], str]] = _DSLBuilder._binop_map + _cmp_map: ClassVar[dict[type[ast.cmpop], str]] = _DSLBuilder._cmp_map + + def __init__(self, max_unroll: int = 64): + self._env: dict[str, str] = {} + self._const_env: dict[str, object] = {} + self._return_expr: str | None = None + self._max_unroll = max_unroll + + def reduce(self, func_node: ast.FunctionDef): + input_names = self._args(func_node.args) + if not func_node.body: + return None + for stmt in func_node.body: + if not self._stmt(stmt): + return None + if self._return_expr is not None: + break + if self._return_expr is None: + return None + return self._return_expr, input_names + + def _args(self, args: ast.arguments): + if args.vararg or args.kwarg or args.kwonlyargs: + raise ValueError("DSL kernel does not support *args/**kwargs/kwonly args") + if args.defaults or args.kw_defaults: + raise ValueError("DSL kernel does not support default arguments") + names = [a.arg for a in (args.posonlyargs + args.args)] + if not names: + raise ValueError("DSL kernel must accept at least one argument") + return names + + def _stmt(self, node: ast.stmt) -> bool: # noqa: C901 + if isinstance(node, ast.Assign): + if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + return False + target = node.targets[0].id + value = self._expr(node.value) + self._env[target] = value + const_val = self._const_eval(node.value) + if const_val is None: + self._const_env.pop(target, None) + else: + self._const_env[target] = const_val + return True + if isinstance(node, ast.AugAssign): + if not isinstance(node.target, ast.Name): + return False + target = node.target.id + op = self._binop(node.op) + value = self._expr(node.value) + left = 
self._env.get(target, target) + left_const = self._const_env.get(target) + right_const = self._const_eval(node.value) + simplified = self._simplify_binop_expr(op, left, value, left_const, right_const) + self._env[target] = simplified + if left_const is None or right_const is None: + self._const_env.pop(target, None) + else: + self._const_env[target] = self._apply_binop(left_const, right_const, node.op) + return True + if isinstance(node, ast.Return): + if node.value is None: + return False + self._return_expr = self._expr(node.value) + return True + if isinstance(node, ast.If): + test_val = self._const_eval(node.test) + if test_val is None: + return False + branch = node.body if bool(test_val) else node.orelse + if not branch: + return True + for stmt in branch: + if not self._stmt(stmt): + return False + if self._return_expr is not None: + return True + return True + if isinstance(node, ast.For): + if node.orelse: + return False + if not isinstance(node.target, ast.Name): + return False + if not isinstance(node.iter, ast.Call): + return False + func_name = self._call_name(node.iter.func) + if func_name != "range": + return False + if node.iter.keywords or len(node.iter.args) != 1: + return False + limit_val = self._const_eval(node.iter.args[0]) + if limit_val is None or not isinstance(limit_val, int): + return False + if limit_val < 0 or limit_val > self._max_unroll: + return False + loop_var = node.target.id + old_env = self._env.get(loop_var) + old_const = self._const_env.get(loop_var) + for i in range(limit_val): + self._env[loop_var] = str(i) + self._const_env[loop_var] = i + for stmt in node.body: + if not self._stmt(stmt): + if old_env is None: + self._env.pop(loop_var, None) + else: + self._env[loop_var] = old_env + if old_const is None: + self._const_env.pop(loop_var, None) + else: + self._const_env[loop_var] = old_const + return False + if self._return_expr is not None: + break + if self._return_expr is not None: + break + if old_env is None: + 
def _expr(self, node: ast.AST) -> str:  # noqa: C901
    """Render ``node`` as a DSL expression string.

    Sub-expressions that ``self._const_eval`` can fold are emitted as
    literals (booleans as ``1``/``0``).  Names bound in ``self._env`` are
    replaced by their recorded expression.  Every compound result is either
    fully parenthesized or a call, so it can be embedded in a larger
    expression without changing its meaning.

    Raises
    ------
    ValueError
        For unsupported constants, unary operators, chained comparisons,
        keyword arguments in calls, or unsupported node types.
    """
    const_val = self._const_eval(node)
    if const_val is not None:
        if isinstance(const_val, bool):
            return "1" if const_val else "0"
        literal = repr(const_val)
        # A bare leading minus can be captured by a neighbouring operator
        # (``-3 ** x`` is not ``(-3) ** x``), so negative literals are wrapped.
        return f"({literal})" if literal.startswith("-") else literal
    if isinstance(node, ast.Name):
        if node.id not in self._env:
            return node.id
        bound = self._env[node.id]
        # Anything containing "(" is already parenthesized or is a call;
        # plain names / literals get wrapped once.
        return bound if "(" in bound else f"({bound})"
    if isinstance(node, ast.Constant):
        val = node.value
        if isinstance(val, bool):
            return "1" if val else "0"
        if isinstance(val, int | float):
            return repr(val)
        raise ValueError("Unsupported constant in DSL expression")
    if isinstance(node, ast.UnaryOp):
        for op_type, prefix in ((ast.UAdd, "+"), (ast.USub, "-"), (ast.Not, "!")):
            if isinstance(node.op, op_type):
                # Parenthesize so the prefix operator keeps its operand when
                # the result is used next to a tighter-binding operator.
                return f"({prefix}{self._expr(node.operand)})"
        raise ValueError("Unsupported unary operator in DSL expression")
    if isinstance(node, ast.BinOp):
        left = self._expr(node.left)
        right = self._expr(node.right)
        op = self._binop(node.op)
        left_const = self._const_eval(node.left)
        right_const = self._const_eval(node.right)
        return self._simplify_binop_expr(op, left, right, left_const, right_const)
    if isinstance(node, ast.BoolOp):
        # ``and`` / ``or`` are emitted as the element-wise ``&`` / ``|``.
        op = "&" if isinstance(node.op, ast.And) else "|"
        values = [self._expr(v) for v in node.values]
        expr = values[0]
        for val in values[1:]:
            expr = f"({expr} {op} {val})"
        return expr
    if isinstance(node, ast.Compare):
        if len(node.ops) != 1 or len(node.comparators) != 1:
            raise ValueError("Chained comparisons are not supported in DSL")
        left = self._expr(node.left)
        right = self._expr(node.comparators[0])
        op = self._cmpop(node.ops[0])
        return f"({left} {op} {right})"
    if isinstance(node, ast.Call):
        func_name = self._call_name(node.func)
        if node.keywords:
            raise ValueError("Keyword arguments are not supported in DSL calls")
        args = ", ".join(self._expr(a) for a in node.args)
        return f"{func_name}({args})"
    if isinstance(node, ast.IfExp):
        cond = self._expr(node.test)
        body = self._expr(node.body)
        orelse = self._expr(node.orelse)
        return f"where({cond}, {body}, {orelse})"
    raise ValueError(f"Unsupported DSL expression: {type(node).__name__}")
def _apply_binop(self, left, right, op):
    """Fold a binary operation on two already-known constants.

    Parameters
    ----------
    left, right
        The constant operand values.
    op : ast.operator
        The AST operator node describing the operation.

    Returns
    -------
    The folded value, or ``None`` when ``op`` is not one of the handled
    operators or the operation cannot be evaluated (division by zero,
    float overflow, an invalid shift count, operand types the operator
    does not accept).  ``None`` is the "not a constant" marker used by the
    callers, so an unfoldable operation is simply left unfolded instead of
    aborting the reduction with an arithmetic error.
    """
    folders = (
        (ast.Add, lambda a, b: a + b),
        (ast.Sub, lambda a, b: a - b),
        (ast.Mult, lambda a, b: a * b),
        (ast.Div, lambda a, b: a / b),
        (ast.FloorDiv, lambda a, b: a // b),
        (ast.Mod, lambda a, b: a % b),
        (ast.Pow, lambda a, b: a**b),
        (ast.BitAnd, lambda a, b: a & b),
        (ast.BitOr, lambda a, b: a | b),
        (ast.BitXor, lambda a, b: a ^ b),
        (ast.LShift, lambda a, b: a << b),
        (ast.RShift, lambda a, b: a >> b),
    )
    for op_type, fold in folders:
        if isinstance(op, op_type):
            try:
                return fold(left, right)
            except (ArithmeticError, TypeError, ValueError):
                # ZeroDivisionError / OverflowError, e.g. ``1.5 << 1``,
                # or a negative shift count: not foldable.
                return None
    return None
left_expr + return f"({left_expr} {op} {right_expr})" + + def _is_zero(self, value): + return isinstance(value, int | float | bool) and value == 0 + + def _is_one(self, value): + return isinstance(value, int | float | bool) and value == 1 diff --git a/venv/Lib/site-packages/blosc2/embed_store.py b/venv/Lib/site-packages/blosc2/embed_store.py new file mode 100644 index 0000000..84b1b20 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/embed_store.py @@ -0,0 +1,329 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +import copy +from collections.abc import Iterator, KeysView +from typing import Any + +import numpy as np + +import blosc2 +from blosc2.c2array import C2Array +from blosc2.schunk import SChunk + +PROFILE = False # Set to True to enable PROFILE prints in EmbedStore + + +class EmbedStore: + """ + A dictionary-like container for storing NumPy/Blosc2 arrays (NDArray or SChunk) as nodes. + + For NumPy arrays, Blosc2 NDArrays (even if they live in external ``.b2nd`` files), + and Blosc2 SChunk objects, the data is read and embedded into the store. For remote + arrays (``C2Array``), only lightweight references (URL base and path) are stored. + If you need a richer hierarchical container with optional external references, consider using + `blosc2.TreeStore` or `blosc2.DictStore`. + + Parameters + ---------- + urlpath : str or None, optional + Path for persistent storage. Using a '.b2e' extension is recommended. + If None, the embed store will be in memory only, which can be + deserialized later using the :func:`blosc2.from_cframe` function. + mode : str, optional + File mode ('r', 'w', 'a'). Default is 'w'. + mmap_mode : str or None, optional + Memory mapping mode for read access. For now, only ``"r"`` is supported, + and only when ``mode="r"``. 
Default is None. + cparams : dict or None, optional + Compression parameters for nodes and the embed store itself. + Default is None, which uses the default Blosc2 parameters. + dparams : dict or None, optional + Decompression parameters for nodes and the embed store itself. + Default is None, which uses the default Blosc2 parameters. + storage : blosc2.Storage or None, optional + Storage properties for the embed store. If passed, it will override + the `urlpath` and `mode` parameters. + chunksize : int, optional + Size of chunks for the backing storage. Default is 1 MiB. + + Examples + -------- + >>> estore = EmbedStore(urlpath="example_estore.b2e", mode="w") + >>> estore["/node1"] = np.array([1, 2, 3]) + >>> estore["/node2"] = blosc2.ones(2) + >>> estore["/node3"] = blosc2.arange(3, dtype="i4", urlpath="external_node3.b2nd", mode="w") + >>> urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") + >>> estore["/node4"] = blosc2.open(urlpath, mode="r") + >>> print(list(estore.keys())) + ['/node1', '/node2', '/node3', '/node4'] + >>> print(estore["/node1"][:]) + [1 2 3] + + """ + + def __init__( + self, + urlpath: str | None = None, + mode: str = "a", + cparams: blosc2.CParams | None = None, + dparams: blosc2.CParams | None = None, + storage: blosc2.Storage | None = None, + chunksize: int | None = 2**13, + _from_schunk: SChunk | None = None, + *, + mmap_mode: str | None = None, + meta: dict | None = None, + ): + """Initialize EmbedStore.""" + + # For some reason, the SChunk store cannot achieve the same compression ratio as the NDArray store, + # although it is more efficient in terms of CPU usage. + # Let's use the SChunk store by default and continue experimenting. 
def _validate_key(self, key: str) -> None:
    """Check that ``key`` is a well-formed, not-yet-used node key.

    A valid key is a string that starts with ``/``, does not end with ``/``
    (except for the root key ``/`` itself), contains no ``//`` and none of
    the characters ``:``, NUL, newline, carriage return or tab, and is not
    already present in ``self._embed_map``.

    Raises
    ------
    TypeError
        If ``key`` is not a string.
    ValueError
        If any of the other rules above is violated.
    """
    if not isinstance(key, str):
        raise TypeError("Key must be a string.")
    if key[:1] != "/":
        raise ValueError("Key must start with '/'.")
    # Past this point the key starts with "/", so only the root key has length 1.
    if key != "/" and key.endswith("/"):
        raise ValueError("Key cannot end with '/' unless it is the root key '/'.")
    if "//" in key:
        raise ValueError("Key cannot contain consecutive slashes '//'.")
    forbidden = (":", "\0", "\n", "\r", "\t")
    offender = next((ch for ch in forbidden if ch in key), None)
    if offender is not None:
        raise ValueError(f"Key cannot contain character: {offender!r}")
    if key in self._embed_map:
        raise ValueError(f"Key '{key}' already exists in store.")
def __delitem__(self, key: str) -> None:
    """Remove a node from the embed store.

    Only the key's entry in the embed map is dropped and the metadata is
    saved again; the write offset is left untouched.

    Raises
    ------
    ValueError
        If the store was opened in read-only mode (same rule and exception
        type as ``__setitem__``).
    KeyError
        If ``key`` is not present in the store.
    """
    if self.mode == "r":
        # Fail before touching the in-memory map, so a read-only store is
        # never left out of sync with what is persisted.
        raise ValueError("Cannot delete items in read-only mode.")
    if key not in self._embed_map:
        raise KeyError(f"Key '{key}' not found in the embed store.")
    del self._embed_map[key]
    self._save_metadata()
def estore_from_cframe(cframe: bytes, copy: bool = False) -> EmbedStore:
    """
    Build an EmbedStore from a serialized CFrame.

    The frame is first turned into an SChunk via ``blosc2.schunk_from_cframe``
    and that SChunk is then wrapped in an ``EmbedStore``.

    Parameters
    ----------
    cframe : bytes
        CFrame data to deserialize.
    copy : bool, optional
        Forwarded to ``blosc2.schunk_from_cframe``. Default is False.

    Returns
    -------
    estore : EmbedStore
        The deserialized EmbedStore object.
    """
    return EmbedStore(_from_schunk=blosc2.schunk_from_cframe(cframe, copy=copy))
class MissingOperands(ValueError):
    """Error for a lazy expression that has missing operands.

    Attributes
    ----------
    expr
        The expression, as passed to the constructor.
    missing_ops
        The missing operands, as passed to the constructor.
    """

    def __init__(self, expr, missing_ops):
        self.expr = expr
        self.missing_ops = missing_ops

        message = f'Lazy expression "{expr}" with missing operands: {missing_ops}'
        super().__init__(message)

    def __reduce__(self):
        # The default exception reduction rebuilds the object from ``args``,
        # which holds only the formatted message; that call would fail because
        # ``__init__`` needs two arguments.  Rebuild from the real constructor
        # arguments so ``copy`` and ``pickle`` work.
        return (type(self), (self.expr, self.missing_ops), dict(vars(self)))
# Placeholders: none of these functions is implemented.  Each stub accepts
# (and ignores) any arguments, so that a call written against the eventual API
# fails with NotImplementedError rather than with a misleading TypeError about
# the number of arguments.


def fft(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("fft is not implemented")


def ifft(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("ifft is not implemented")


def fftn(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("fftn is not implemented")


def ifftn(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("ifftn is not implemented")


def rfft(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("rfft is not implemented")


def irfft(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("irfft is not implemented")


def rfftn(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("rfftn is not implemented")


def irfftn(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("irfftn is not implemented")


def hfft(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("hfft is not implemented")


def ihfft(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("ihfft is not implemented")


def fftfreq(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("fftfreq is not implemented")


def rfftfreq(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("rfftfreq is not implemented")


def fftshift(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("fftshift is not implemented")


def ifftshift(*args, **kwargs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("ifftshift is not implemented")
+ * @author Blosc Development Team + */ + +#ifndef BLOSC_B2ND_H +#define BLOSC_B2ND_H + +#ifdef __cplusplus +extern "C" { +#endif +#include "blosc2/blosc2-export.h" +#ifdef __cplusplus +} +#endif + +#include "blosc2.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_MSC_VER) +#define B2ND_DEPRECATED(msg) __declspec(deprecated(msg)) +#elif defined(__GNUC__) || defined(__clang__) +#define B2ND_DEPRECATED(msg) __attribute__((deprecated(msg))) +#else +#define B2ND_DEPRECATED(msg) +#endif + +/* The version for metalayer format; starts from 0 and it must not exceed 127 */ +#define B2ND_METALAYER_VERSION 0 + +/* The maximum number of dimensions for b2nd arrays */ +#define B2ND_MAX_DIM 16 + +/* The maximum number of metalayers for b2nd arrays */ +#define B2ND_MAX_METALAYERS (BLOSC2_MAX_METALAYERS - 1) + +/* NumPy dtype format + * https://numpy.org/doc/stable/reference/arrays.dtypes.html#arrays-dtypes-constructing + */ +#define DTYPE_NUMPY_FORMAT 0 + +/* The default data type */ +#define B2ND_DEFAULT_DTYPE "|u1" +/* The default data format */ +#define B2ND_DEFAULT_DTYPE_FORMAT DTYPE_NUMPY_FORMAT + +/** + * @brief An *optional* cache for a single block. + * + * When a chunk is needed, it is copied into this cache. In this way, if the same chunk is needed + * again afterwards, it is not necessary to recover it because it is already in the cache. + */ +struct chunk_cache_s { + uint8_t *data; + //!< The chunk data. + int64_t nchunk; + //!< The chunk number in cache. If @p nchunk equals to -1, it means that the cache is empty. +}; + +/** + * @brief General parameters needed for the creation of a b2nd array. + */ +typedef struct b2nd_context_s b2nd_context_t; /* opaque type */ + +/** + * @brief A multidimensional array of data that can be compressed. + */ +typedef struct { + blosc2_schunk *sc; + //!< Pointer to a Blosc super-chunk + int64_t shape[B2ND_MAX_DIM]; + //!< Shape of original data. 
+ int32_t chunkshape[B2ND_MAX_DIM]; + //!< Shape of each chunk. + int64_t extshape[B2ND_MAX_DIM]; + //!< Shape of padded data. + int32_t blockshape[B2ND_MAX_DIM]; + //!< Shape of each block. + int64_t extchunkshape[B2ND_MAX_DIM]; + //!< Shape of padded chunk. + int64_t nitems; + //!< Number of items in original data. + int32_t chunknitems; + //!< Number of items in each chunk. + int64_t extnitems; + //!< Number of items in padded data. + int32_t blocknitems; + //!< Number of items in each block. + int64_t extchunknitems; + //!< Number of items in a padded chunk. + int8_t ndim; + //!< Data dimensions. + struct chunk_cache_s chunk_cache; + //!< A partition cache. + int64_t item_array_strides[B2ND_MAX_DIM]; + //!< Item - shape strides. + int64_t item_chunk_strides[B2ND_MAX_DIM]; + //!< Item - shape strides. + int64_t item_extchunk_strides[B2ND_MAX_DIM]; + //!< Item - shape strides. + int64_t item_block_strides[B2ND_MAX_DIM]; + //!< Item - shape strides. + int64_t block_chunk_strides[B2ND_MAX_DIM]; + //!< Item - shape strides. + int64_t chunk_array_strides[B2ND_MAX_DIM]; + //!< Item - shape strides. + char *dtype; + //!< Data type. Different formats can be supported (see dtype_format). + int8_t dtype_format; + //!< The format of the data type. Default is DTYPE_NUMPY_FORMAT. +} b2nd_array_t; + + +/** + * @brief Create b2nd params. + * + * @param b2_storage The Blosc2 storage params. + * @param ndim The dimensions. + * @param shape The shape. + * @param chunkshape The chunk shape. + * @param blockshape The block shape. + * @param dtype The data type expressed as a string version. + * @param dtype_format The data type format; DTYPE_NUMPY_FORMAT should be chosen for NumPy compatibility. + * @param metalayers The memory pointer to the list of the metalayers desired. + * @param nmetalayers The number of metalayers. + * + * @return A pointer to the new b2nd params. NULL is returned if this fails. 
+ * + * @note The pointer returned must be freed when not used anymore with #b2nd_free_ctx. + * + */ +BLOSC_EXPORT b2nd_context_t * +b2nd_create_ctx(const blosc2_storage *b2_storage, int8_t ndim, const int64_t *shape, const int32_t *chunkshape, + const int32_t *blockshape, const char *dtype, int8_t dtype_format, const blosc2_metalayer *metalayers, + int32_t nmetalayers); + + +/** + * @brief Free the resources associated with b2nd_context_t. + * + * @param ctx The b2nd context to free. + * + * @return An error code. + * + * @note This is safe in the sense that it will not free the schunk pointer in internal cparams. + * + */ +BLOSC_EXPORT int b2nd_free_ctx(b2nd_context_t *ctx); + + +/** + * @brief Create an uninitialized array. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_uninit(b2nd_context_t *ctx, b2nd_array_t **array); + + +/** + * @brief Create an empty array. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_empty(b2nd_context_t *ctx, b2nd_array_t **array); + + +/** + * Create an array, with zero being used as the default value for + * uninitialized portions of the array. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_zeros(b2nd_context_t *ctx, b2nd_array_t **array); + + +/** + * Create an array, with NaN being used as the default value for + * uninitialized portions of the array. Should only be used with type sizes + * of either 4 or 8. Other sizes generate an error. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * + * @return An error code. 
+ */ +BLOSC_EXPORT int b2nd_nans(b2nd_context_t *ctx, b2nd_array_t **array); + + +/** + * Create an array, with @p fill_value being used as the default value for + * uninitialized portions of the array. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * @param fill_value Default value for uninitialized portions of the array. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_full(b2nd_context_t *ctx, b2nd_array_t **array, const void *fill_value); + +/** + * @brief Free an array. + * + * @param array The array. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_free(b2nd_array_t *array); + +/** + * @brief Create a b2nd array from a super-chunk. It can only be used if the array + * is backed by a blosc super-chunk. + * + * @param schunk The blosc super-chunk where the b2nd array is stored. + * @param array The memory pointer where the array will be created. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array); + +/** + * Create a serialized super-chunk from a b2nd array. + * + * @param array The b2nd array to be serialized. + * @param cframe The pointer of the buffer where the in-memory array will be copied. + * @param cframe_len The length of the in-memory array buffer. + * @param needs_free Whether the buffer should be freed or not. + * + * @return An error code + */ +BLOSC_EXPORT int b2nd_to_cframe(const b2nd_array_t *array, uint8_t **cframe, + int64_t *cframe_len, bool *needs_free); + +/** + * @brief Create a b2nd array from a serialized super-chunk. + * + * @param cframe The buffer of the in-memory array. + * @param cframe_len The size (in bytes) of the in-memory array. + * @param copy Whether b2nd should make a copy of the cframe data or not. The copy will be made to an internal sparse frame. + * @param array The memory pointer where the array will be created. + * + * @return An error code. 
+ */ +BLOSC_EXPORT int b2nd_from_cframe(uint8_t *cframe, int64_t cframe_len, bool copy, b2nd_array_t **array); + +/** + * @brief Open a b2nd array from a file. + * + * @param urlpath The path of the b2nd array on disk. + * @param array The memory pointer where the array info will be stored. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_open(const char *urlpath, b2nd_array_t **array); + +/** + * @brief Open a b2nd array from a file using an offset. + * + * @param urlpath The path of the b2nd array on disk. + * @param array The memory pointer where the array info will be stored. + * @param offset The offset in the file where the b2nd array frame starts. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_open_offset(const char *urlpath, b2nd_array_t **array, int64_t offset); + +/** + * @brief Save b2nd array into a specific urlpath. + * + * @param array The array to be saved. + * @param urlpath The urlpath where the array will be stored. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_save(const b2nd_array_t *array, char *urlpath); + +/** + * @brief Append a b2nd array into a file. + * + * @param array The array to write. + * @param urlpath The path for persistent storage. + * + * @return If successful, return the offset where @p array has been appended in @p urlpath. + * Else, a negative value. + */ +BLOSC_EXPORT int64_t b2nd_save_append(const b2nd_array_t *array, const char *urlpath); + +/** + * @brief Create a b2nd array from a C buffer. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * @param buffer The buffer where source data is stored. + * @param buffersize The size (in bytes) of the buffer. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_from_cbuffer(b2nd_context_t *ctx, b2nd_array_t **array, const void *buffer, int64_t buffersize); + +/** + * @brief Extract the data from a b2nd array into a C buffer. + * + * @param array The b2nd array. 
+ * @param buffer The buffer where the data will be stored. + * @param buffersize Size (in bytes) of the buffer. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_to_cbuffer(const b2nd_array_t *array, void *buffer, int64_t buffersize); + +/** + * @brief Get a slice from an array and store it into a new array. + * + * @param ctx The b2nd context for the new array. + * @param array The memory pointer where the array will be created. + * @param src The array from which the slice will be extracted + * @param start The coordinates where the slice will begin. + * @param stop The coordinates where the slice will end. + * + * @return An error code. + * + * @note The ndim and shape from ctx will be overwritten by the src and stop-start respectively. + * + */ +BLOSC_EXPORT int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, const b2nd_array_t *src, + const int64_t *start, const int64_t *stop); + +/** + * @brief Squeeze a b2nd array + * + * This function removes selected single-dimensional entries from the shape of a + b2nd array. + * + * @param array The b2nd array. + * @param view The memory pointer where the new view will be created. + * @param index Indexes of the single-dimensional entries to remove. + * + * @return An error code + */ +BLOSC_EXPORT int b2nd_squeeze_index(b2nd_array_t *array, b2nd_array_t **view, const bool *index); + +/** + * @brief Squeeze a b2nd array + * + * This function removes single-dimensional entries from the shape of a b2nd array. + * + * @param array The b2nd array. + * @param view The memory pointer where the new view will be created. + * + * @return An error code + */ +BLOSC_EXPORT int b2nd_squeeze(b2nd_array_t *array, b2nd_array_t **view); + +/** + * @brief Add a newaxis to a b2nd array at location @p axis. + * + * @param array The b2nd array to be expanded. + * @param axis The axes where the new dimensions will be added. + * @param view The memory pointer where the new view will be created. 
+ * @param final_dims The final number of dimensions. Should be same as the number of elements in @p axis. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t **view, const bool *axis, + const uint8_t final_dims); + +/** + * @brief Get a slice from an array and store it into a C buffer. + * + * @param array The array from which the slice will be extracted. + * @param start The coordinates where the slice will begin. + * @param stop The coordinates where the slice will end. + * @param buffershape The shape of the buffer. + * @param buffer The buffer where the data will be stored. + * @param buffersize The size (in bytes) of the buffer. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_get_slice_cbuffer(const b2nd_array_t *array, const int64_t *start, const int64_t *stop, + void *buffer, const int64_t *buffershape, int64_t buffersize); + +/** + * @brief Set a slice in a b2nd array using a C buffer. + * + * @param buffer The buffer where the slice data is. + * @param buffershape The shape of the buffer. + * @param buffersize The size (in bytes) of the buffer. + * @param start The coordinates where the slice will begin. + * @param stop The coordinates where the slice will end. + * @param array The b2nd array where the slice will be set + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_set_slice_cbuffer(const void *buffer, const int64_t *buffershape, int64_t buffersize, + const int64_t *start, const int64_t *stop, b2nd_array_t *array); + +/** + * @brief Make a copy of the array data. The copy is done into a new b2nd array. + * + * @param ctx The b2nd context for the new array. + * @param src The array from which data is copied. + * @param array The memory pointer where the array will be created. + * + * @return An error code + * + * @note The ndim and shape in ctx will be overwritten by the src ctx. 
+ * + */ +BLOSC_EXPORT int b2nd_copy(b2nd_context_t *ctx, const b2nd_array_t *src, b2nd_array_t **array); + +/** + * @brief Concatenate arrays. The result is stored in a new b2nd array, or an enlarged one. + * + * @param ctx The b2nd context for the new array. + * @param src1 The first array from which data is copied. + * @param src2 The second array from which data is copied. + * @param axis The axis along which the arrays will be concatenated. + * @param copy Whether the data should be copied or not. If false, the @p src1 array + * will be expanded as needed to keep the result. + * @param array The memory pointer where the array will be created. It will have the same + * metalayers as @p src1, except for the b2nd metalayer, which will be updated with the + * new shape. + * + * @note The two arrays must have the same shape in all dimensions except the concatenation axis. + * Also, the typesize of the two arrays must be the same. + * + * @return An error code + * + * @note The ndim and shape in ctx will be overwritten by the src1 ctx. + * + */ +BLOSC_EXPORT int b2nd_concatenate(b2nd_context_t *ctx, const b2nd_array_t *src1, const b2nd_array_t *src2, + int8_t axis, bool copy, b2nd_array_t **array); + +/** + * @brief Print metalayer parameters. + * + * @param array The array where the metalayer is stored. + * + * @return An error code + */ +BLOSC_EXPORT int b2nd_print_meta(const b2nd_array_t *array); + +/** + * @brief Resize the shape of an array + * + * @param array The array to be resized. + * @param new_shape The new shape for the array. + * @param start The position in which the array will be extended or shrunk. + * + * @return An error code + */ +BLOSC_EXPORT int b2nd_resize(b2nd_array_t *array, const int64_t *new_shape, const int64_t *start); + + +/** + * @brief Insert given buffer in an array extending the given axis. + * + * @param array The array to insert the data in. + * @param buffer The buffer data to be inserted. 
+ * @param buffersize The size (in bytes) of the buffer. + * @param axis The axis that will be extended. + * @param insert_start The position inside the axis to start inserting the data. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_insert(b2nd_array_t *array, const void *buffer, int64_t buffersize, + int8_t axis, int64_t insert_start); + +/** + * Append a buffer at the end of a b2nd array. + * + * @param array The array to append the data in. + * @param buffer The buffer data to be appended. + * @param buffersize Size (in bytes) of the buffer. + * @param axis The axis that will be extended to append the data. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_append(b2nd_array_t *array, const void *buffer, int64_t buffersize, + int8_t axis); + +/** + * @brief Delete shrinking the given axis delete_len items. + * + * @param array The array to shrink. + * @param axis The axis to shrink. + * @param delete_start The start position from the axis to start deleting chunks. + * @param delete_len The number of items to delete to the array->shape[axis]. + * The newshape[axis] will be the old array->shape[axis] - delete_len + * + * @return An error code. + * + * @note See also b2nd_resize + */ +BLOSC_EXPORT int b2nd_delete(b2nd_array_t *array, int8_t axis, + int64_t delete_start, int64_t delete_len); + + +// Indexing section + +/** + * @brief Get an element selection along each dimension of an array independently. + * + * @param array The array to get the data from. + * @param selection The elements along each dimension. + * @param selection_size The size of the selection along each dimension. + * @param buffer The buffer for getting the data. + * @param buffershape The shape of the buffer. + * @param buffersize The buffer size (in bytes). + * + * @return An error code. + * + * @note See also b2nd_set_orthogonal_selection. 
+ */ +BLOSC_EXPORT int b2nd_get_orthogonal_selection(const b2nd_array_t *array, int64_t **selection, + int64_t *selection_size, void *buffer, + int64_t *buffershape, int64_t buffersize); + +/** + * @brief Set an element selection along each dimension of an array independently. + * + * @param array The array to set the data to. + * @param selection The elements along each dimension. + * @param selection_size The size of the selection along each dimension. + * @param buffer The buffer with the data for setting. + * @param buffershape The shape of the buffer. + * @param buffersize The buffer size (in bytes). + * + * @return An error code. + * + * @note See also b2nd_get_orthogonal_selection. + */ +BLOSC_EXPORT int b2nd_set_orthogonal_selection(b2nd_array_t *array, int64_t **selection, + int64_t *selection_size, const void *buffer, + int64_t *buffershape, int64_t buffersize); + + +/** + * @brief Create the metainfo for the b2nd metalayer. + * + * @param ndim The number of dimensions in the array. + * @param shape The shape of the array. + * @param chunkshape The shape of the chunks in the array. + * @param blockshape The shape of the blocks in the array. + * @param dtype A string representation of the data type of the array. + * @param dtype_format The format of the dtype representation. 0 means NumPy. + * @param smeta The msgpack buffer (output). + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_serialize_meta(int8_t ndim, const int64_t *shape, const int32_t *chunkshape, + const int32_t *blockshape, const char *dtype, + int8_t dtype_format, uint8_t **smeta); + +/** + * @brief Read the metainfo in the b2nd metalayer. + * + * @param smeta The msgpack buffer (input). + * @param smeta_len The length of the smeta buffer (input). + * @param ndim The number of dimensions in the array (output). + * @param shape The shape of the array (output). + * @param chunkshape The shape of the chunks in the array (output). 
+ * @param blockshape The shape of the blocks in the array (output). + * @param dtype A string representation of the data type of the array (output). + * @param dtype_format The format of the dtype representation (output). 0 means NumPy (the default). + * + * @note This function is inlined and available even when not linking with libblosc2. + * + * @return An error code. + */ +BLOSC_EXPORT int b2nd_deserialize_meta(const uint8_t *smeta, int32_t smeta_len, int8_t *ndim, int64_t *shape, + int32_t *chunkshape, int32_t *blockshape, char **dtype, int8_t *dtype_format); + +// Utilities for C buffers representing multidimensional arrays + +/** + * @brief Copy a slice of a source array into another array. The arrays have + * the same number of dimensions (though their shapes may differ), the same + * item size, and they are stored as C buffers with contiguous data (any + * padding is considered part of the array). + * + * @param ndim The number of dimensions in both arrays. + * @param itemsize The size of the individual data item in both arrays. + * @param src The buffer for getting the data from the source array. + * @param src_pad_shape The shape of the source array, including padding. + * @param src_start The source coordinates where the slice will begin. + * @param src_stop The source coordinates where the slice will end. + * @param dst The buffer for setting the data into the destination array. + * @param dst_pad_shape The shape of the destination array, including padding. + * @param dst_start The destination coordinates where the slice will be placed. + * + * @return An error code. + * + * @note This is kept for backward compatibility with existing code out there. New code should use + * b2nd_copy_buffer2 instead. + * + * @note Please make sure that slice boundaries fit within the source and + * destination arrays before using this function, as it does not perform these + * checks itself. 
+ */ +B2ND_DEPRECATED("Use b2nd_copy_buffer2 instead.") +BLOSC_EXPORT int b2nd_copy_buffer(int8_t ndim, + uint8_t itemsize, + const void *src, const int64_t *src_pad_shape, + const int64_t *src_start, const int64_t *src_stop, + void *dst, const int64_t *dst_pad_shape, + const int64_t *dst_start); + +/** + * @brief Copy a slice of a source array into another array. The arrays have + * the same number of dimensions (though their shapes may differ), the same + * item size, and they are stored as C buffers with contiguous data (any + * padding is considered part of the array). + * + * @param ndim The number of dimensions in both arrays. + * @param itemsize The size of the individual data item in both arrays. + * @param src The buffer for getting the data from the source array. + * @param src_pad_shape The shape of the source array, including padding. + * @param src_start The source coordinates where the slice will begin. + * @param src_stop The source coordinates where the slice will end. + * @param dst The buffer for setting the data into the destination array. + * @param dst_pad_shape The shape of the destination array, including padding. + * @param dst_start The destination coordinates where the slice will be placed. + * + * @return An error code. + * + * @note This is a version of (now deprecated) b2nd_copy_buffer() that uses + * signed 32-bit integers for copying data. This is useful when data is stored + * in a buffer that uses itemsizes that are larger than 255 bytes. + * + * @note Please make sure that slice boundaries fit within the source and + * destination arrays before using this function, as it does not perform these + * checks itself. 
+ */ +BLOSC_EXPORT int b2nd_copy_buffer2(int8_t ndim, + int32_t itemsize, + const void *src, const int64_t *src_pad_shape, + const int64_t *src_start, const int64_t *src_stop, + void *dst, const int64_t *dst_pad_shape, + const int64_t *dst_start); + + +#ifdef __cplusplus +} +#endif + +#endif /* BLOSC_B2ND_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2.h b/venv/Lib/site-packages/blosc2/include/blosc2.h new file mode 100644 index 0000000..3a97c31 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2.h @@ -0,0 +1,2645 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/********************************************************************* + @file blosc2.h + @brief Blosc2 header file. + + This file contains Blosc2 public API and the structures needed to use it. 
+ @author Blosc Development Team +**********************************************************************/ + +#ifndef BLOSC_BLOSC2_H +#define BLOSC_BLOSC2_H + +#include "blosc2/blosc2-export.h" +#include "blosc2/blosc2-common.h" +#include "blosc2/blosc2-stdio.h" + +#if defined(_WIN32) && !defined(__MINGW32__) +#include +#include +#include +#define getpid _getpid +#endif + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// For compatibility with the Blosc 1.x series +#ifdef BLOSC1_COMPAT + // Blosc2 symbols that should be accessible from Blosc 1.x API + #define BLOSC_VERSION_MAJOR BLOSC2_VERSION_MAJOR + #define BLOSC_VERSION_MINOR BLOSC2_VERSION_MINOR + #define BLOSC_VERSION_RELEASE BLOSC2_VERSION_RELEASE + #define BLOSC_VERSION_STRING BLOSC2_VERSION_STRING + #define BLOSC_VERSION_DATE BLOSC2_VERSION_DATE + #define BLOSC_MAX_OVERHEAD BLOSC2_MAX_OVERHEAD + #define BLOSC_MAX_BUFFERSIZE BLOSC2_MAX_BUFFERSIZE + + // API that changed to blosc1_ prefix + #define blosc_compress blosc1_compress + #define blosc_decompress blosc1_decompress + #define blosc_getitem blosc1_getitem + #define blosc_get_compressor blosc1_get_compressor + #define blosc_set_compressor blosc1_set_compressor + #define blosc_cbuffer_sizes blosc1_cbuffer_sizes + #define blosc_cbuffer_validate blosc1_cbuffer_validate + #define blosc_cbuffer_metainfo blosc1_cbuffer_metainfo + #define blosc_get_blocksize blosc1_get_blocksize + #define blosc_set_blocksize blosc1_set_blocksize + #define blosc_set_splitmode blosc1_set_splitmode + + // API that changed to blosc2_ prefix + #define blosc_init blosc2_init + #define blosc_destroy blosc2_destroy + #define blosc_free_resources blosc2_free_resources + #define blosc_get_nthreads blosc2_get_nthreads + #define blosc_set_nthreads blosc2_set_nthreads + #define blosc_compcode_to_compname blosc2_compcode_to_compname + #define blosc_compname_to_compcode blosc2_compname_to_compcode + #define blosc_list_compressors 
blosc2_list_compressors + #define blosc_get_version_string blosc2_get_version_string + #define blosc_get_complib_info blosc2_get_complib_info + #define blosc_cbuffer_versions blosc2_cbuffer_versions + #define blosc_cbuffer_complib blosc2_cbuffer_complib +#endif + + +/* Version numbers */ +#define BLOSC2_VERSION_MAJOR 2 /* for major interface/format changes */ +#define BLOSC2_VERSION_MINOR 23 /* for minor interface/format changes */ +#define BLOSC2_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ + +#define BLOSC2_VERSION_STRING "2.23.1" /* string version. Sync with above! */ +#define BLOSC2_VERSION_DATE "$Date:: 2026-03-03 #$" /* date version year-month-day */ + + +/* The maximum number of dimensions for Blosc2 NDim arrays */ +#define BLOSC2_MAX_DIM 8 + + +/* Tracing macros */ +#define BLOSC_TRACE_ERROR(msg, ...) BLOSC_TRACE(error, msg, ##__VA_ARGS__) +#define BLOSC_TRACE_WARNING(msg, ...) BLOSC_TRACE(warning, msg, ##__VA_ARGS__) +#define BLOSC_TRACE_INFO(msg, ...) BLOSC_TRACE(info, msg, ##__VA_ARGS__) +#define BLOSC_TRACE(cat, msg, ...) \ + do { \ + const char *__e = getenv("BLOSC_TRACE"); \ + if (!__e) { break; } \ + fprintf(stderr, "[%s] - " msg " (%s:%d)\n", #cat, ##__VA_ARGS__, __FILE__, __LINE__); \ + } while(0) + +#define BLOSC_ERROR_NULL(pointer, rc) \ + do { \ + if ((pointer) == NULL) { \ + BLOSC_TRACE_ERROR("Pointer is null"); \ + return (rc); \ + } \ + } while (0) +#define BLOSC_ERROR(rc) \ + do { \ + int rc_ = (rc); \ + if (rc_ < BLOSC2_ERROR_SUCCESS) { \ + char *error_msg = print_error(rc_); \ + BLOSC_TRACE_ERROR("%s", error_msg); \ + return rc_; \ + } \ + } while (0) + +#define BLOSC_INFO(msg, ...) 
\ + do { \ + const char *__e = getenv("BLOSC_INFO"); \ + if (!__e) { break; } \ + fprintf(stderr, "[INFO] - " msg "\n", ##__VA_ARGS__); \ + } while(0) + + +/* The VERSION_FORMAT symbols below should be just 1-byte long */ +enum { + /* Blosc format version, starting at 1 + 1 -> Blosc pre-1.0 + 2 -> Blosc 1.x stable series + 3 -> Blosc 2-alpha.x series + 4 -> Blosc 2.x beta.1 series + 5 -> Blosc 2.x stable series + */ + BLOSC1_VERSION_FORMAT_PRE1 = 1, + BLOSC1_VERSION_FORMAT = 2, + BLOSC2_VERSION_FORMAT_ALPHA = 3, + BLOSC2_VERSION_FORMAT_BETA1 = 4, + BLOSC2_VERSION_FORMAT_STABLE = 5, + BLOSC2_VERSION_FORMAT = BLOSC2_VERSION_FORMAT_STABLE, +}; + + +/* The FRAME_FORMAT_VERSION symbols below should be just 4-bit long */ +enum { + /* Blosc format version + * 1 -> First version (introduced in beta.2) + * 2 -> Second version (introduced in rc.1) + * + */ + BLOSC2_VERSION_FRAME_FORMAT_BETA2 = 1, // for 2.0.0-beta2 and after + BLOSC2_VERSION_FRAME_FORMAT_RC1 = 2, // for 2.0.0-rc1 and after + BLOSC2_VERSION_FRAME_FORMAT = BLOSC2_VERSION_FRAME_FORMAT_RC1, +}; + + +//!< Struct for storing data from instrumentation of codecs +// This can be flexible because it is typically used mainly for development +typedef struct { + float cratio; + float cspeed; + float filter_speed; + //float memory; + //float power; + uint8_t flags[4]; +} blosc2_instr; + + +enum { +#ifndef BLOSC_H + BLOSC_MIN_HEADER_LENGTH = 16, + //!< Minimum header length (Blosc1) +#endif // BLOSC_H + BLOSC_EXTENDED_HEADER_LENGTH = 32, + //!< Extended header length (Blosc2, see README_HEADER) + BLOSC2_MAX_OVERHEAD = BLOSC_EXTENDED_HEADER_LENGTH, + //!< The maximum overhead during compression in bytes. This equals + //!< to @ref BLOSC_EXTENDED_HEADER_LENGTH now, but can be higher in future + //!< implementations. 
+ BLOSC2_MAX_BUFFERSIZE = (INT_MAX - BLOSC2_MAX_OVERHEAD), + //!< Maximum source buffer size to be compressed +#ifndef BLOSC_H + BLOSC_MAX_TYPESIZE = UINT8_MAX, + //!< Maximum typesize before considering source buffer as a stream of bytes. + //!< Cannot be larger than 255. +#endif // BLOSC_H + BLOSC_MIN_BUFFERSIZE = 32, + //!< Minimum buffer size to be compressed. +}; + +enum { + BLOSC2_DEFINED_TUNER_START = 0, + BLOSC2_DEFINED_TUNER_STOP = 31, + //!< Blosc-defined tuners must be between 0 - 31. + BLOSC2_GLOBAL_REGISTERED_TUNER_START = 32, + BLOSC2_GLOBAL_REGISTERED_TUNER_STOP = 159, + //!< Blosc-registered tuners must be between 32 - 159. + BLOSC2_GLOBAL_REGISTERED_TUNERS = 0, + //!< Number of Blosc-registered tuners at the moment. + BLOSC2_USER_REGISTERED_TUNER_START = 160, + BLOSC2_USER_REGISTERED_TUNER_STOP = 255, + //!< User-defined tuners must be between 160 - 255. +}; + +/** + * @brief Codes for the different tuners shipped with Blosc + */ +enum { + BLOSC_STUNE = 0, + BLOSC_LAST_TUNER = 1, + //!< Determine the last tuner defined by Blosc. + BLOSC_LAST_REGISTERED_TUNE = BLOSC2_GLOBAL_REGISTERED_TUNER_START + BLOSC2_GLOBAL_REGISTERED_TUNERS - 1, + //!< Determine the last registered tuner. It is used to check if a tuner is registered or not. +}; + +enum { + BLOSC2_DEFINED_FILTERS_START = 0, + BLOSC2_DEFINED_FILTERS_STOP = 31, + //!< Blosc-defined filters must be between 0 - 31. + BLOSC2_GLOBAL_REGISTERED_FILTERS_START = 32, + BLOSC2_GLOBAL_REGISTERED_FILTERS_STOP = 159, + //!< Blosc-registered filters must be between 32 - 159. + BLOSC2_GLOBAL_REGISTERED_FILTERS = 5, + //!< Number of Blosc-registered filters at the moment. + BLOSC2_USER_REGISTERED_FILTERS_START = 160, + BLOSC2_USER_REGISTERED_FILTERS_STOP = 255, + //!< User-defined filters must be between 160 - 255. + BLOSC2_MAX_FILTERS = 6, + //!< Maximum number of filters in the filter pipeline + BLOSC2_MAX_UDFILTERS = 16, + //!< Maximum number of filters that a user can register. 
+}; + + +/** + * @brief Codes for filters. + * + * @sa #blosc1_compress + */ +enum { +#ifndef BLOSC_H + BLOSC_NOSHUFFLE = 0, + //!< No shuffle (for compatibility with Blosc1). + BLOSC_NOFILTER = 0, + //!< No filter. + BLOSC_SHUFFLE = 1, + //!< Byte-wise shuffle. `filters_meta` is the number of bytestreams to shuffle from the input. If 0, defaults to typesize. + BLOSC_BITSHUFFLE = 2, + //!< Bit-wise shuffle. `filters_meta` does not have any effect. +#endif // BLOSC_H + BLOSC_DELTA = 3, + //!< Delta filter (bitwise XOR relative to reference). `filters_meta` does not have any effect. + BLOSC_TRUNC_PREC = 4, + //!< Truncate mantissa precision. + //!< Positive values in `filters_meta` will keep bits; negative values will zero bits. + BLOSC_LAST_FILTER = 5, + //!< sentinel + BLOSC_LAST_REGISTERED_FILTER = BLOSC2_GLOBAL_REGISTERED_FILTERS_START + BLOSC2_GLOBAL_REGISTERED_FILTERS - 1, + //!< Determine the last registered filter. It is used to check if a filter is registered or not. +}; + +/** + * @brief Codes for internal flags (see blosc1_cbuffer_metainfo) + */ +enum { +#ifndef BLOSC_H + BLOSC_DOSHUFFLE = 0x1, //!< byte-wise shuffle + BLOSC_MEMCPYED = 0x2, //!< plain copy + BLOSC_DOBITSHUFFLE = 0x4, //!< bit-wise shuffle +#endif // BLOSC_H + BLOSC_DODELTA = 0x8, //!< delta coding +}; + +/** + * @brief Codes for new internal flags in Blosc2 + */ +enum { + BLOSC2_USEDICT = 0x1, //!< use dictionaries with codec + BLOSC2_BIGENDIAN = 0x2, //!< data is in big-endian ordering + BLOSC2_INSTR_CODEC = 0x80, //!< codec is instrumented (mainly for development) +}; + +/** + * @brief Values for different Blosc2 capabilities + */ +enum { + BLOSC2_MAXDICTSIZE = 128 * 1024, //!< maximum size for compression dicts + BLOSC2_MAXBLOCKSIZE = 536866816, //!< maximum size for blocks + BLOSC2_MAXTYPESIZE = BLOSC2_MAXBLOCKSIZE, //!< maximum size for types +}; + + +enum { + BLOSC2_DEFINED_CODECS_START = 0, + BLOSC2_DEFINED_CODECS_STOP = 31, + //!< Blosc-defined codecs must be between 0 - 31. 
+ BLOSC2_GLOBAL_REGISTERED_CODECS_START = 32, + BLOSC2_GLOBAL_REGISTERED_CODECS_STOP = 159, + //!< Blosc-registered codecs must be between 32 - 159. + BLOSC2_GLOBAL_REGISTERED_CODECS = 5, + //!< Number of Blosc-registered codecs at the moment. + BLOSC2_USER_REGISTERED_CODECS_START = 160, + BLOSC2_USER_REGISTERED_CODECS_STOP = 255, + //!< User-defined codecs must be between 160 - 255. +}; + +/** + * @brief Codes for the different compressors shipped with Blosc + */ +enum { +#ifndef BLOSC_H + BLOSC_BLOSCLZ = 0, + BLOSC_LZ4 = 1, + BLOSC_LZ4HC = 2, + BLOSC_ZLIB = 4, + BLOSC_ZSTD = 5, +#endif // BLOSC_H + BLOSC_LAST_CODEC = 6, + //!< Determine the last codec defined by Blosc. + BLOSC_LAST_REGISTERED_CODEC = BLOSC2_GLOBAL_REGISTERED_CODECS_START + BLOSC2_GLOBAL_REGISTERED_CODECS - 1, + //!< Determine the last registered codec. It is used to check if a codec is registered or not. +}; + + +// Names for the different compressors shipped with Blosc + +#ifndef BLOSC_H +#define BLOSC_BLOSCLZ_COMPNAME "blosclz" +#define BLOSC_LZ4_COMPNAME "lz4" +#define BLOSC_LZ4HC_COMPNAME "lz4hc" +#define BLOSC_ZLIB_COMPNAME "zlib" +#define BLOSC_ZSTD_COMPNAME "zstd" +#endif // BLOSC_H + +/** + * @brief Codes for compression libraries shipped with Blosc (code must be < 8) + */ +enum { +#ifndef BLOSC_H + BLOSC_BLOSCLZ_LIB = 0, + BLOSC_LZ4_LIB = 1, + BLOSC_ZLIB_LIB = 3, + BLOSC_ZSTD_LIB = 4, +#endif // BLOSC_H + BLOSC_UDCODEC_LIB = 6, + BLOSC_SCHUNK_LIB = 7, //!< compressor library in super-chunk header +}; + +/** + * @brief Names for the different compression libraries shipped with Blosc + */ +#ifndef BLOSC_H +#define BLOSC_BLOSCLZ_LIBNAME "BloscLZ" +#define BLOSC_LZ4_LIBNAME "LZ4" +#define BLOSC_ZLIB_LIBNAME "Zlib" +#define BLOSC_ZSTD_LIBNAME "Zstd" +#endif // BLOSC_H + +/** + * @brief The codes for compressor formats shipped with Blosc + */ +enum { +#ifndef BLOSC_H + BLOSC_BLOSCLZ_FORMAT = BLOSC_BLOSCLZ_LIB, + BLOSC_LZ4_FORMAT = BLOSC_LZ4_LIB, + //!< LZ4HC and LZ4 share the same format + 
BLOSC_LZ4HC_FORMAT = BLOSC_LZ4_LIB, + BLOSC_ZLIB_FORMAT = BLOSC_ZLIB_LIB, + BLOSC_ZSTD_FORMAT = BLOSC_ZSTD_LIB, +#endif // BLOSC_H + BLOSC_UDCODEC_FORMAT = BLOSC_UDCODEC_LIB, +}; + +/** + * @brief The version formats for compressors shipped with Blosc. + * All versions here starts at 1 + */ +enum { +#ifndef BLOSC_H + BLOSC_BLOSCLZ_VERSION_FORMAT = 1, + BLOSC_LZ4_VERSION_FORMAT = 1, + BLOSC_LZ4HC_VERSION_FORMAT = 1, /* LZ4HC and LZ4 share the same format */ + BLOSC_ZLIB_VERSION_FORMAT = 1, + BLOSC_ZSTD_VERSION_FORMAT = 1, +#endif // BLOSC_H + BLOSC_UDCODEC_VERSION_FORMAT = 1, +}; + +/** + * @brief Split mode for blocks. + * NEVER and ALWAYS are for experimenting with compression ratio. + * AUTO for nearly optimal behaviour (based on heuristics). + * FORWARD_COMPAT provides best forward compatibility (default). + */ +#ifndef BLOSC_H +enum { + BLOSC_ALWAYS_SPLIT = 1, + BLOSC_NEVER_SPLIT = 2, + BLOSC_AUTO_SPLIT = 3, + BLOSC_FORWARD_COMPAT_SPLIT = 4, +}; +#endif // BLOSC_H + +/** + * @brief Offsets for fields in Blosc2 chunk header. 
+ */ +enum { + BLOSC2_CHUNK_VERSION = 0x0, //!< the version for the chunk format + BLOSC2_CHUNK_VERSIONLZ = 0x1, //!< the version for the format of internal codec + BLOSC2_CHUNK_FLAGS = 0x2, //!< flags and codec info + BLOSC2_CHUNK_TYPESIZE = 0x3, //!< (uint8) the number of bytes of the atomic type + BLOSC2_CHUNK_NBYTES = 0x4, //!< (int32) uncompressed size of the buffer (this header is not included) + BLOSC2_CHUNK_BLOCKSIZE = 0x8, //!< (int32) size of internal blocks + BLOSC2_CHUNK_CBYTES = 0xc, //!< (int32) compressed size of the buffer (including this header) + BLOSC2_CHUNK_FILTER_CODES = 0x10, //!< the codecs for the filter pipeline (1 byte per code) + BLOSC2_CHUNK_FILTER_META = 0x18, //!< meta info for the filter pipeline (1 byte per code) + BLOSC2_CHUNK_BLOSC2_FLAGS = 0x1F, //!< flags specific for Blosc2 functionality +}; + +/** + * @brief Run lengths for special values for chunks/frames + */ +enum { + BLOSC2_NO_SPECIAL = 0x0, //!< no special value + BLOSC2_SPECIAL_ZERO = 0x1, //!< zero special value + BLOSC2_SPECIAL_NAN = 0x2, //!< NaN special value + BLOSC2_SPECIAL_VALUE = 0x3, //!< repeated special value + BLOSC2_SPECIAL_UNINIT = 0x4, //!< non initialized values + BLOSC2_SPECIAL_LASTID = 0x4, //!< last valid ID for special value (update this adequately) + BLOSC2_SPECIAL_MASK = 0x7 //!< special value mask (prev IDs cannot be larger than this) +}; + +/** + * @brief Error codes + * Each time an error code is added here, its corresponding message error should be added in + * print_error() + */ +enum { + BLOSC2_ERROR_SUCCESS = 0, //=0). In case the compressor + * is not recognized, or there is not support for it in this build, + * it returns a -1. + */ +BLOSC_EXPORT int blosc1_set_compressor(const char* compname); + + +/** + * @brief Select the delta coding filter to be used. + * + * @param dodelta A value >0 will activate the delta filter. + * If 0, it will be de-activated + * + * This call should always succeed. 
+ */ +BLOSC_EXPORT void blosc2_set_delta(int dodelta); + + +/** + * @brief Get the compressor name associated with the compressor code. + * + * @param compcode The code identifying the compressor + * @param compname The pointer to a string where the compressor name will be put. + * + * @return The compressor code. If the compressor code is not recognized, + * or there is not support for it in this build, -1 is returned. + */ +BLOSC_EXPORT int blosc2_compcode_to_compname(int compcode, const char** compname); + + +/** + * @brief Get the compressor code associated with the compressor name. + * + * @param compname The string containing the compressor name. + * + * @return The compressor code. If the compressor name is not recognized, + * or there is not support for it in this build, -1 is returned instead. + */ +BLOSC_EXPORT int blosc2_compname_to_compcode(const char* compname); + + +/** + * @brief Get a list of compressors supported in the current build. + * + * @return The comma separated string with the list of compressor names + * supported. + * + * This function does not leak, so you should not free() the returned + * list. + * + * This function should always succeed. + */ +BLOSC_EXPORT const char* blosc2_list_compressors(void); + + +/** + * @brief Get the version of Blosc in string format. + * + * @return The string with the current Blosc version. + * Useful for dynamic libraries. + */ +BLOSC_EXPORT const char* blosc2_get_version_string(void); + + +/** + * @brief Get info from compression libraries included in the current build. + * + * @param compname The compressor name that you want info from. + * @param complib The pointer to a string where the + * compression library name, if available, will be put. + * @param version The pointer to a string where the + * compression library version, if available, will be put. + * + * @warning You are in charge of the @p complib and @p version strings, + * you should free() them so as to avoid leaks. 
+ * + * @return The code for the compression library (>=0). If it is not supported, + * this function returns -1. + */ +BLOSC_EXPORT int blosc2_get_complib_info(const char* compname, char** complib, + char** version); + + +/** + * @brief Free possible memory temporaries and thread resources. Use this + * when you are not going to use Blosc for a long while. + * + * @return A 0 if succeeds, in case of problems releasing the resources, + * it returns a negative number. + */ +BLOSC_EXPORT int blosc2_free_resources(void); + + +/** + * @brief Get information about a compressed buffer, namely the number of + * uncompressed bytes (@p nbytes) and compressed (@p cbytes). It also + * returns the @p blocksize (which is used internally for doing the + * compression by blocks). + * + * @remark Equivalent to function #blosc2_cbuffer_sizes. + * + * @param cbuffer The buffer of compressed data. + * @param nbytes The pointer where the number of uncompressed bytes will be put. + * @param cbytes The pointer where the number of compressed bytes will be put. + * @param blocksize The pointer where the block size will be put. + * + * You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a + * compressed buffer for this call to work. + * + * This function should always succeed. + */ +BLOSC_EXPORT void blosc1_cbuffer_sizes(const void* cbuffer, size_t* nbytes, + size_t* cbytes, size_t* blocksize); +/** + * @brief Get information about a compressed buffer, namely the number of + * uncompressed bytes (@p nbytes) and compressed (@p cbytes). It also + * returns the @p blocksize (which is used internally for doing the + * compression by blocks). + * + * @param cbuffer The buffer of compressed data. + * @param nbytes The pointer where the number of uncompressed bytes will be put. + * @param cbytes The pointer where the number of compressed bytes will be put. + * @param blocksize The pointer where the block size will be put. 
+ * + * @note: if any of the nbytes, cbytes or blocksize is NULL, it will not be returned. + * + * You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a + * compressed buffer for this call to work. + * + * @return On failure, returns negative value. + */ +BLOSC_EXPORT int blosc2_cbuffer_sizes(const void* cbuffer, int32_t* nbytes, + int32_t* cbytes, int32_t* blocksize); + +/** + * @brief Checks that the compressed buffer starting at @p cbuffer of length @p cbytes + * may contain valid blosc compressed data, and that it is safe to call + * blosc1_decompress/blosc1_getitem. + * On success, returns 0 and sets @p nbytes to the size of the uncompressed data. + * This does not guarantee that the decompression function won't return an error, + * but does guarantee that it is safe to attempt decompression. + * + * @param cbuffer The buffer of compressed data. + * @param cbytes The number of compressed bytes. + * @param nbytes The pointer where the number of uncompressed bytes will be put. + * + * @return On failure, returns negative value. + */ +BLOSC_EXPORT int blosc1_cbuffer_validate(const void* cbuffer, size_t cbytes, + size_t* nbytes); + +/** + * @brief Get information about a compressed buffer, namely the type size + * (@p typesize), as well as some internal @p flags. + * + * @param cbuffer The buffer of compressed data. + * @param typesize The pointer where the type size will be put. + * @param flags The pointer of the integer where the additional info is encoded. + * The @p flags is a set of bits, where the currently used ones are: + * * bit 0: whether the shuffle filter has been applied or not + * * bit 1: whether the internal buffer is a pure memcpy or not + * * bit 2: whether the bitshuffle filter has been applied or not + * * bit 3: whether the delta coding filter has been applied or not + * + * You can use the @p BLOSC_DOSHUFFLE, @p BLOSC_DOBITSHUFFLE, @p BLOSC_DODELTA + * and @p BLOSC_MEMCPYED symbols for extracting the interesting bits + * (e.g. 
@p flags & @p BLOSC_DOSHUFFLE says whether the buffer is byte-shuffled + * or not). + * + * This function should always succeed. + */ +BLOSC_EXPORT void blosc1_cbuffer_metainfo(const void* cbuffer, size_t* typesize, + int* flags); + + +/** + * @brief Get information about a compressed buffer, namely the internal + * Blosc format version (@p version) and the format for the internal + * Lempel-Ziv compressor used (@p versionlz). + * + * @param cbuffer The buffer of compressed data. + * @param version The pointer where the Blosc format version will be put. + * @param versionlz The pointer where the Lempel-Ziv version will be put. + * + * This function should always succeed. + */ +BLOSC_EXPORT void blosc2_cbuffer_versions(const void* cbuffer, int* version, + int* versionlz); + + +/** + * @brief Get the compressor library/format used in a compressed buffer. + * + * @param cbuffer The buffer of compressed data. + * + * @return The string identifying the compressor library/format used. + * + * This function should always succeed. + */ +BLOSC_EXPORT const char* blosc2_cbuffer_complib(const void* cbuffer); + +/********************************************************************* + Structures and functions related with user-defined input/output. 
+*********************************************************************/ + +enum { + BLOSC2_IO_FILESYSTEM = 0, + BLOSC2_IO_FILESYSTEM_MMAP = 1, + BLOSC_IO_LAST_BLOSC_DEFINED = 2, // sentinel + BLOSC_IO_LAST_REGISTERED = 32, // sentinel +}; + +enum { + BLOSC2_IO_BLOSC_DEFINED = 32, + BLOSC2_IO_REGISTERED = 160, + BLOSC2_IO_USER_DEFINED = 256 +}; + +typedef void* (*blosc2_open_cb)(const char *urlpath, const char *mode, void *params); +typedef int (*blosc2_close_cb)(void *stream); +typedef int64_t (*blosc2_size_cb)(void *stream); +typedef int64_t (*blosc2_write_cb)(const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream); +typedef int64_t (*blosc2_read_cb)(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream); +typedef int (*blosc2_truncate_cb)(void *stream, int64_t size); +typedef int (*blosc2_destroy_cb)(void *params); + + +/* + * Input/Output callbacks. + */ +typedef struct { + uint8_t id; + //!< The IO identifier. + char* name; + //!< The IO name. + bool is_allocation_necessary; + //!< If true, the caller needs to allocate data for the read function (ptr argument). If false, the read function + //!< takes care of memory allocation and stores the address in the allocated_ptr argument. + blosc2_open_cb open; + //!< The IO open callback. + blosc2_close_cb close; + //!< The IO close callback. + blosc2_size_cb size; + //!< The IO size callback. + blosc2_write_cb write; + //!< The IO write callback. + blosc2_read_cb read; + //!< The IO read callback. + blosc2_truncate_cb truncate; + //!< The IO truncate callback. + blosc2_destroy_cb destroy; + //!< The IO destroy callback (called in the end when finished with the schunk). +} blosc2_io_cb; + + +/* + * Input/Output parameters. + */ +typedef struct { + uint8_t id; + const char *name; + //!< The IO identifier. + void *params; + //!< The IO parameters. 
+} blosc2_io; + +static const blosc2_io BLOSC2_IO_DEFAULTS = { + /* .id = */ BLOSC2_IO_FILESYSTEM, + /* .name = */ "filesystem", + /* .params = */ NULL, +}; + + +/** + * @brief Register a user-defined input/output callbacks in Blosc. + * + * @param io The callbacks API to register. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_register_io_cb(const blosc2_io_cb *io); + +/** + * @brief Get a user-defined input/output callback in Blosc. + * + * @param id The id of the callback to get. + * + * @return A pointer containing the desired callback if success. Else a NULL pointer is returned. + */ +BLOSC_EXPORT blosc2_io_cb *blosc2_get_io_cb(uint8_t id); + +/********************************************************************* + Structures and functions related with contexts. +*********************************************************************/ + +typedef struct blosc2_context_s blosc2_context; /* opaque type */ + +typedef struct { + int (*init)(void * config, blosc2_context* cctx, blosc2_context* dctx); + //!< Initialize tuner. Keep in mind dctx may be NULL. This should memcpy the cctx->tuner_params. + int (*next_blocksize)(blosc2_context * context); + //!< Only compute the next blocksize. Only it is executed if tuner is not initialized. + int (*next_cparams)(blosc2_context * context); + //!< Compute the next cparams. Only is executed if tuner is initialized. + int (*update)(blosc2_context * context, double ctime); + //!< Update the tuner parameters. + int (*free)(blosc2_context * context); + //!< Free the tuner. + int id; + //!< The tuner id + char *name; + //!< The tuner name +} blosc2_tuner; + + +/** + * @brief Register locally a user-defined tuner in Blosc. + * + * @param tuner The tuner to register. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_register_tuner(blosc2_tuner *tuner); + + +/** + * @brief The parameters for a prefilter function. 
+ * + */ +typedef struct { + void *user_data; // user-provided info (optional) + const uint8_t *input; // the input buffer + uint8_t *output; // the output buffer + int32_t output_size; // the output size (in bytes) + int32_t output_typesize; // the output typesize + int32_t output_offset; // offset to reach the start of the output buffer + int64_t nchunk; // the current nchunk in associated schunk (if exists; if not -1) + int32_t nblock; // the current nblock in associated chunk + int32_t tid; // thread id + uint8_t *ttmp; // a temporary that is able to hold several blocks for the output and is private for each thread + size_t ttmp_nbytes; // the size of the temporary in bytes + blosc2_context *ctx; // the compression context + bool output_is_disposable; // whether the output buffer is disposable +} blosc2_prefilter_params; + +/** + * @brief The parameters for a postfilter function. + * + */ +typedef struct { + void *user_data; // user-provided info (optional) + const uint8_t *input; // the input buffer + uint8_t *output; // the output buffer + int32_t size; // the input size (in bytes) + int32_t typesize; // the input typesize + int32_t offset; // offset to reach the start of the input buffer + int64_t nchunk; // the current nchunk in associated schunk (if exists; if not -1) + int32_t nblock; // the current nblock in associated chunk + int32_t tid; // thread id + uint8_t *ttmp; // a temporary that is able to hold several blocks for the output and is private for each thread + size_t ttmp_nbytes; // the size of the temporary in bytes + blosc2_context *ctx; // the decompression context +} blosc2_postfilter_params; + +/** + * @brief The type of the prefilter function. + * + * If the function call is successful, the return value should be 0; else, a negative value. + */ +typedef int (*blosc2_prefilter_fn)(blosc2_prefilter_params* params); + +/** + * @brief The type of the postfilter function. 
+ * + * If the function call is successful, the return value should be 0; else, a negative value. + */ +typedef int (*blosc2_postfilter_fn)(blosc2_postfilter_params* params); + +/** + * @brief The parameters for creating a context for compression purposes. + * + * In parenthesis it is shown the default value used internally when a 0 + * (zero) in the fields of the struct is passed to a function. + */ +typedef struct { + uint8_t compcode; + //!< The compressor codec. + uint8_t compcode_meta; + //!< The metadata for the compressor codec. + uint8_t clevel; + //!< The compression level (5). + int use_dict; + //!< Use dicts or not when compressing (only for ZSTD). + int32_t typesize; + //!< The type size (8). + int16_t nthreads; + //!< The number of threads to use internally (1). + int32_t blocksize; + //!< The requested size of the compressed blocks (0 means automatic). + int32_t splitmode; + //!< Whether the blocks should be split or not. + void* schunk; + //!< The associated schunk, if any (NULL). + uint8_t filters[BLOSC2_MAX_FILTERS]; + //!< The (sequence of) filters. + uint8_t filters_meta[BLOSC2_MAX_FILTERS]; + //!< The metadata for filters. + blosc2_prefilter_fn prefilter; + //!< The prefilter function. + blosc2_prefilter_params *preparams; + //!< The prefilter parameters. + void *tuner_params; + //!< Tune configuration. + int tuner_id; + //!< The tuner id. + bool instr_codec; + //!< Whether the codec is instrumented or not + void *codec_params; + //!< User defined parameters for the codec + void *filter_params[BLOSC2_MAX_FILTERS]; + //!< User defined parameters for the filters +} blosc2_cparams; + +/** + * @brief Default struct for compression params meant for user initialization. 
+ */ +static const blosc2_cparams BLOSC2_CPARAMS_DEFAULTS = { + BLOSC_BLOSCLZ, 0, 5, 0, 8, 1, 0, + BLOSC_FORWARD_COMPAT_SPLIT, NULL, + {0, 0, 0, 0, 0, BLOSC_SHUFFLE}, + {0, 0, 0, 0, 0, 0}, + NULL, NULL, NULL, 0, 0, + NULL, {NULL, NULL, NULL, NULL, NULL, NULL} + }; + + +/** + @brief The parameters for creating a context for decompression purposes. + + In parenthesis it is shown the default value used internally when a 0 + (zero) in the fields of the struct is passed to a function. + */ +typedef struct { + int16_t nthreads; + //!< The number of threads to use internally (1). + void* schunk; + //!< The associated schunk, if any (NULL). + blosc2_postfilter_fn postfilter; + //!< The postfilter function. + blosc2_postfilter_params *postparams; + //!< The postfilter parameters. + int32_t typesize; + //!< The type size (8). +} blosc2_dparams; + +/** + * @brief Default struct for decompression params meant for user initialization. + */ +static const blosc2_dparams BLOSC2_DPARAMS_DEFAULTS = {1, NULL, NULL, NULL, 8}; + + +/** + * @brief Create a context for @a *_ctx() compression functions. + * + * @param cparams The blosc2_cparams struct with the compression parameters. + * + * @return A pointer to the new context. NULL is returned if this fails. + * + * @note This supports the same environment variables than #blosc2_compress + * for overriding the programmatic compression values. + * + * @sa #blosc2_compress + */ +BLOSC_EXPORT blosc2_context* blosc2_create_cctx(blosc2_cparams cparams); + +/** + * @brief Create a context for *_ctx() decompression functions. + * + * @param dparams The blosc2_dparams struct with the decompression parameters. + * + * @return A pointer to the new context. NULL is returned if this fails. + * + * @note This supports the same environment variables than #blosc2_decompress + * for overriding the programmatic decompression values. 
+ * + * @sa #blosc2_decompress + * + */ +BLOSC_EXPORT blosc2_context* blosc2_create_dctx(blosc2_dparams dparams); + +/** + * @brief Free the resources associated with a context. + * + * @param context The context to free. + * + * This function should always succeed and is valid for contexts meant for + * both compression and decompression. + */ +BLOSC_EXPORT void blosc2_free_ctx(blosc2_context* context); + +/** + * @brief Create a @p cparams associated to a context. + * + * @param ctx The context from where to extract the compression parameters. + * @param cparams The pointer where the compression params will be stored. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_ctx_get_cparams(blosc2_context *ctx, blosc2_cparams *cparams); + +/** + * @brief Create a @p dparams associated to a context. + * + * @param ctx The context from where to extract the decompression parameters. + * @param dparams The pointer where the decompression params will be stored. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_ctx_get_dparams(blosc2_context *ctx, blosc2_dparams *dparams); + +/** + * @brief Set a maskout so as to avoid decompressing specified blocks. + * + * @param ctx The decompression context to update. + * + * @param maskout The boolean mask for the blocks where decompression + * is to be avoided. + * + * @remark The maskout is valid for contexts *only* meant for decompressing + * a chunk via #blosc2_decompress_ctx. Once a call to #blosc2_decompress_ctx + * is done, this mask is reset so that next call to #blosc2_decompress_ctx + * will decompress the whole chunk. + * + * @param nblocks The number of blocks in maskout above. + * + * @return If success, a 0 is returned. An error is signaled with a negative int. 
+ * + */ +BLOSC_EXPORT int blosc2_set_maskout(blosc2_context *ctx, bool *maskout, int nblocks); + +/** + * @brief Compress a block of data in the @p src buffer and return the size of + * the compressed block. + * + * @remark Compression is memory safe and guaranteed not to write @p dest + * more than what is specified in @p destsize. + * There is no minimum for the @p src buffer size (@p srcsize). + * + * @warning The @p src buffer and the @p dest buffer can not overlap. + * + * @param clevel The desired compression level and must be a number + * between 0 (no compression) and 9 (maximum compression). + * @param doshuffle Specifies whether the shuffle compression preconditioner + * should be applied or not. #BLOSC_NOFILTER means not applying filters, + * #BLOSC_SHUFFLE means applying shuffle at a byte level and + * #BLOSC_BITSHUFFLE at a bit level (slower but *may* achieve better + * compression). + * @param typesize Is the number of bytes for the atomic type in binary + * @p src buffer. This is mainly useful for the shuffle preconditioner. + * For implementation reasons, only a 1 < typesize < 256 will allow the + * shuffle filter to work. When typesize is not in this range, shuffle + * will be silently disabled. + * @param src The buffer containing the data to compress. + * @param srcsize The number of bytes to compress in the @p src buffer. + * @param dest The buffer where the compressed data will be put, + * must have at least the size of @p destsize. + * @param destsize The size of the dest buffer. Blosc + * guarantees that if you set @p destsize to, at least, + * (@p srcsize + #BLOSC2_MAX_OVERHEAD), the compression will always succeed. + * + * @return The number of bytes compressed. + * If @p src buffer cannot be compressed into @p destsize, the return + * value is zero and you should discard the contents of the @p dest + * buffer. A negative return value means that either a parameter is not correct + * or that an internal error happened. 
Set the BLOSC_TRACE environment variable + * for getting more info on what is happening. If the error is not related to + * wrong params, please report it back together with the buffer data causing this, + * as well as the compression params used. +*/ +/* + * Environment variables + * _____________________ + * + * *blosc2_compress()* honors different environment variables to control + * internal parameters without the need of doing that programmatically. + * Here are the ones supported: + * + * **BLOSC_CLEVEL=(INTEGER)**: This will overwrite the @p clevel parameter + * before the compression process starts. + * + * **BLOSC_SHUFFLE=[NOSHUFFLE | SHUFFLE | BITSHUFFLE]**: This will + * overwrite the *doshuffle* parameter before the compression process + * starts. + * + * **BLOSC_DELTA=(1|0)**: This will call *blosc2_set_delta()* before the + * compression process starts. + * + * **BLOSC_TYPESIZE=(INTEGER)**: This will overwrite the *typesize* + * parameter before the compression process starts. + * + * **BLOSC_COMPRESSOR=[BLOSCLZ | LZ4 | LZ4HC | ZLIB | ZSTD]**: + * This will call #blosc1_set_compressor before the compression process starts. + * + * **BLOSC_NTHREADS=(INTEGER)**: This will call + * #blosc2_set_nthreads before the compression process + * starts. + * + * **BLOSC_SPLITMODE=(ALWAYS | NEVER | AUTO | FORWARD_COMPAT)**: + * This will call #blosc1_set_splitmode() before the compression process starts. + * + * **BLOSC_BLOCKSIZE=(INTEGER)**: This will call + * #blosc1_set_blocksize before the compression process starts. + * *NOTE:* The blocksize is a critical parameter with + * important restrictions in the allowed values, so use this with care. + * + * **BLOSC_NOLOCK=(ANY VALUE)**: This will call #blosc2_compress_ctx under + * the hood, with the *compressor*, *blocksize* and + * *numinternalthreads* parameters set to the same as the last calls to + * #blosc1_set_compressor, #blosc1_set_blocksize and + * #blosc2_set_nthreads. 
*BLOSC_CLEVEL*, *BLOSC_SHUFFLE*, *BLOSC_DELTA* and + * *BLOSC_TYPESIZE* environment vars will also be honored. + * + */ +BLOSC_EXPORT int blosc2_compress(int clevel, int doshuffle, int32_t typesize, + const void* src, int32_t srcsize, void* dest, + int32_t destsize); + + +/** + * @brief Decompress a block of compressed data in @p src, put the result in + * @p dest and return the size of the decompressed block. + * + * @warning The @p src buffer and the @p dest buffer can not overlap. + * + * @remark Decompression is memory safe and guaranteed not to write the @p dest + * buffer more than what is specified in @p destsize. + * + * @remark In case you want to keep under control the number of bytes read from + * source, you can call #blosc1_cbuffer_sizes first to check whether the + * @p nbytes (i.e. the number of bytes to be read from @p src buffer by this + * function) in the compressed buffer is ok with you. + * + * @param src The buffer to be decompressed. + * @param srcsize The size of the buffer to be decompressed. + * @param dest The buffer where the decompressed data will be put. + * @param destsize The size of the @p dest buffer. + * + * @return The number of bytes decompressed. + * If an error occurs, e.g. the compressed data is corrupted or the + * output buffer is not large enough, then a negative value + * will be returned instead. +*/ +/* + * Environment variables + * _____________________ + * + * *blosc2_decompress* honors different environment variables to control + * internal parameters without the need of doing that programmatically. + * Here are the ones supported: + * + * **BLOSC_NTHREADS=(INTEGER)**: This will call + * *blosc2_set_nthreads(BLOSC_NTHREADS)* before the proper decompression + * process starts. + * + * **BLOSC_NOLOCK=(ANY VALUE)**: This will call *blosc2_decompress_ctx* + * under the hood, with the *numinternalthreads* parameter set to the + * same value as the last call to *blosc2_set_nthreads*. 
+ * + */ +BLOSC_EXPORT int blosc2_decompress(const void* src, int32_t srcsize, + void* dest, int32_t destsize); + +/** + * @brief Context interface to Blosc compression. This does not require a call + * to #blosc2_init and can be called from multithreaded applications + * without the global lock being used, so allowing Blosc be executed + * simultaneously in those scenarios. + * + * @param context A blosc2_context struct with the different compression params. + * @param src The buffer containing the data to be compressed. + * @param srcsize The number of bytes to be compressed from the @p src buffer. + * @param dest The buffer where the compressed data will be put. + * @param destsize The size in bytes of the @p dest buffer. + * + * @return The number of bytes compressed. + * If @p src buffer cannot be compressed into @p destsize, the return + * value is zero and you should discard the contents of the @p dest + * buffer. A negative return value means that an internal error happened. + * It could happen that context is not meant for compression (which is stated in stderr). + * Otherwise, please report it back together with the buffer data causing this + * and compression settings. + */ +BLOSC_EXPORT int blosc2_compress_ctx( + blosc2_context* context, const void* src, int32_t srcsize, void* dest, + int32_t destsize); + + +/** + * @brief Context interface to Blosc decompression. This does not require a + * call to #blosc2_init and can be called from multithreaded + * applications without the global lock being used, so allowing Blosc + * be executed simultaneously in those scenarios. + * + * @param context The blosc2_context struct with the different compression params. + * @param src The buffer of compressed data. + * @param srcsize The length of buffer of compressed data. + * @param dest The buffer where the decompressed data will be put. + * @param destsize The size in bytes of the @p dest buffer. 
+ * + * @warning The @p src buffer and the @p dest buffer can not overlap. + * + * @remark Decompression is memory safe and guaranteed not to write the @p dest + * buffer more than what is specified in @p destsize. + * + * @remark In case you want to keep under control the number of bytes read from + * source, you can call #blosc1_cbuffer_sizes first to check the @p nbytes + * (i.e. the number of bytes to be read from @p src buffer by this function) + * in the compressed buffer. + * + * @remark If #blosc2_set_maskout is called prior to this function, its + * @p maskout parameter will be honored for just *one single* shot; + * i.e. the maskout in context will be automatically reset to NULL, so + * the mask won't be used next time (unless #blosc2_set_maskout is called again). + * + * @return The number of bytes decompressed (i.e. the maskout blocks are not + * counted). If an error occurs, e.g. the compressed data is corrupted, + * @p destsize is not large enough or context is not meant for decompression, + * then a negative value will be returned instead. + */ +BLOSC_EXPORT int blosc2_decompress_ctx(blosc2_context* context, const void* src, + int32_t srcsize, void* dest, int32_t destsize); + +/** + * @brief Create a chunk made of zeros. + * + * @param cparams The compression parameters. + * @param nbytes The size (in bytes) of the chunk. + * @param dest The buffer where the data chunk will be put. + * @param destsize The size (in bytes) of the @p dest buffer; + * must be BLOSC_EXTENDED_HEADER_LENGTH at least. + * + * @return The number of bytes compressed (BLOSC_EXTENDED_HEADER_LENGTH). + * If negative, there has been an error and @p dest is unusable. + * */ +BLOSC_EXPORT int blosc2_chunk_zeros(blosc2_cparams cparams, int32_t nbytes, + void* dest, int32_t destsize); + + +/** + * @brief Create a chunk made of nans. + * + * @param cparams The compression parameters; + * only 4 bytes (float) and 8 bytes (double) are supported. 
+ * @param nbytes The size (in bytes) of the chunk. + * @param dest The buffer where the data chunk will be put. + * @param destsize The size (in bytes) of the @p dest buffer; + * must be BLOSC_EXTENDED_HEADER_LENGTH at least. + * + * @note Whether the NaNs are floats or doubles will be given by the typesize. + * + * @return The number of bytes compressed (BLOSC_EXTENDED_HEADER_LENGTH). + * If negative, there has been an error and @p dest is unusable. + * */ +BLOSC_EXPORT int blosc2_chunk_nans(blosc2_cparams cparams, int32_t nbytes, + void* dest, int32_t destsize); + + +/** + * @brief Create a chunk made of repeated values. + * + * @param cparams The compression parameters. + * @param nbytes The size (in bytes) of the chunk. + * @param dest The buffer where the data chunk will be put. + * @param destsize The size (in bytes) of the @p dest buffer. + * @param repeatval A pointer to the repeated value (little endian). + * The size of the value is given by @p cparams.typesize param. + * + * @return The number of bytes compressed (BLOSC_EXTENDED_HEADER_LENGTH + typesize). + * If negative, there has been an error and @p dest is unusable. + * */ +BLOSC_EXPORT int blosc2_chunk_repeatval(blosc2_cparams cparams, int32_t nbytes, + void* dest, int32_t destsize, const void* repeatval); + + +/** + * @brief Create a chunk made of uninitialized values. + * + * @param cparams The compression parameters. + * @param nbytes The size (in bytes) of the chunk. + * @param dest The buffer where the data chunk will be put. + * @param destsize The size (in bytes) of the @p dest buffer; + * must be BLOSC_EXTENDED_HEADER_LENGTH at least. + * + * @return The number of bytes compressed (BLOSC_EXTENDED_HEADER_LENGTH). + * If negative, there has been an error and @p dest is unusable. + * */ +BLOSC_EXPORT int blosc2_chunk_uninit(blosc2_cparams cparams, int32_t nbytes, + void* dest, int32_t destsize); + + +/** + * @brief Context interface counterpart for #blosc1_getitem. 
+ * + * @param context Context pointer. + * @param src The compressed buffer from which data will be decompressed. + * @param srcsize Compressed buffer length. + * @param start The position of the first item (of @p typesize size) from where data + * will be retrieved. + * @param nitems The number of items (of @p typesize size) that will be retrieved. + * @param dest The buffer where the decompressed data retrieved will be put. + * @param destsize Output buffer length. + * + * @return The number of bytes copied to @p dest or a negative value if + * some error happens. + */ +BLOSC_EXPORT int blosc2_getitem_ctx(blosc2_context* context, const void* src, + int32_t srcsize, int start, int nitems, void* dest, + int32_t destsize); + + +/********************************************************************* + Super-chunk related structures and functions. +*********************************************************************/ + +#define BLOSC2_MAX_METALAYERS 16 +#define BLOSC2_METALAYER_NAME_MAXLEN 31 + +// Allow for a reasonable number of vl metalayers +// max is 64 * 1024 due to msgpack map 16 in frame +// mem usage 8 * 1024 entries for blosc2_schunk.vlmetalayers[] is 64 KB +#define BLOSC2_MAX_VLMETALAYERS (8 * 1024) +#define BLOSC2_VLMETALAYERS_NAME_MAXLEN BLOSC2_METALAYER_NAME_MAXLEN + +/** + * @brief This struct is meant for holding storage parameters for a + * blosc2 container, allowing to specify, for example, how to interpret + * the contents included in the schunk. + */ +typedef struct { + bool contiguous; + //!< Whether the chunks are contiguous or sparse. + char* urlpath; + //!< The path for persistent storage. If NULL, that means in-memory. + blosc2_cparams* cparams; + //!< The compression params when creating a schunk. + //!< If NULL, sensible defaults are used depending on the context. + blosc2_dparams* dparams; + //!< The decompression params when creating a schunk. + //!< If NULL, sensible defaults are used depending on the context. 
+ blosc2_io *io; + //!< Input/output backend. +} blosc2_storage; + +/** + * @brief Default struct for #blosc2_storage meant for user initialization. + */ +static const blosc2_storage BLOSC2_STORAGE_DEFAULTS = {false, NULL, NULL, NULL, NULL}; + +/** + * @brief Get default struct for compression params meant for user initialization. + */ +BLOSC_EXPORT blosc2_cparams blosc2_get_blosc2_cparams_defaults(void); + +/** + * @brief Get default struct for decompression params meant for user initialization. + */ +BLOSC_EXPORT blosc2_dparams blosc2_get_blosc2_dparams_defaults(void); + +/** + * @brief Get default struct for #blosc2_storage meant for user initialization. + */ +BLOSC_EXPORT blosc2_storage blosc2_get_blosc2_storage_defaults(void); + +/** + * @brief Get default struct for #blosc2_io meant for user initialization. + */ +BLOSC_EXPORT blosc2_io blosc2_get_blosc2_io_defaults(void); + +/** + * @brief Get default struct for #blosc2_stdio_mmap meant for user initialization. + */ +BLOSC_EXPORT blosc2_stdio_mmap blosc2_get_blosc2_stdio_mmap_defaults(void); + +typedef struct blosc2_frame_s blosc2_frame; /* opaque type */ + +/** + * @brief This struct is meant to store metadata information inside + * a #blosc2_schunk, allowing to specify, for example, how to interpret + * the contents included in the schunk. + */ +typedef struct blosc2_metalayer { + char* name; //!< The metalayer identifier for Blosc client (e.g. Blosc2 NDim). + uint8_t* content; //!< The serialized (msgpack preferably) content of the metalayer. + int32_t content_len; //!< The length in bytes of the content. +} blosc2_metalayer; + +/** + * @brief This struct is the standard container for Blosc 2 compressed data. + * + * This is essentially a container for Blosc 1 chunks of compressed data, + * and it allows to overcome the 32-bit limitation in Blosc 1. Optionally, + * a #blosc2_frame can be attached so as to store the compressed chunks contiguously. 
+ */ +typedef struct blosc2_schunk { + uint8_t version; + uint8_t compcode; + //!< The default compressor. Each chunk can override this. + uint8_t compcode_meta; + //!< The default compressor metadata. Each chunk can override this. + uint8_t clevel; + //!< The compression level and other compress params. + uint8_t splitmode; + //!< The split mode. + int32_t typesize; + //!< The type size. + int32_t blocksize; + //!< The requested size of the compressed blocks (0; meaning automatic). + int32_t chunksize; + //!< Size of each chunk. 0 if not a fixed chunksize. + uint8_t filters[BLOSC2_MAX_FILTERS]; + //!< The (sequence of) filters. 8-bit per filter. + uint8_t filters_meta[BLOSC2_MAX_FILTERS]; + //!< Metadata for filters. 8-bit per meta-slot. + int64_t nchunks; + //!< Number of chunks in super-chunk. + int64_t current_nchunk; + //!< The current chunk that is being accessed + int64_t nbytes; + //!< The data size (uncompressed). + int64_t cbytes; + //!< The data size + chunks header size (compressed). + uint8_t** data; + //!< Pointer to chunk data pointers buffer. + size_t data_len; + //!< Length of the chunk data pointers buffer. + blosc2_storage* storage; + //!< Pointer to storage info. + blosc2_frame* frame; + //!< Pointer to frame used as store for chunks. + //! BLOSC2_METALAYER_NAME_MAXLEN) { + BLOSC_TRACE_ERROR("Metalayers cannot be larger than %d chars.", BLOSC2_METALAYER_NAME_MAXLEN); + return BLOSC2_ERROR_INVALID_PARAM; + } + + if (schunk == NULL) { + BLOSC_TRACE_ERROR("Schunk must not be NUll."); + return BLOSC2_ERROR_INVALID_PARAM; + } + + for (int nmetalayer = 0; nmetalayer < schunk->nmetalayers; nmetalayer++) { + if (strcmp(name, schunk->metalayers[nmetalayer]->name) == 0) { + return nmetalayer; + } + } + return BLOSC2_ERROR_NOT_FOUND; +} + +/** + * @brief Add content into a new metalayer. + * + * @param schunk The super-chunk to which the metalayer should be added. + * @param name The name of the metalayer. + * @param content The content of the metalayer. 
+ * @param content_len The length of the content. + * + * @return If successful, the index of the new metalayer. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_meta_add(blosc2_schunk *schunk, const char *name, uint8_t *content, + int32_t content_len); + +/** + * @brief Update the content of an existing metalayer. + * + * @param schunk The frame containing the metalayer. + * @param name The name of the metalayer to be updated. + * @param content The new content of the metalayer. + * @param content_len The length of the content. + * + * @note Contrarily to #blosc2_meta_add the updates to metalayers + * are automatically serialized into a possible attached frame. + * + * @return If successful, the index of the metalayer. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_meta_update(blosc2_schunk *schunk, const char *name, uint8_t *content, + int32_t content_len); + +/** + * @brief Get the content out of a metalayer. + * + * @param schunk The frame containing the metalayer. + * @param name The name of the metalayer. + * @param content The pointer where the content will be put. + * @param content_len The length of the content. + * + * @warning The @p **content receives a malloc'ed copy of the content. + * The user is responsible of freeing it. + * + * @note This function is inlined and available even when not linking with libblosc2. + * + * @return If successful, the index of the new metalayer. Else, return a negative value. 
+ */ +static inline int blosc2_meta_get(blosc2_schunk *schunk, const char *name, uint8_t **content, + int32_t *content_len) { + int nmetalayer = blosc2_meta_exists(schunk, name); + if (nmetalayer < 0) { + BLOSC_TRACE_WARNING("Metalayer \"%s\" not found.", name); + return nmetalayer; + } + *content_len = schunk->metalayers[nmetalayer]->content_len; + *content = (uint8_t*)malloc((size_t)*content_len); + memcpy(*content, schunk->metalayers[nmetalayer]->content, (size_t)*content_len); + return nmetalayer; +} + + +/********************************************************************* + Variable-length metalayers functions. +*********************************************************************/ + +/** + * @brief Find whether the schunk has a variable-length metalayer or not. + * + * @param schunk The super-chunk from which the variable-length metalayer will be checked. + * @param name The name of the variable-length metalayer to be checked. + * + * @return If successful, return the index of the variable-length metalayer. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_vlmeta_exists(blosc2_schunk *schunk, const char *name); + +/** + * @brief Add content into a new variable-length metalayer. + * + * @param schunk The super-chunk to which the variable-length metalayer should be added. + * @param name The name of the variable-length metalayer. + * @param content The content to be added. + * @param content_len The length of the content. + * @param cparams The parameters for compressing the variable-length metalayer content. If NULL, + * the `BLOSC2_CPARAMS_DEFAULTS` will be used. + * + * @return If successful, the index of the new variable-length metalayer. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_vlmeta_add(blosc2_schunk *schunk, const char *name, + uint8_t *content, int32_t content_len, + blosc2_cparams *cparams); + +/** + * @brief Update the content of an existing variable-length metalayer. 
+ * + * @param schunk The super-chunk containing the variable-length metalayer. + * @param name The name of the variable-length metalayer to be updated. + * @param content The new content of the variable-length metalayer. + * @param content_len The length of the content. + * @param cparams The parameters for compressing the variable-length metalayer content. If NULL, + * the `BLOSC2_CPARAMS_DEFAULTS` will be used. + * + * @return If successful, the index of the variable-length metalayer. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_vlmeta_update(blosc2_schunk *schunk, const char *name, + uint8_t *content, int32_t content_len, + blosc2_cparams *cparams); + +/** + * @brief Get the content out of a variable-length metalayer. + * + * @param schunk The super-chunk containing the variable-length metalayer. + * @param name The name of the variable-length metalayer. + * @param content The pointer where the content will be put. + * @param content_len The pointer where the length of the content will be put. + * + * @warning The @p **content receives a malloc'ed copy of the content. + * The user is responsible of freeing it. + * + * @return If successful, the index of the new variable-length metalayer. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_vlmeta_get(blosc2_schunk *schunk, const char *name, + uint8_t **content, int32_t *content_len); + +/** + * @brief Delete the variable-length metalayer from the super-chunk. + * + * @param schunk The super-chunk containing the variable-length metalayer. + * @param name The name of the variable-length metalayer. + * + * @return If successful, the number of the variable-length metalayers in the super-chunk. Else, return a negative value. + */ +BLOSC_EXPORT int blosc2_vlmeta_delete(blosc2_schunk *schunk, const char *name); + +/** + * @brief Get a list of all the variable-length metalayer names. + * + * @param schunk The super-chunk containing the variable-length metalayers. 
+ * @param names The pointer to a char** to store the name pointers. This should + * be of size *schunk->nvlmetalayers * sizeof(char*). + * + * @return The number of the variable-length metalayers in the super-chunk. + * This cannot fail unless the user does not pass a @p names which is large enough to + * keep pointers to all names, in which case funny things (seg faults and such) will happen. + */ +BLOSC_EXPORT int blosc2_vlmeta_get_names(blosc2_schunk *schunk, char **names); + + +/********************************************************************* + Time measurement utilities. +*********************************************************************/ + +#if defined(_WIN32) +/* For QueryPerformanceCounter(), etc. */ + #include +#elif defined(__MACH__) && defined(__APPLE__) +#include +#include +#include +#elif defined(__unix__) +#if defined(__linux__) + #include + #else + #include + #endif +#else + #error Unable to detect platform. +#endif + +/* The type of timestamp used on this system. */ +#if defined(_WIN32) +typedef LARGE_INTEGER blosc_timestamp_t; +#else +typedef struct timespec blosc_timestamp_t; +#endif + +/* + * @brief Set a timestamp. + * + * @param timestamp + * + */ +BLOSC_EXPORT void blosc_set_timestamp(blosc_timestamp_t* timestamp); + +/* + * @brief Get the nanoseconds between 2 timestamps. + * + * @param start_time + * @param end_time + * + * @return The nanoseconds between start_time and end_time. + */ +BLOSC_EXPORT double blosc_elapsed_nsecs(blosc_timestamp_t start_time, + blosc_timestamp_t end_time); + +/* + * @brief Get the seconds between 2 timestamps. + * + * @param start_time + * @param end_time + * + * @return The seconds between start_time and end_time. + */ +BLOSC_EXPORT double blosc_elapsed_secs(blosc_timestamp_t start_time, + blosc_timestamp_t end_time); + + +/********************************************************************* + Low-level functions follows. Use them only if you are an expert! 
+*********************************************************************/ + +/** + * @brief Get the internal blocksize to be used during compression. 0 means + * that an automatic blocksize is computed internally. + * + * @return The size in bytes of the internal block size. + */ +BLOSC_EXPORT int blosc1_get_blocksize(void); + +/** + * @brief Force the use of a specific blocksize. If 0, an automatic + * blocksize will be used (the default). + * + * @warning The blocksize is a critical parameter with important + * restrictions in the allowed values, so use this with care. + */ +BLOSC_EXPORT void blosc1_set_blocksize(size_t blocksize); + + +/** + * @brief Set the split mode. + + * @param splitmode It can take the next values: + * BLOSC_FORWARD_COMPAT_SPLIT + * BLOSC_AUTO_SPLIT + * BLOSC_NEVER_SPLIT + * BLOSC_ALWAYS_SPLIT + * + * BLOSC_FORWARD_COMPAT offers reasonably forward compatibility, + * BLOSC_AUTO_SPLIT is for nearly optimal results (based on heuristics), + * BLOSC_NEVER_SPLIT and BLOSC_ALWAYS_SPLIT are for the user experimenting + * when trying to get best compression ratios and/or speed. + * + * If not called, the default mode is BLOSC_FORWARD_COMPAT_SPLIT. + * + * This function should always succeed. + */ +BLOSC_EXPORT void blosc1_set_splitmode(int splitmode); + + +/** + * @brief Get the offsets of a frame in a super-chunk. + * + * @param schunk The super-chunk containing the frame. + * + * @return If successful, return a pointer to a buffer of the decompressed offsets. + * The number of offsets is equal to schunk->nchunks; the user is + * responsible to free this buffer. Else, return a NULL value. + */ +BLOSC_EXPORT int64_t* blosc2_frame_get_offsets(blosc2_schunk *schunk); + + +/********************************************************************* + Structures and functions related with compression codecs. 
+*********************************************************************/ + +typedef int (* blosc2_codec_encoder_cb) (const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, + uint8_t meta, blosc2_cparams *cparams, const void* chunk); +typedef int (* blosc2_codec_decoder_cb) (const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, + uint8_t meta, blosc2_dparams *dparams, const void* chunk); + +typedef struct { + uint8_t compcode; + //!< The codec identifier. + char *compname; + //!< The codec name. + uint8_t complib; + //!< The codec library format. + uint8_t version; + //!< The codec version. + blosc2_codec_encoder_cb encoder; + //!< The codec encoder that is used during compression. + blosc2_codec_decoder_cb decoder; + //!< The codec decoder that is used during decompression. + // int (*free)(void* codec_params); + // //!< Free the codec_params stored in blosc2_context. +} blosc2_codec; + +/** + * @brief Register locally a user-defined codec in Blosc. + * + * @param codec The codec to register. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_register_codec(blosc2_codec *codec); + + +/********************************************************************* + Structures and functions related with filters plugins. +*********************************************************************/ + +typedef int (* blosc2_filter_forward_cb) (const uint8_t *, uint8_t *, int32_t, uint8_t, blosc2_cparams *, + uint8_t); +typedef int (* blosc2_filter_backward_cb) (const uint8_t *, uint8_t *, int32_t, uint8_t, blosc2_dparams *, + uint8_t); + +/** + * @brief The parameters for a user-defined filter. + */ +typedef struct { + uint8_t id; + //!< The filter identifier. + char * name; + //!< The filter name. + uint8_t version; + //!< The filter version. + blosc2_filter_forward_cb forward; + //!< The filter function that is used during compression. 
+ blosc2_filter_backward_cb backward; + //!< The filter function that is used during decompression. +} blosc2_filter; + +/** + * @brief Register locally a user-defined filter in Blosc. + * + * @param filter The filter to register. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_register_filter(blosc2_filter *filter); + +/********************************************************************* + Directory utilities. +*********************************************************************/ + +/* + * @brief Remove a directory and its files. + * + * @param path The directory to remove. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_remove_dir(const char *path); + +/* + * @brief Remove a file or a directory given by path. + * + * @param path The file or directory to remove. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_remove_urlpath(const char *path); + +/* + * @brief Rename a file or a directory given by old_urlpath to new_path. + * + * @param old_urlpath The original path to the directory or file. + * @param new_path The new path to the directory or file. + * + * @return 0 if succeeds. Else a negative code is returned. + */ +BLOSC_EXPORT int blosc2_rename_urlpath(char* old_urlpath, char* new_path); + + +/********************************************************************* + Index utilities. +*********************************************************************/ + +/* + * @brief Convert a sequential index into a multidimensional index + * + * This function assume ndim <= B2ND_MAX_DIM. 
+ */ +BLOSC_EXPORT void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_t i, int64_t *index); + +/* + * @brief Convert a multidimensional index into a sequential index + */ +BLOSC_EXPORT void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t *strides, int64_t *i); + +/* + * @brief Get the unidimensional chunk indexes needed to get a slice of a schunk or a b2nd array + * + * @param schunk The super-chunk (of b2nd array or not). + * @param start Index (0-based if it is a schunk) where the slice begins. + * @param stop The first index (0-based if it is a schunk) that is not in the selected slice. + * @param chunks_idx The pointer to the buffer where the indexes will be written. It is the user responsibility + * to free the buffer. + * + * @return The number of chunks needed to get the slice. If some problem is + * detected, a negative code is returned instead. + */ +BLOSC_EXPORT int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx); + + +/********************************************************************* + Raw shuffle functions. +*********************************************************************/ + +/** + * @brief Applies the shuffle operation to a block of data in @p src, and puts the result in @p dest. + * + * @warning The @p src buffer and the @p dest buffer can not overlap. + * + * @param typesize Is the number of bytes for the atomic type in binary @p src buffer. Only 1 < typesize + * < 256 is allowed. + * @param blocksize The size of the block. + * @param src The source buffer to be shuffled. + * @param dest The destination buffer where the shuffled data will be written. + * + * @return @p blocksize on success or a negative value if some error happens (mainly an invalid parameter). 
+ */ +BLOSC_EXPORT int32_t blosc2_shuffle(const int32_t typesize, const int32_t blocksize, const void* src, + void* dest); + +/** + * @brief Applies the inverse shuffle operation to a block of data in @p src, and puts the result in @p dest. + * + * @warning The @p src buffer and the @p dest buffer can not overlap. + * + * @param typesize Is the number of bytes for the atomic type in binary @p src buffer. Only 1 < typesize + * < 256 is allowed. + * @param blocksize The size of the block. + * @param src The source buffer to be unshuffled. + * @param dest The destination buffer where the unshuffled data will be written. + * + * @return @p blocksize on success or a negative value if some error happens (mainly an invalid parameter). + */ +BLOSC_EXPORT int32_t blosc2_unshuffle(const int32_t typesize, const int32_t blocksize, const void* src, + void* dest); + +/** + * @brief Applies the bitshuffle operation to a block of data in @p src, and puts the result in @p dest. + * + * @warning The @p src buffer and the @p dest buffer can not overlap. + * + * @remark The function will shuffle the maximum amount of elements that can be divided by 8, and copy the + * rest to the destination buffer unchanged. There are @p blocksize / @p typesize elements. + * + * @param typesize Is the number of bytes for the atomic type in binary @p src buffer. Only 1 < typesize + * < 256 is allowed. + * @param blocksize The size of the block. + * @param src The source buffer to be shuffled. + * @param dest The destination buffer where the shuffled data will be written. + * + * @return @p blocksize on success or a negative value if some error happens (mainly an invalid parameter). + */ +BLOSC_EXPORT int32_t blosc2_bitshuffle(const int32_t typesize, const int32_t blocksize, const void* src, + void* dest); + +/** + * @brief Applies the inverse bitshuffle operation to a block of data in @p src, and puts the result in @p dest. + * + * @warning The @p src buffer and the @p dest buffer can not overlap. 
+ * + * @remark The function will shuffle the maximum amount of elements that can be divided by 8, and copy the + * rest to the destination buffer unchanged. There are @p blocksize / @p typesize elements. + * + * @param typesize Is the number of bytes for the atomic type in binary @p src buffer. Only 1 < typesize + * < 256 is allowed. + * @param blocksize The size of the block. + * @param src The source buffer to be unshuffled. + * @param dest The destination buffer where the unshuffled data will be written. + * + * @return @p blocksize on success or a negative value if some error happens (mainly an invalid parameter). + */ +BLOSC_EXPORT int32_t blosc2_bitunshuffle(const int32_t typesize, const int32_t blocksize, const void* src, + void* dest); + + +/********************************************************************* + Private functions, these are here for convenience, + and are not meant to be included in public docs +*********************************************************************/ + +// Private function needed in b2nd.h for deserializing meta +static inline void swap_store(void *dest, const void *pa, int size) { + uint8_t *pa_ = (uint8_t *) pa; + uint8_t *pa2_ = (uint8_t*)malloc((size_t) size); + int i = 1; /* for big/little endian detection */ + char *p = (char *) &i; + + if (p[0] == 1) { + /* little endian */ + switch (size) { + case 8: + pa2_[0] = pa_[7]; + pa2_[1] = pa_[6]; + pa2_[2] = pa_[5]; + pa2_[3] = pa_[4]; + pa2_[4] = pa_[3]; + pa2_[5] = pa_[2]; + pa2_[6] = pa_[1]; + pa2_[7] = pa_[0]; + break; + case 4: + pa2_[0] = pa_[3]; + pa2_[1] = pa_[2]; + pa2_[2] = pa_[1]; + pa2_[3] = pa_[0]; + break; + case 2: + pa2_[0] = pa_[1]; + pa2_[1] = pa_[0]; + break; + case 1: + pa2_[0] = pa_[0]; + break; + default: + fprintf(stderr, "Unhandled nitems: %d\n", size); + } + } + memcpy(dest, pa2_, size); + free(pa2_); +} + +#ifdef __cplusplus +} +#endif + +#endif /* BLOSC_BLOSC2_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-common.h 
b/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-common.h new file mode 100644 index 0000000..dd93aff --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-common.h @@ -0,0 +1,80 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef BLOSC_BLOSC2_BLOSC2_COMMON_H +#define BLOSC_BLOSC2_BLOSC2_COMMON_H + +#include "blosc2-export.h" + +#include +#include + +// For shutting up stupid compiler warning about some 'unused' variables in GCC +#ifdef __GNUC__ +#define BLOSC_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#define BLOSC_UNUSED_VAR __attribute__ ((unused)) +#else +#define BLOSC_UNUSED_VAR +#endif // __GNUC__ + +// For shutting up compiler warning about unused parameters +#define BLOSC_UNUSED_PARAM(x) ((void)(x)) + +/* Use inlined functions for supported systems */ +#if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ + #define inline __inline /* Visual C is not C99, but supports some kind of inline */ +#endif + + +/* Define the __SSE2__ symbol if compiling with Visual C++ and + targeting the minimum architecture level supporting SSE2. + Other compilers define this as expected and emit warnings + when it is re-defined. */ +#if !defined(__SSE2__) && defined(_MSC_VER) && \ + (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)) + #define __SSE2__ +#endif + +/* + * Detect if the architecture is fine with unaligned access. 
+ */ +#if !defined(BLOSC_STRICT_ALIGN) +#define BLOSC_STRICT_ALIGN +#if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */ +#undef BLOSC_STRICT_ALIGN +#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ +#undef BLOSC_STRICT_ALIGN +#elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */ +#undef BLOSC_STRICT_ALIGN +#elif defined(__386) +#undef BLOSC_STRICT_ALIGN +#elif defined(_X86_) /* MinGW */ +#undef BLOSC_STRICT_ALIGN +#elif defined(__I86__) /* Digital Mars */ +#undef BLOSC_STRICT_ALIGN +/* Modern ARM systems (like ARM64) should support unaligned access + quite efficiently. */ +#elif defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM64_ARCH_8__) +#undef BLOSC_STRICT_ALIGN +#elif defined(_ARCH_PPC) || defined(__PPC__) +/* Modern PowerPC systems (like POWER8) should support unaligned access + quite efficiently. */ +#undef BLOSC_STRICT_ALIGN +#endif +#endif + +#if defined(__SSE2__) + #include +#endif +#if defined(__AVX2__) || defined(__AVX512F__) || defined (__AVX512BW__) + #include +#endif + +#endif /* BLOSC_BLOSC2_BLOSC2_COMMON_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-export.h b/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-export.h new file mode 100644 index 0000000..e5daed2 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-export.h @@ -0,0 +1,48 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef BLOSC_BLOSC2_BLOSC2_EXPORT_H +#define BLOSC_BLOSC2_BLOSC2_EXPORT_H + +/* Macros for specifying exported symbols. + BLOSC_EXPORT is used to decorate symbols that should be + exported by the blosc shared library. 
+ BLOSC_NO_EXPORT is used to decorate symbols that should NOT + be exported by the blosc shared library. +*/ +#if defined(BLOSC_SHARED_LIBRARY) + #if defined(_MSC_VER) + #define BLOSC_EXPORT __declspec(dllexport) + #elif (defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__) + #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) + #define BLOSC_EXPORT __attribute__((dllexport)) + #else + #define BLOSC_EXPORT __attribute__((visibility("default"))) + #endif /* defined(_WIN32) || defined(__CYGWIN__) */ + #else + #error Cannot determine how to define BLOSC_EXPORT for this compiler. + #endif +#else + #define BLOSC_EXPORT +#endif /* defined(BLOSC_SHARED_LIBRARY) */ + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) + #define BLOSC_NO_EXPORT __attribute__((visibility("hidden"))) +#else + #define BLOSC_NO_EXPORT +#endif /* (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) */ + +/* When testing, export everything to make it easier to implement tests. */ +#if defined(BLOSC_TESTING) + #undef BLOSC_NO_EXPORT + #define BLOSC_NO_EXPORT BLOSC_EXPORT +#endif /* defined(BLOSC_TESTING) */ + +#endif /* BLOSC_BLOSC2_BLOSC2_EXPORT_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-stdio.h b/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-stdio.h new file mode 100644 index 0000000..3d7c3ae --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2/blosc2-stdio.h @@ -0,0 +1,117 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. 
+**********************************************************************/ + +#ifndef BLOSC_BLOSC2_BLOSC2_STDIO_H +#define BLOSC_BLOSC2_BLOSC2_STDIO_H + +#include "blosc2-export.h" + +#if defined(_MSC_VER) +#include +#else +#include +#endif + +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + FILE *file; +} blosc2_stdio_file; + +BLOSC_EXPORT void *blosc2_stdio_open(const char *urlpath, const char *mode, void* params); +BLOSC_EXPORT int blosc2_stdio_close(void *stream); +BLOSC_EXPORT int64_t blosc2_stdio_size(void *stream); +BLOSC_EXPORT int64_t blosc2_stdio_write(const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream); +BLOSC_EXPORT int64_t blosc2_stdio_read(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream); +BLOSC_EXPORT int blosc2_stdio_truncate(void *stream, int64_t size); +BLOSC_EXPORT int blosc2_stdio_destroy(void* params); + + +/** + * @brief Parameters for memory-mapped I/O. You can use the blosc2_schunk_open*_udio functions to memory-map existing + * schunk files from disk. To create a new schunk which is backed up by a memory-mapped file on disk, set the io member + * of the #blosc2_storage struct (see test_mmap for examples). Please note that memory-mapped I/O is only available for + * cframes and not sframes. + */ +typedef struct { + /* Arguments of the mapping */ + const char* mode; + //!< The opening mode of the memory-mapped file (r, r+, w+ or c) similar to Numpy's np.memmap + //!< (https://numpy.org/doc/stable/reference/generated/numpy.memmap.html). Set to r if the file should only be read, + //!< r+ if you want to extend data to an existing file, w+ to create a new file and c to use an existing file as basis + //!< but keep all modifications in-memory. On Windows, the size of the mapping cannot change in the c mode. 
+ int64_t initial_mapping_size; + //!< The initial size of the memory mapping used as a large enough write buffer for the r+, w+ and c modes (for + //!< Windows, only the r+ and w+ modes). On Windows, this will also be the size of the file while the file is opened. + //!< It will be truncated to the target size when the file is closed (e.g., when the schunk is destroyed). + bool needs_free; + //!< Indicates whether this object should be freed in the blosc2_destroy_cb callback (set to true if the + //!< blosc2_stdio_mmap struct was created on the heap). + + /* Internal attributes of the mapping */ + char* addr; + //!< The starting address of the mapping. + char* urlpath; + //!< The path to the file which is associated with this object. + int64_t file_size; + //!< The size of the file. + int64_t mapping_size; + //!< The size of the mapping (mapping_size >= file_size). + bool is_memory_only; + //!< Whether the mapping is only in-memory and changes are not reflected to the file on disk (c mode). + FILE* file; + //!< The underlying file handle. + int fd; + //!< The underlying file descriptor. + int64_t access_flags; + //!< The access attributes for the memory pages. + int64_t map_flags; + //!< The attributes of the mapping. +#if defined(_WIN32) + HANDLE mmap_handle; + //!< The Windows handle to the memory mapping. +#endif +} blosc2_stdio_mmap; + +/** + * @brief Default struct for memory-mapped I/O for user initialization. 
+ */ +static const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS = { + "r", (1 << 30), false, NULL, NULL, -1, -1, false, NULL, -1, -1, -1 +#if defined(_WIN32) + , INVALID_HANDLE_VALUE +#endif +}; + +BLOSC_EXPORT void *blosc2_stdio_mmap_open(const char *urlpath, const char *mode, void* params); +BLOSC_EXPORT int blosc2_stdio_mmap_close(void *stream); +BLOSC_EXPORT int64_t blosc2_stdio_mmap_size(void *stream); +BLOSC_EXPORT int64_t blosc2_stdio_mmap_write( + const void *ptr, int64_t size, int64_t nitems, int64_t position, void *stream); +BLOSC_EXPORT int64_t blosc2_stdio_mmap_read(void **ptr, int64_t size, int64_t nitems, int64_t position, void *stream); +BLOSC_EXPORT int blosc2_stdio_mmap_truncate(void *stream, int64_t size); +BLOSC_EXPORT int blosc2_stdio_mmap_destroy(void* params); + +#ifdef __cplusplus +} +#endif + +#endif /* BLOSC_BLOSC2_BLOSC2_STDIO_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2/codecs-registry.h b/venv/Lib/site-packages/blosc2/include/blosc2/codecs-registry.h new file mode 100644 index 0000000..1260763 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2/codecs-registry.h @@ -0,0 +1,58 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef BLOSC_BLOSC2_CODECS_REGISTRY_H +#define BLOSC_BLOSC2_CODECS_REGISTRY_H + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + BLOSC_CODEC_NDLZ = 32, + //!< Simple Lempel-Ziv compressor for NDim data. Experimental, mainly for teaching purposes. + BLOSC_CODEC_ZFP_FIXED_ACCURACY = 33, + //!< ZFP compressor for fixed accuracy mode. The desired accuracy is set in `compcode_meta`. 
+ //!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md + BLOSC_CODEC_ZFP_FIXED_PRECISION = 34, + //!< ZFP compressor for fixed precision. The desired precision is set in `compcode_meta`. + //!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md + BLOSC_CODEC_ZFP_FIXED_RATE = 35, + //!< ZFP compressor for fixed precision. The desired rate is set in `compcode_meta`. + //!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/codecs/zfp/README.md + BLOSC_CODEC_OPENHTJ2K = 36, + //!< OpenHTJ2K compressor for JPEG 2000 HT. + //!< See https://github.com/Blosc/blosc2_openhtj2k + BLOSC_CODEC_GROK = 37, + //!< Grok compressor for JPEG 2000. + //!< See https://github.com/Blosc/blosc2_grok + BLOSC_CODEC_OPENZL = 38, + //!< OpenZL metacompressor. + //!< See https://github.com/Blosc/blosc2_openzl +}; + +void register_codecs(void); + +// For dynamically loaded codecs +typedef struct { + char *encoder; + char *decoder; +} codec_info; + +// If ever add .free func for codecs, may be needed +// typedef struct { +// char *free; +// } codecparams_info; + +#ifdef __cplusplus +} +#endif + +#endif /* BLOSC_BLOSC2_CODECS_REGISTRY_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2/filters-registry.h b/venv/Lib/site-packages/blosc2/include/blosc2/filters-registry.h new file mode 100644 index 0000000..8c27f10 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2/filters-registry.h @@ -0,0 +1,49 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. 
+**********************************************************************/ + +#ifndef BLOSC_BLOSC2_FILTERS_REGISTRY_H +#define BLOSC_BLOSC2_FILTERS_REGISTRY_H + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + BLOSC_FILTER_NDCELL = 32, + //!< Simple filter for grouping NDim cell data together. + //!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndcell/README.md + BLOSC_FILTER_NDMEAN = 33, + //!< Simple filter for replacing content of a NDim cell with its mean value. + //!< See https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndmean/README.md + BLOSC_FILTER_BYTEDELTA_BUGGY = 34, + // buggy version. See #524 + BLOSC_FILTER_BYTEDELTA = 35, + //!< Byte-wise delta. Assumes M streams of bytes of length N, where M is the typesize (specified by `filters_meta`). + //!< Should be used in combination with @ref BLOSC_SHUFFLE or @ref BLOSC_BITSHUFFLE. + //!< See https://www.blosc.org/posts/bytedelta-enhance-compression-toolset/ + BLOSC_FILTER_INT_TRUNC = 36, + //!< Truncate int precision; positive values in `filters_meta` slot will keep bits; + //!< negative values will remove (set to zero) bits. + //!< This is similar to @ref BLOSC_TRUNC_PREC, but for integers instead of floating point data. 
+}; + +void register_filters(void); + +// For dynamically loaded filters +typedef struct { + char *forward; + char *backward; +} filter_info; + +#ifdef __cplusplus +} +#endif + +#endif /* BLOSC_BLOSC2_FILTERS_REGISTRY_H */ diff --git a/venv/Lib/site-packages/blosc2/include/blosc2/tuners-registry.h b/venv/Lib/site-packages/blosc2/include/blosc2/tuners-registry.h new file mode 100644 index 0000000..94300a5 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/include/blosc2/tuners-registry.h @@ -0,0 +1,37 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (C) 2021 The Blosc Developers + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef BLOSC_BLOSC2_TUNERS_REGISTRY_H +#define BLOSC_BLOSC2_TUNERS_REGISTRY_H + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + BLOSC_BTUNE = 32, +}; + +void register_tuners(void); + +// For dynamically loaded tuners +typedef struct { + char *init; + char *next_blocksize; + char *next_cparams; + char *update; + char *free; +} tuner_info; + +#ifdef __cplusplus +} +#endif + +#endif /* BLOSC_BLOSC2_TUNERS_REGISTRY_H */ diff --git a/venv/Lib/site-packages/blosc2/info.py b/venv/Lib/site-packages/blosc2/info.py new file mode 100644 index 0000000..4ac629d --- /dev/null +++ b/venv/Lib/site-packages/blosc2/info.py @@ -0,0 +1,64 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +import io +import pprint +from textwrap import TextWrapper + + +def info_text_report_(items: list) -> str: + with io.StringIO() as buf: + print(items, file=buf) + return buf.getvalue() + + +def info_text_report(items: list) -> str: + keys = [k for k, v in items] + max_key_len = max(len(k) for k in keys) + report = "" + for k, v in items: + if isinstance(v, dict): + # rich way, this is disabled because it doesn't work well in the notebooks + # with io.StringIO() as buf: + # v_sorted = {k: val for k, val in sorted(v.items())} + # rich.print(v_sorted, file=buf) + # str_v = buf.getvalue()[:-1] # remove the trailing \n + # text = k.ljust(max_key_len) + " : " + str_v + # pprint way + text = k.ljust(max_key_len) + " : " + pprint.pformat(v) + else: + wrapper = TextWrapper( + width=96, + initial_indent=k.ljust(max_key_len) + " : ", + subsequent_indent=" " * max_key_len + " : ", + ) + text = wrapper.fill(str(v)) + report += text + "\n" + return report + + +def info_html_report(items: list) -> str: + report = '' + report += "" + for k, v in items: + report += f'' + report += "" + report += "
{k}{v}
" + return report + + +class InfoReporter: + def __init__(self, obj): + self.obj = obj + + def __repr__(self): + items = self.obj.info_items + return info_text_report(items) + + def _repr_html_(self): + items = self.obj.info_items + return info_html_report(items) diff --git a/venv/Lib/site-packages/blosc2/lazyexpr.py b/venv/Lib/site-packages/blosc2/lazyexpr.py new file mode 100644 index 0000000..07a7824 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lazyexpr.py @@ -0,0 +1,4567 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +# Avoid checking the name of type annotations at run time +from __future__ import annotations + +import ast +import asyncio +import builtins +import concurrent.futures +import copy +import enum +import inspect +import linecache +import math +import os +import pathlib +import re +import sys +import textwrap +import threading +from abc import ABC, abstractmethod, abstractproperty +from dataclasses import asdict +from enum import Enum +from pathlib import Path +from queue import Empty, Queue +from typing import TYPE_CHECKING, Any + +from numpy.exceptions import ComplexWarning + +from . 
def ne_evaluate(expression, local_dict=None, **kwargs):
    """
    Safely evaluate expressions using numexpr when possible, falling back to numpy.

    Parameters
    ----------
    expression: str
        The expression to evaluate.
    local_dict: dict, optional
        Names made available to the expression.  Note that it is extended
        *in place* with the array-like (anything having a ``shape``) and scalar
        locals of a calling frame, except for names it already contains and
        the reserved ``_where_x`` / ``_where_y``.
    kwargs: Any, optional
        Forwarded to ``numexpr.evaluate``.  Two keys are handled here:
        ``_frame_depth`` (default 1) is popped and selects the stack frame
        whose locals are harvested; ``out``, when present, receives the result
        on the ``eval``-based paths and is then returned.

    Returns
    -------
    out
        The result of the evaluation.

    Raises
    ------
    ValueError
        Re-raised from numexpr, unless its message is the
        "does not support Unicode" one (in which case the fallback is tried).
    """
    if local_dict is None:
        local_dict = {}
    # Get local vars dict from the stack frame
    _frame_depth = kwargs.pop("_frame_depth", 1)
    # NOTE: `|=` mutates the dict passed in by the caller
    local_dict |= {
        k: v
        for k, v in dict(sys._getframe(_frame_depth).f_locals).items()
        if (
            (hasattr(v, "shape") or np.isscalar(v))
            and
            # Do not overwrite the local_dict with the expression variables
            not (k in local_dict or k in ("_where_x", "_where_y"))
        )
    }
    if blosc2.IS_WASM:
        # numexpr is not imported on WASM, so evaluate with plain eval()
        # against the NumPy-only namespace
        global safe_numpy_globals
        if "out" in kwargs:
            out = kwargs.pop("out")
            out[:] = eval(expression, safe_numpy_globals, local_dict)
            return out
        res = eval(expression, safe_numpy_globals, local_dict)
        # Wrap results that carry no shape so callers always get an array-like
        return np.asarray(res) if not hasattr(res, "shape") else res
    try:
        return numexpr.evaluate(expression, local_dict=local_dict, **kwargs)
    except ValueError as e:
        # Only the "Unicode dtype" ValueError is tolerated; any other one propagates
        if e.args and e.args[0] == "NumExpr 2 does not support Unicode as a dtype.":
            pass
        else:
            raise  # unsafe expression
    except Exception:
        # Any non-ValueError failure in numexpr falls through to the eval() fallback below
        pass
    # Try with blosc2 funcs as presence of non-numexpr funcs probably caused failure
    # ne_evaluate will need safe_blosc2_globals for some functions (e.g. clip, logaddexp,
    # startswith, matmul) that are implemented incompletely in numexpr/miniexpr or not implemented at all
    global safe_blosc2_globals
    if len(safe_blosc2_globals) == 0:
        # First eval call, fill blosc2_safe_globals
        safe_blosc2_globals = {"blosc2": blosc2}
        # Add all first-level blosc2 functions
        safe_blosc2_globals.update(
            {
                name: getattr(blosc2, name)
                for name in dir(blosc2)
                if callable(getattr(blosc2, name)) and not name.startswith("_")
            }
        )
    res = eval(expression, safe_blosc2_globals, local_dict)
    if "out" in kwargs:
        out = kwargs.pop("out")
        out[:] = res  # will handle calc/decomp if res is lazyarray
        return out
    # A blosc2 operand is materialized via [()] before being returned
    return res[()] if isinstance(res, blosc2.Operand) else res
where condition is a tuple with one element + # and result is a boolean array + if len(x.shape) > 1: + raise ValueError("indices() and sort() only support 1D arrays") + if result.dtype != np.bool_: + raise ValueError("indices() and sort() only support bool conditions") + if _order: + # We need to cumulate all the fields in _order, as well as indices + chunk_indices = indices[result] + result = x[_order][result] + else: + chunk_indices = None + result = indices[result] + return result, chunk_indices + else: + return x[result], None + raise ValueError("The where condition must be a tuple with one or two elements") + + +# Define empty ndindex tuple for function defaults +NDINDEX_EMPTY_TUPLE = ndindex.Tuple() + +# All the dtypes that are supported by the expression evaluator +dtype_symbols = { + "int8": np.int8, + "int16": np.int16, + "int32": np.int32, + "int64": np.int64, + "uint8": np.uint8, + "uint16": np.uint16, + "uint32": np.uint32, + "uint64": np.uint64, + "float32": np.float32, + "float64": np.float64, + "complex64": np.complex64, + "complex128": np.complex128, + "bool": np.bool_, + "str": np.str_, + "bytes": np.bytes_, + "i1": np.int8, + "i2": np.int16, + "i4": np.int32, + "i8": np.int64, + "u1": np.uint8, + "u2": np.uint16, + "u4": np.uint32, + "u8": np.uint64, + "f4": np.float32, + "f8": np.float64, + "c8": np.complex64, + "c16": np.complex128, + "b1": np.bool_, + "S": np.str_, + "V": np.bytes_, +} +blosc2_funcs = constructors + linalg_funcs + elementwise_funcs + reducers +# functions that have to be evaluated before chunkwise lazyexpr machinery +eager_funcs = linalg_funcs + reducers + ["slice"] + ["." 
def get_expr_globals(expression):
    """
    Assemble the globals namespace required to evaluate ``expression``.

    The namespace always contains ``np`` and ``blosc2``.  Every known function
    name that occurs in the expression text is then added, taken from blosc2
    when blosc2 provides it and from the safe NumPy globals otherwise.

    Raises
    ------
    AttributeError
        If a matching function name is provided by neither blosc2 nor the
        safe NumPy globals.
    """
    namespace = {"np": np, "blosc2": blosc2}
    for name in functions:
        # Plain substring test: cheap, and extra entries are harmless
        if name not in expression:
            continue
        if hasattr(blosc2, name):
            # blosc2 implementations take precedence over NumPy ones
            namespace[name] = getattr(blosc2, name)
            continue
        try:
            namespace[name] = safe_numpy_globals[name]
        except KeyError as e:
            raise AttributeError(f"Function {name} not found in blosc2 or numpy") from e
    return namespace
class ReduceOp(Enum):
    """
    Available reduce operations.

    Each member wraps the callable that implements the operation.  SUM, PROD,
    MAX and MIN wrap the binary ufuncs (``np.add``, ``np.multiply``,
    ``np.maximum``, ``np.minimum``); the remaining members wrap reduction
    functions (``np.mean``, ``np.std``, ...) or the cumulative helpers
    ``npcumsum`` / ``npcumprod``.
    """

    # wrap as enum.member so that Python doesn't treat some funcs
    # as class methods (rather than Enum members)
    SUM = _mymember(np.add)
    PROD = _mymember(np.multiply)
    MEAN = _mymember(np.mean)
    STD = _mymember(np.std)
    VAR = _mymember(np.var)
    # Computing a median from partial results is not straightforward because the median
    # is a positional statistic, which means it depends on the relative ordering of all
    # the data points. Unlike statistics such as the sum or mean, you can't compute a median
    # from partial results without knowing the entire dataset, and this is way too expensive
    # for arrays that cannot typically fit in-memory (e.g. disk-based NDArray).
    # MEDIAN = np.median
    MAX = _mymember(np.maximum)
    MIN = _mymember(np.minimum)
    ANY = _mymember(np.any)
    ALL = _mymember(np.all)
    ARGMAX = _mymember(np.argmax)
    ARGMIN = _mymember(np.argmin)
    # Cumulative variants rely on the helpers imported from .utils
    CUMULATIVE_SUM = _mymember(npcumsum)
    CUMULATIVE_PROD = _mymember(npcumprod)
+ """ + pass + + @abstractmethod + def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray: + """ + Return a sorted :ref:`LazyArray`. + + This is only valid for LazyArrays with structured dtypes. + + Parameters + ---------- + order: str, list of str, optional + Specifies which fields to compare first, second, etc. A single + field can be specified as a string. Not all fields need to be + specified, only the ones by which the array is to be sorted. + + Returns + ------- + out: :ref:`LazyArray` + A sorted :ref:`LazyArray`. + """ + pass + + @abstractmethod + def compute( + self, + item: slice | list[slice] | None = None, + fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, + **kwargs: Any, + ) -> blosc2.NDArray: + """ + Return a :ref:`NDArray` containing the evaluation of the :ref:`LazyArray`. + + Parameters + ---------- + item: slice, list of slices, optional + If provided, item is used to slice the operands *prior* to computation; not to retrieve specified slices of + the evaluated result. This difference between slicing operands and slicing the final expression + is important when reductions or a where clause are used in the expression. + + fp_accuracy: :ref:`blosc2.FPAccuracy`, optional + Specifies the floating-point accuracy to be used during computation. + By default, :ref:`blosc2.FPAccuracy.DEFAULT` is used. + + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + These arguments will be set in the resulting :ref:`NDArray`. + Additionally, the following special kwargs are supported: + - ``strict_miniexpr`` (bool): controls whether miniexpr compilation/execution + failures are raised instead of silently falling back to regular chunked eval + for non-DSL expressions. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` containing the result of evaluating the + :ref:`LazyUDF` or :ref:`LazyExpr`. 
+ + Notes + ----- + * If self is a LazyArray from an udf, the kwargs used to store the resulting + array will be the ones passed to the constructor in :func:`lazyudf` (except the + `urlpath`) updated with the kwargs passed when calling this method. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.float64 + >>> shape = [3, 3] + >>> size = shape[0] * shape[1] + >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> # Convert numpy arrays to Blosc2 arrays + >>> a1 = blosc2.asarray(a) + >>> b1 = blosc2.asarray(b) + >>> # Perform the mathematical operation + >>> expr = a1 + b1 + >>> output = expr.compute() + >>> f"Result of a + b (lazy evaluation): {output[:]}" + Result of a + b (lazy evaluation): + [[ 0. 1.25 2.5 ] + [ 3.75 5. 6.25] + [ 7.5 8.75 10. ]] + """ + pass + + @abstractmethod + def __getitem__(self, item: int | slice | Sequence[slice]) -> np.ndarray: + """ + Return a numpy.ndarray containing the evaluation of the :ref:`LazyArray`. + + Parameters + ---------- + item: int, slice or sequence of slices + If provided, item is used to slice the operands *prior* to computation; not to retrieve specified slices of + the evaluated result. This difference between slicing operands and slicing the final expression + is important when reductions or a where clause are used in the expression. + + Returns + ------- + out: np.ndarray + An array with the data containing the evaluated slice. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.float64 + >>> shape = [30, 4] + >>> size = shape[0] * shape[1] + >>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape) + >>> b = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape) + >>> # Convert numpy arrays to Blosc2 arrays + >>> a1 = blosc2.asarray(a) + >>> b1 = blosc2.asarray(b) + >>> # Perform the mathematical operation + >>> expr = a1 + b1 # LazyExpr expression + >>> expr[3] + [2.01680672 2.18487395 2.35294118 2.5210084 ] + >>> expr[2:4] + [[1.34453782 1.51260504 1.68067227 1.8487395 ] + [2.01680672 2.18487395 2.35294118 2.5210084 ]] + """ + pass + + @abstractmethod + def save(self, **kwargs: Any) -> None: + """ + Save the :ref:`LazyArray` on disk. + + Parameters + ---------- + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + The `urlpath` must always be provided. + + Returns + ------- + out: None + + Notes + ----- + * All the operands of the LazyArray must be Python scalars, or :ref:`blosc2.Array` objects. + * If an operand is a :ref:`Proxy`, keep in mind that Python-Blosc2 will only be able to reopen it as such + if its source is a :ref:`SChunk`, :ref:`NDArray` or a :ref:`C2Array` (see :func:`blosc2.open` notes + section for more info). + * This is currently only supported for :ref:`LazyExpr` and :ref:`LazyUDF` + (including kernels decorated with :func:`blosc2.dsl_kernel`). 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.float64 + >>> shape = [3, 3] + >>> size = shape[0] * shape[1] + >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> # Define file paths for storing the arrays + >>> a1 = blosc2.asarray(a, urlpath='a_array.b2nd', mode='w') + >>> b1 = blosc2.asarray(b, urlpath='b_array.b2nd', mode='w') + >>> # Perform the mathematical operation to create a LazyExpr expression + >>> expr = a1 + b1 + >>> # Save the LazyExpr to disk + >>> expr.save(urlpath='lazy_array.b2nd', mode='w') + >>> # Open and load the LazyExpr from disk + >>> disk_expr = blosc2.open('lazy_array.b2nd') + >>> disk_expr[:2] + [[0. 1.25 2.5 ] + [3.75 5. 6.25]] + """ + pass + + # Provide a way to serialize the LazyArray + def to_cframe(self) -> bytes: + """ + Compute LazyArray and convert to cframe. + + Returns + ------- + out: bytes + The buffer containing the serialized :ref:`NDArray` instance. + """ + return self.compute().to_cframe() + + @abstractproperty + def chunks(self) -> tuple[int]: + """ + Return :ref:`LazyArray` chunks. + """ + pass + + @abstractproperty + def blocks(self) -> tuple[int]: + """ + Return :ref:`LazyArray` blocks. 
def convert_inputs(inputs):
    """
    Normalize the operands given to a lazy expression.

    NumPy arrays, blosc2 operands and scalars are passed through untouched;
    anything else is wrapped in a :class:`blosc2.SimpleProxy`.  A falsy or
    empty ``inputs`` gives an empty list.

    If wrapping fails, a hint is printed and the original exception is
    re-raised.
    """
    if not inputs or len(inputs) == 0:
        return []

    def _as_operand(candidate):
        # Pass through what the evaluator can consume directly
        if isinstance(candidate, (np.ndarray, blosc2.Operand)) or np.isscalar(candidate):
            return candidate
        try:
            return blosc2.SimpleProxy(candidate)
        except Exception:
            print(
                "Inputs not being np.ndarray, Array or Python scalar objects"
                " should be convertible to SimpleProxy."
            )
            raise

    return [_as_operand(candidate) for candidate in inputs]
def validate_expr(expr: str) -> None:
    """
    Validate expression for forbidden syntax and valid method names.

    All whitespace is removed and single-quoted string literals are dropped
    before the checks run, so their contents are never inspected.

    Parameters
    ----------
    expr : str
        The expression to validate.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the expression matches a blacklisted pattern, contains an invalid
        character, or calls a method that is not in ``valid_methods``.
    """
    # Remove whitespace and skip quoted strings
    no_whitespace = re.sub(r"\s+", "", expr)
    # Only single-quoted literals are removed; double-quoted ones are kept
    skip_quotes = re.sub(r"(\'[^\']*\')", "", no_whitespace)

    # Check for forbidden patterns
    forbiddens = _blacklist_re.search(skip_quotes)
    if forbiddens is not None:
        raise ValueError(f"'{expr}' is not a valid expression.")

    # Check for invalid characters not covered by the tokenizer
    # NOTE(review): the unescaped `]` right after `[` closes the character class early,
    # so the rest of the pattern (`.,=<>!&|~^]`) is parsed as a literal tail plus an
    # alternation and this check can hardly ever match. Escaping the brackets would
    # enable it, but would then reject e.g. `:` and `"` -- confirm intent before fixing.
    invalid_chars = re.compile(r"[^\w\s+\-*/%()[].,=<>!&|~^]")
    if invalid_chars.search(skip_quotes) is not None:
        invalid_chars = invalid_chars.findall(skip_quotes)
        raise ValueError(f"Expression {expr} contains invalid characters: {invalid_chars}")

    # Check for invalid method names
    # Every `.name(` occurrence is treated as a method call
    method_calls = re.findall(r"\.\b(\w+)\s*\(", skip_quotes)
    for method in method_calls:
        if method not in valid_methods:
            raise ValueError(f"Invalid method name: {method}")
def get_expr_operands(expression: str) -> set:
    """
    Extract the operand names used by an expression.

    A name counts as an operand unless it is the ``np`` namespace, a dtype
    symbol, or a name that has already been seen in call position.

    Parameters
    ----------
    expression : str
        The expression in string form.

    Returns
    -------
    set
        The names of the operands found in the expression.
    """

    class _NameCollector(ast.NodeVisitor):
        def __init__(self):
            self.called = set()
            self.found = set()

        def visit_Call(self, node):
            # Remember plain function names so they are not taken for operands
            callee = node.func
            if isinstance(callee, ast.Name):
                self.called.add(callee.id)
            self.generic_visit(node)

        def visit_Name(self, node):
            name = node.id
            if name == "np":
                # NumPy namespace (e.g. np.int8) is dealt with elsewhere
                return
            excluded = name in self.called or name in dtype_symbols
            if not excluded:
                self.found.add(name)
            self.generic_visit(node)

    collector = _NameCollector()
    collector.visit(ast.parse(expression))
    return set(collector.found)
+ """ + + operand_to_key = {id(v): k for k, v in operands_new.items()} + for k, v in operands_old.items(): # extend operands_to_key with old operands + if isinstance( + v, blosc2.LazyExpr + ): # unroll operands in LazyExpr (only necessary when have reduced a lazyexpr) + d = v.operands + else: + d = {k: v} + for newk, newv in d.items(): + try: + operand_to_key[id(newv)] + except KeyError: + newk = ( + f"o{len(operands_new)}" if newk in operands_new else newk + ) # possible that names coincide + operand_to_key[id(newv)] = newk + operands_new[newk] = newv + + class OperandVisitor(ast.NodeVisitor): + def __init__(self): + self.operandmap = {} + self.operands = {} + self.opcounter = 0 + self.function_names = set() + + def update_func(self, localop): + k = operand_to_key[id(localop)] + if k not in self.operandmap: + newkey = f"o{self.opcounter}" + self.operands[newkey] = operands_new[k] + self.operandmap[k] = newkey + self.opcounter += 1 + return newkey + else: + return self.operandmap[k] + + def visit_Name(self, node): + if node.id == "np": # Skip NumPy namespace (e.g. 
def _index_to_slice_call(index_text):
    """Return ``.slice(<tuple>)`` for a bracketed index, or None if it is not a plain index."""
    try:
        # NOTE(security): eval() runs on expression text. Only `np` is exposed, so
        # names in the index (e.g. `a[i]`) can no longer pick up unrelated
        # variables of this module or of the calling function.
        slicer = eval(f"np.s_{index_text}", {"np": np})
    except Exception:
        # Not something np.s_ can index with (syntax error, unknown name, ...)
        return None
    if not isinstance(slicer, tuple):
        slicer = (slicer,)  # standardise to tuple
    if any(isinstance(el, str) for el in slicer):
        # Field access such as a['f'] cannot be expressed as a slice; leave it alone
        return None
    return f".slice({slicer})"


def convert_to_slice(expression):
    """
    Takes expression and converts all instances of [] to .slice(....)

    Each ``[...]`` group (from a ``[`` up to the first following ``]``) that
    evaluates to a valid ``np.s_`` index is rewritten as ``.slice((...))`` so
    that the lazy expression machinery operates on blosc2 arrays instead of
    decompressing through ``__getitem__``.  Groups that cannot be converted
    (field names, unknown names, nested or unmatched brackets) are copied
    through unchanged.

    Parameters
    ----------
    expression: str

    Returns
    -------
    new_expr : str
    """
    pieces = []
    pos = 0
    while pos < len(expression):
        if expression[pos] == "[":
            close = expression.find("]", pos)
            # An unmatched "[" has nothing to convert
            call = None if close == -1 else _index_to_slice_call(expression[pos : close + 1])
            if call is not None:
                pieces.append(call)
                pos = close + 1
                continue
        pieces.append(expression[pos])
        pos += 1
    return "".join(pieces)
transformer.visit(tree) + + # Generate the modified expression + transformed_expr = ast.unparse(transformed_tree) + + return transformed_expr, transformer.replacements + + +def _isscalar(arr): + return np.isscalar(arr) or (hasattr(arr, "shape") and arr.shape == ()) + + +def validate_inputs(inputs: dict, out=None, reduce=False) -> tuple: # noqa: C901 + """Validate the inputs for the expression.""" + if not inputs: + if out is None: + raise ValueError( + "You really want to pass at least one input or one output for building a LazyArray." + " Maybe you want blosc2.empty() instead?" + ) + if isinstance(out, blosc2.NDArray): + return out.shape, out.chunks, out.blocks, True + else: + return out.shape, None, None, True + + raw_inputs = [input_ for input_ in inputs.values() if (input_ is not np and not _isscalar(input_))] + if not raw_inputs: + # Scalar-only expressions have scalar output shape but can use miniexpr + return (), None, None, True + + # This will raise an exception if the input shapes are not compatible + shape = compute_broadcast_shape(raw_inputs) + + if not all(np.array_equal(shape, input.shape) for input in raw_inputs): + # If inputs have different shapes (other than scalars), we cannot take the fast path + return shape, None, None, False + + # More checks specific to NDArray inputs + # NDInputs are either non-SimpleProxy with chunks or are SimpleProxy with src having chunks + NDinputs = [ + input + for input in raw_inputs + if (hasattr(input, "chunks") and not isinstance(input, blosc2.SimpleProxy)) + or (isinstance(input, blosc2.SimpleProxy) and hasattr(input.src, "chunks")) + ] + if not NDinputs: + # All inputs are NumPy arrays, so we cannot take the fast path + if raw_inputs and hasattr(raw_inputs[0], "shape"): + shape = raw_inputs[0].shape + else: + shape = None + return shape, None, None, False + + # Check if we can take the fast path + # For this we need that the chunks and blocks for all inputs (and a possible output) + # are the same + fast_path = 
True + first_input = NDinputs[0] + # Check the out NDArray (if present) first + if isinstance(out, blosc2.NDArray) and not reduce: + if first_input.shape != out.shape: + return None, None, None, False + if first_input.chunks != out.chunks: + fast_path = False + if first_input.blocks != out.blocks: + fast_path = False + if 0 in out.chunks: # fast_eval has zero division error for 0 shapes + fast_path = False + # Then, the rest of the operands + for input_ in NDinputs: + if first_input.chunks != input_.chunks: + fast_path = False + if first_input.blocks != input_.blocks: + fast_path = False + if 0 in input_.chunks: # fast_eval has zero division error for 0 shapes + fast_path = False + + return first_input.shape, first_input.chunks, first_input.blocks, fast_path + + +def is_full_slice(item): + """Check whether the slice represented by item is a full slice.""" + if item == (): + # This is the case when the user does not pass any slice in compute() method + return True + if isinstance(item, tuple): + return all((isinstance(i, slice) and i == slice(None, None, None)) or i == Ellipsis for i in item) + elif isinstance(item, int | bool): + return False + else: + return item in (slice(None, None, None), Ellipsis) + + +def do_slices_intersect(slice1: list | tuple, slice2: list | tuple) -> bool: + """ + Check whether two slices intersect. 
+ + Parameters + ---------- + slice1: list of slices + The first slice + slice2: list of slices + The second slice + + Returns + ------- + bool + Whether the slices intersect + """ + + # Pad the shorter slice list with full slices (:) + while len(slice1) < len(slice2): + slice1.append(slice(None)) + while len(slice2) < len(slice1): + slice2.append(slice(None)) + + # Check each dimension for intersection + for s1, s2 in zip(slice1, slice2, strict=True): + if s1 is Ellipsis or s2 is Ellipsis: + return True + if s1.start >= s2.stop: + return False + if s1.stop <= s2.start: + return False + + return True + + +def get_chunk(arr, info, nchunk): + reduc, aligned, low_mem, chunks_idx = info + + if low_mem: + # We don't want to uncompress the chunk, so keep it compressed and + # decompress it just before execution. This is normally slower, but + # can be useful in scarce memory situations. + return arr.schunk.get_chunk(nchunk) + + # First check if the chunk is a special zero chunk. + # Using lazychunks is very effective here because we only need to read the header. 
+ if reduc: + # Reductions can treat zero scalars as zero chunks + chunk = arr.schunk.get_lazychunk(nchunk) + special = blosc2.SpecialValue((chunk[31] & 0x70) >> 4) + if special == blosc2.SpecialValue.ZERO: + return np.zeros((), dtype=arr.dtype) + + shape, chunks = arr.shape, arr.chunks + coords = tuple(np.unravel_index(nchunk, chunks_idx)) + slice_ = tuple( + # slice(c * s, min((c + 1) * s, shape)) # uncomment to make code hang here + slice(c * s, min((c + 1) * s, shape[i])) + for i, (c, s) in enumerate(zip(coords, chunks, strict=True)) + ) + chunks_ = tuple(s.stop - s.start for s in slice_) + + if aligned: + # Decompress the whole chunk and return it + buff = arr.schunk.decompress_chunk(nchunk) + bsize = arr.dtype.itemsize * math.prod(chunks_) + return np.frombuffer(buff[:bsize], dtype=arr.dtype).reshape(chunks_) + + return arr[slice_] + + +async def async_read_chunks(arrs, info, queue): + loop = asyncio.get_event_loop() + shape, chunks_ = arrs[0].shape, arrs[0].chunks + with concurrent.futures.ThreadPoolExecutor() as executor: + my_chunk_iter = range(arrs[0].schunk.nchunks) + if len(info) == 5: + if info[-1] is not None: + my_chunk_iter = _sliced_chunk_iter(chunks_, (), shape, axis=info[-1], nchunk=True) + info = info[:4] + for i, nchunk in enumerate(my_chunk_iter): + futures = [ + (index, loop.run_in_executor(executor, get_chunk, arr, info, nchunk)) + for index, arr in enumerate(arrs) + ] + chunks = await asyncio.gather(*(future for index, future in futures), return_exceptions=True) + chunks_sorted = [] + for chunk in chunks: + if isinstance(chunk, Exception): + # Handle the exception (e.g., log it, raise a custom exception, etc.) 
def async_read_chunks_thread(arrs, info, queue):
    """Run ``async_read_chunks`` to completion; meant to be a thread target.

    Any exception raised by the reader is put on ``queue`` so that the
    consumer can re-raise it, instead of the thread dying silently and the
    consumer waiting forever for the end-of-stream marker.
    """
    try:
        asyncio.run(async_read_chunks(arrs, info, queue))
    except Exception as exc:
        queue.put(exc)


def sync_read_chunks(arrs, info):
    """Yield the items produced by a background chunk-reading thread.

    Parameters
    ----------
    arrs: list
        The arrays whose chunks are read.
    info: tuple
        Reader configuration, passed through to ``async_read_chunks``.

    Yields
    ------
    The items put on the queue by the reader, in order.

    Raises
    ------
    Exception
        Whatever exception made the reader thread fail.
    RuntimeError
        If the reader thread exited without signalling the end of the stream.
    """
    queue_size = 2  # maximum number of chunks in the queue
    queue = Queue(maxsize=queue_size)

    # Start the async file reading in a separate thread
    thread = threading.Thread(target=async_read_chunks_thread, args=(arrs, info, queue))
    thread.start()

    # Read the chunks synchronously from the queue
    while True:
        try:
            item = queue.get(timeout=1)  # Wait for the next chunk
        except Empty:
            # Keep waiting while the producer is alive, or while it left items
            # behind; once it is dead the queue contents are final.
            if thread.is_alive() or not queue.empty():
                continue
            raise RuntimeError(
                "chunk reader thread exited without signalling the end of the chunks"
            ) from None
        if item is None:  # End of chunks
            break
        if isinstance(item, Exception):
            # The reader failed: surface its error in the consumer
            thread.join()
            raise item
        yield item
    thread.join()


def read_nchunk(arrs, info):
    """Yield only the chunk lists from ``sync_read_chunks``, dropping the index."""
    for _, chunks in sync_read_chunks(arrs, info):
        yield chunks


# Module-wide chunk iterator, created lazily by fill_chunk_operands()
iter_chunks = None
+ """ + global iter_chunks + + if iter_disk: + # Use an environment variable to control the memory usage + low_mem = os.environ.get("BLOSC_LOW_MEM", False) + # This method is only useful when all operands are NDArray and shows better + # performance only when at least one of them is persisted on disk + if iter_chunks is None: + # Initialize the iterator for reading the chunks + # Take any operand (all should have the same shape and chunks) + key, arr = next(iter(operands.items())) + chunks_idx, _ = get_chunks_idx(arr.shape, arr.chunks) + info = (reduc, aligned[key], low_mem, chunks_idx, axis) + iter_chunks = read_nchunk(list(operands.values()), info) + # Run the asynchronous file reading function from a synchronous context + chunks = next(iter_chunks) + + for i, (key, value) in enumerate(operands.items()): + # Chunks are already decompressed, so we can use them directly + if not low_mem: + if full_chunk: + chunk_operands[key] = chunks[i] + else: + chunk_operands[key] = value[slice_] + continue + # Otherwise, we need to decompress them + if aligned[key]: + buff = blosc2.decompress2(chunks[i]) + bsize = value.dtype.itemsize * math.prod(chunks_) + chunk_operands[key] = np.frombuffer(buff[:bsize], dtype=value.dtype).reshape(chunks_) + else: + chunk_operands[key] = value[slice_] + return + + # Get the starts and stops for the slice + starts = [s.start if s.start is not None else 0 for s in slice_] + stops = [s.stop if s.stop is not None else sh for s, sh in zip(slice_, chunks_, strict=True)] + + for key, value in operands.items(): + if np.isscalar(value): + chunk_operands[key] = value + continue + if value.shape == (): + chunk_operands[key] = value[()] + continue + if not full_chunk or not isinstance(value, blosc2.NDArray): + # The chunk is not a full one, or has padding, or is not a blosc2.NDArray, + # so we need to go the slow path + chunk_operands[key] = value[slice_] + continue + + # If key is in operands, we can reuse the buffer + if ( + key in chunk_operands + and 
chunks_ == chunk_operands[key].shape + and isinstance(value, blosc2.NDArray) + ): + value.get_slice_numpy(chunk_operands[key], (starts, stops)) + continue + + if aligned[key]: + # Decompress the whole chunk and store it + buff = value.schunk.decompress_chunk(nchunk) + bsize = value.dtype.itemsize * math.prod(chunks_) + chunk_operands[key] = np.frombuffer(buff[:bsize], dtype=value.dtype).reshape(chunks_) + else: + chunk_operands[key] = value[slice_] + + +def _apply_jit_backend_pragma(expression: str, inputs: dict, jit_backend: str | None) -> str: + if jit_backend is None: + return expression + if jit_backend not in ("tcc", "cc"): + raise ValueError("jit_backend must be one of: None, 'tcc', 'cc'") + + pragma = f"# me:compiler={jit_backend}\n" + stripped = expression.lstrip() + if stripped.startswith("def "): + if "# me:compiler=" in expression: + return expression + return pragma + expression + params = ", ".join(k for k, v in inputs.items() if hasattr(v, "dtype")) + return f"{pragma}def __me_auto({params}):\n return {expression}" + + +def _inject_dummy_param_for_zero_input_dsl(expression: str, param_name: str) -> str: + pattern = re.compile(r"^(\s*def\s+[A-Za-z_]\w*)\(\s*\)(\s*:)", re.MULTILINE) + rewritten, nsubs = pattern.subn(rf"\1({param_name})\2", expression, count=1) + if nsubs == 0: + raise ValueError("Could not inject dummy DSL parameter for zero-input kernel") + return rewritten + + +def _is_dsl_kernel_expression(expression) -> bool: + return isinstance(expression, DSLKernel) and expression.dsl_source is not None + + +def _dsl_miniexpr_required_message(reason: str | None = None) -> str: + message = "DSL kernel requires miniexpr." 
+ if reason: + message = f"{message} {reason}" + return message + + +def _raise_dsl_miniexpr_required(reason: str | None = None) -> None: + raise RuntimeError(_dsl_miniexpr_required_message(reason)) + + +def fast_eval( # noqa: C901 + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + **kwargs, +) -> blosc2.NDArray | np.ndarray: + """Evaluate the expression in chunks of operands using a fast path. + + Parameters + ---------- + expression: str or callable + The expression or udf to evaluate. + operands: dict + A dictionary containing the operands for the expression. + getitem: bool, optional + Indicates whether the expression is being evaluated for a getitem operation or compute(). + Default is False. + kwargs: Any, optional + Additional keyword arguments supported by the :func:`empty` constructor. + + Returns + ------- + :ref:`NDArray` or np.ndarray + The output array. + """ + global try_miniexpr + + # Use a local copy so we don't modify the global + use_miniexpr = try_miniexpr + + is_dsl = _is_dsl_kernel_expression(expression) + expr_string = expression.dsl_source if is_dsl else expression + dsl_disable_reason = None + + # Disable miniexpr for UDFs (callable expressions), except DSL kernels + if callable(expression) and not is_dsl: + use_miniexpr = False + + out = kwargs.pop("_output", None) + ne_args: dict = kwargs.pop("_ne_args", {}) + if ne_args is None: + ne_args = {} + fp_accuracy = kwargs.pop("fp_accuracy", blosc2.FPAccuracy.DEFAULT) + jit = kwargs.pop("jit", None) + jit_backend = kwargs.pop("jit_backend", None) + strict_miniexpr = kwargs.pop("strict_miniexpr", None) + _in_place = kwargs.pop("in_place", None) + dtype = kwargs.pop("dtype", None) + requested_shape = kwargs.pop("shape", None) + where: dict | None = kwargs.pop("_where_args", None) + if strict_miniexpr is None: + # Be strict by default for DSL kernels to avoid silently losing DSL fast-path regressions. 
+ strict_miniexpr = bool(is_dsl) + if where is not None: + # miniexpr does not support where(); use the regular path. + use_miniexpr = False + if is_dsl: + dsl_disable_reason = "DSL kernels cannot be run without miniexpr." + if isinstance(out, blosc2.NDArray): + # If 'out' has been passed, and is a NDArray, use it as the base array + basearr = out + elif isinstance(out, np.ndarray): + # If 'out' is a NumPy array, create a NDArray with the same shape and dtype + basearr = blosc2.empty(out.shape, dtype=out.dtype, **kwargs) + else: + # Otherwise, find the operand with the 'chunks' attribute and the longest shape + operands_with_chunks = [o for o in operands.values() if hasattr(o, "chunks")] + if operands_with_chunks and any(arr.chunks != () for arr in operands_with_chunks): + basearr = max(operands_with_chunks, key=lambda x: len(x.shape)) + else: + if requested_shape is None: + raise ValueError("Cannot infer output shape without operands; pass `shape` explicitly") + if dtype is None: + raise ValueError("Cannot infer output dtype without operands; pass `dtype` explicitly") + basearr = blosc2.empty( + requested_shape, + dtype=dtype, + chunks=kwargs.get("chunks"), + blocks=kwargs.get("blocks"), + ) + + # Get the shape of the base array + shape = basearr.shape + chunks = kwargs.pop("chunks", None) + if chunks is None: + chunks = basearr.chunks + blocks = kwargs.pop("blocks", None) + if blocks is None: + blocks = basearr.blocks + # Check whether the partitions are aligned and behaved + aligned = { + k: False if not hasattr(k, "chunks") else blosc2.are_partitions_aligned(k.shape, k.chunks, k.blocks) + for k in operands + } + behaved = blosc2.are_partitions_behaved(shape, chunks, blocks) + + # Check that all operands are NDArray for fast path + all_ndarray = all(isinstance(value, blosc2.NDArray) and value.shape != () for value in operands.values()) + # Check that there is some NDArray that is persisted in the disk + any_persisted = any( + (isinstance(value, blosc2.NDArray) 
and value.shape != () and value.schunk.urlpath is not None) + for value in operands.values() + ) + if not blosc2.IS_WASM: + iter_disk = all_ndarray and any_persisted + else: + # WebAssembly does not support threading, so we cannot use the iter_disk option + iter_disk = False + + expr_string_miniexpr = expr_string + operands_miniexpr = operands + if use_miniexpr and isinstance(expr_string, str): + try: + expr_string_miniexpr, operands_miniexpr = specialize_miniexpr_inputs(expr_string, operands) + except Exception: + # If specialization fails, keep original expression/operands and let normal checks decide. + expr_string_miniexpr = expr_string + operands_miniexpr = operands + + # Check whether we can use miniexpr + if use_miniexpr: + if is_dsl and isinstance(expression, DSLKernel) and not operands_miniexpr: + # Scalar specialization may remove all kernel inputs at runtime (e.g. `f(start)` with start=3), + # so inject a dummy array operand for miniexpr even if the original DSL signature had parameters. + dummy_name = "__me_dummy0" + dummy_dtype = dtype if dtype is not None else np.uint8 + expr_string_miniexpr = _inject_dummy_param_for_zero_input_dsl(expr_string_miniexpr, dummy_name) + operands_miniexpr = { + dummy_name: blosc2.zeros(shape, dtype=dummy_dtype, chunks=chunks, blocks=blocks) + } + if math.prod(shape) <= 1: + # Avoid miniexpr for scalar-like outputs; current prefilter path is unstable here. + use_miniexpr = False + if is_dsl and dsl_disable_reason is None: + dsl_disable_reason = "scalar-like outputs are not supported by the DSL miniexpr path." + if ( + isinstance(expr_string_miniexpr, str) + and + # Prefix scans are stateful across chunks and not safe for miniexpr prefilter execution. + any(tok in expr_string_miniexpr for tok in ("cumsum(", "cumprod(", "cumulative_sum(")) + ): + use_miniexpr = False + if is_dsl and dsl_disable_reason is None: + dsl_disable_reason = "cumulative scans are not supported by the DSL miniexpr path." 
+ if isinstance(expr_string_miniexpr, str): + expr_string_miniexpr = _apply_jit_backend_pragma( + expr_string_miniexpr, operands_miniexpr, jit_backend + ) + all_ndarray_miniexpr = all( + isinstance(value, blosc2.NDArray) and value.shape != () for value in operands_miniexpr.values() + ) + # Require aligned NDArray operands with identical chunk/block grid. + same_shape = all(hasattr(op, "shape") and op.shape == shape for op in operands_miniexpr.values()) + same_chunks = all(hasattr(op, "chunks") and op.chunks == chunks for op in operands_miniexpr.values()) + same_blocks = all(hasattr(op, "blocks") and op.blocks == blocks for op in operands_miniexpr.values()) + if not (same_shape and same_chunks and same_blocks): + use_miniexpr = False + if is_dsl and dsl_disable_reason is None: + dsl_disable_reason = "all DSL operands must share shape/chunks/blocks." + if not (all_ndarray_miniexpr and out is None): + use_miniexpr = False + if is_dsl and dsl_disable_reason is None: + dsl_disable_reason = ( + "DSL kernels require NDArray inputs and do not support the `out` argument." + ) + has_complex = any( + isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating") + for op in operands_miniexpr.values() + ) + if isinstance(expr_string_miniexpr, str) and has_complex: + if sys.platform == "win32" or blosc2.IS_WASM: + # On Windows and WebAssembly, miniexpr has issues with complex numbers + use_miniexpr = False + if is_dsl and dsl_disable_reason is None: + dsl_disable_reason = "complex DSL kernels are disabled on Windows and WebAssembly." + if any(tok in expr_string_miniexpr for tok in ("!=", "==", "<=", ">=", "<", ">")): + use_miniexpr = False + if is_dsl and dsl_disable_reason is None: + dsl_disable_reason = "complex comparisons are not supported by miniexpr." 
+ + if is_dsl and not use_miniexpr: + _raise_dsl_miniexpr_required(dsl_disable_reason) + + if use_miniexpr: + cparams = kwargs.pop("cparams", blosc2.CParams()) + # All values will be overwritten, so we can use an uninitialized array + res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs) + prefilter_set = False + try: + res_eval._set_pref_expr( + expr_string_miniexpr, + operands_miniexpr, + fp_accuracy=fp_accuracy, + jit=jit, + ) + prefilter_set = True + # print("expr->miniexpr:", expression, fp_accuracy) + # Data to compress is fetched from operands, so it can be uninitialized here + data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8) + # Exercise prefilter for each chunk + for nchunk in range(res_eval.schunk.nchunks): + res_eval.schunk.update_data(nchunk, data, copy=False) + except Exception as e: + use_miniexpr = False + if is_dsl: + reason = "miniexpr compilation or execution failed for this DSL kernel." + if isinstance(expression, DSLKernel): + report = validate_dsl(expression) + if not report["valid"] and report["error"]: + reason = report["error"] + else: + reason = f"{reason}\nBackend error: {e}" + raise RuntimeError(_dsl_miniexpr_required_message(reason)) from e + if strict_miniexpr: + raise RuntimeError("miniexpr evaluation failed while strict_miniexpr=True") from e + finally: + if prefilter_set: + res_eval.schunk.remove_prefilter("miniexpr") + global iter_chunks + # Ensure any background reading thread is closed + iter_chunks = None + + if not use_miniexpr: + # If miniexpr failed, fallback to regular evaluation + # (continue to the manual chunked evaluation below) + pass + else: + if getitem: + return res_eval[()] + return res_eval + + chunk_operands = {} + # Check which chunks intersect with _slice + all_chunks = get_intersecting_chunks((), shape, chunks) # if _slice is (), returns all chunks + for nchunk, chunk_slice in enumerate(all_chunks): + cslice = chunk_slice.raw + offset = tuple(s.start for s in 
cslice) # offset for the udf + chunks_ = tuple(s.stop - s.start for s in cslice) + + full_chunk = chunks_ == chunks # slice is same as chunk + fill_chunk_operands( + operands, cslice, chunks_, full_chunk, aligned, nchunk, iter_disk, chunk_operands + ) + + # Since ne_evaluate() can return a dtype larger than the one in computed in the expression, + # we cannot take this fast path + # if isinstance(out, np.ndarray) and not where: + # # Fast path: put the result straight in the output array (avoiding a memory copy) + # if callable(expression): + # expression(tuple(chunk_operands.values()), out[slice_], offset=offset) + # else: + # ne_evaluate(expression, chunk_operands, out=out[slice_]) + # continue + if out is None: + # We can enter here when using any of the compute() or __getitem__() methods + if getitem: + out = np.empty(shape, dtype=dtype) + else: + out = blosc2.empty(shape, chunks=chunks, blocks=blocks, dtype=dtype, **kwargs) + + if callable(expression): + if _is_dsl_kernel_expression(expression): + _raise_dsl_miniexpr_required( + "internal fallback attempted to execute the DSL kernel directly in Python." + ) + if _in_place: + expression(tuple(chunk_operands.values()), out, offset=offset) + continue + result = np.empty(chunks_, dtype=out.dtype) + expression(tuple(chunk_operands.values()), result, offset=offset) + else: + if where is None: + result = ne_evaluate(expression, chunk_operands, **ne_args) + else: + # Apply the where condition (in result) + if len(where) == 2: + new_expr = f"where({expression}, _where_x, _where_y)" + result = ne_evaluate(new_expr, chunk_operands, **ne_args) + else: + # We do not support one or zero operands in the fast path yet + raise ValueError("Fast path: the where condition must be a tuple with two elements") + + # Store the result in the output array + if getitem: + try: + out[cslice] = result + except ComplexWarning: + # The result is a complex number, so we need to convert it to real. 
def compute_start_index(shape, slice_obj):
    """
    Compute the index of the starting element of a slice in an n-dimensional array.

    The index refers to the flattened (row-major) array. Index tuples shorter
    than ``shape`` are completed with full slices, and an ``Ellipsis`` stands
    for as many full slices as needed, so every start is weighted by the
    stride of the dimension it really belongs to.

    Parameters
    ----------
    shape : tuple
        The shape of the n-dimensional array.
    slice_obj : tuple of slices
        The slice object representing the slice of the array. Entries may be
        slices, integers or ``Ellipsis``; a single non-tuple entry is accepted.

    Returns
    -------
    start_index : int
        The index of the starting element of the slice.

    Raises
    ------
    IndexError
        If there are more index entries than dimensions.
    """
    if not isinstance(slice_obj, tuple):
        slice_obj = (slice_obj,)

    ndim = len(shape)
    ellipsis_at = next((i for i, sl in enumerate(slice_obj) if sl is Ellipsis), None)
    if ellipsis_at is None:
        missing = ndim - len(slice_obj)
        expanded = slice_obj + (slice(None),) * missing
    else:
        # Only the first Ellipsis expands; any further one covers one dimension
        missing = ndim - (len(slice_obj) - 1)
        expanded = slice_obj[:ellipsis_at] + (slice(None),) * missing + slice_obj[ellipsis_at + 1 :]
    if len(expanded) > ndim:
        raise IndexError(f"too many indices: got {len(expanded)} for an array with {ndim} dimensions")

    start_index = 0
    stride = 1
    # Walk from the last dimension backwards so that `stride` is always the
    # number of elements spanned by one step along the current dimension.
    for dim in range(ndim - 1, -1, -1):
        sl = expanded[dim]
        if isinstance(sl, slice):
            start = sl.start if sl.start is not None else 0
        elif sl is Ellipsis:
            start = 0
        else:
            start = sl

        start_index += start * stride
        stride *= shape[dim]

    return start_index
+ _slice: ndindex.Tuple sequence of slices and ints. Default = ndindex.Tuple(), optional + If provided, only the chunks that intersect with this slice + will be evaluated. + shape: tuple | None + The shape of the full (unsliced result). Typically passed on from parent LazyArray. + If None, a guess is made from broadcasting the operands. + kwargs: Any, optional + Additional keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + :ref:`NDArray` or np.ndarray + The output array. + """ + out: blosc2.NDArray | None = kwargs.pop("_output", None) + ne_args: dict = kwargs.pop("_ne_args", {}) + if ne_args is None: + ne_args = {} + chunks = kwargs.get("chunks") + where: dict | None = kwargs.pop("_where_args", None) + _indices = kwargs.pop("_indices", False) + if _indices and (not where or len(where) != 1): + raise NotImplementedError("Indices can only be used with one where condition") + _order = kwargs.pop("_order", None) + if _order is not None and not isinstance(_order, list): + # Always use a list for _order + _order = [_order] + + dtype = kwargs.pop("dtype", None) + _in_place = kwargs.pop("in_place", False) + shape_slice = None + need_final_slice = False + + # keep orig_slice + _slice = _slice.raw + orig_slice = _slice + + # Compute the shape and chunks of the output array, including broadcasting + if shape is None: # lazyudf provides shape kwarg + shape = compute_broadcast_shape(operands.values()) + + if _slice != (): + # Check whether _slice contains an integer, or any step that are not None or 1 + if any((isinstance(s, int)) for s in _slice): + need_final_slice = True + _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) + # shape_slice in general not equal to final shape: + # dummy dims (due to ints) will be dealt with by taking final_slice + shape_slice = ndindex.ndindex(_slice).newshape(shape) + mask_slice = np.array([isinstance(i, int) for i in orig_slice], dtype=np.bool_) + if out is not None: + 
shape_ = shape_slice if shape_slice is not None else shape + if shape_ != out.shape and not _in_place: + raise ValueError("Provided output shape does not match the slice shape.") + + if chunks is None: # Guess chunk shape + # Either out, or operand with `chunks`, can be used to get the chunks + operands_ = [o for o in operands.values() if hasattr(o, "chunks") and o.shape == shape] + if out is not None and hasattr(out, "chunks"): + chunks = out.chunks + elif len(operands_) > 0: + # Use the first operand with chunks to get the necessary chunking information + chunks = operands_[0].chunks + else: + # Typically, we enter here when using UDFs, and out is a NumPy array. + # Use operands to get the shape and chunks + # operand will be a 'fake' NDArray just to get the necessary chunking information + fp_accuracy = kwargs.pop("fp_accuracy", None) + temp = blosc2.empty(shape, dtype=dtype) + if fp_accuracy is not None: + kwargs["fp_accuracy"] = fp_accuracy + chunks = temp.chunks + del temp + + # The starting point for the indices of the inputs + leninputs = compute_start_index(shape, orig_slice) if orig_slice != () else 0 + lenout = 0 + behaved = False + indices_ = None + chunk_indices = None + dtype_ = np.int64 if _indices else dtype + if _order is not None: + # Get the dtype of the array to sort + dtype_ = operands["_where_x"].dtype + # Now, use only the fields that are necessary for the sorting + dtype_ = np.dtype([(f, dtype_[f]) for f in _order]) + + # Iterate over the operands and get the chunks + chunk_operands = {} + # Check which chunks intersect with _slice (handles zero chunks internally) + intersecting_chunks = get_intersecting_chunks( + _slice, shape, chunks + ) # if _slice is (), returns all chunks + ratio = ( + np.ceil(np.asarray(shape) / np.asarray(chunks)).astype(np.int64) + if 0 not in chunks + else np.asarray(shape) + ) + + for chunk_slice in intersecting_chunks: + # Check whether current cslice intersects with _slice + cslice = chunk_slice.raw + nchunk = ( 
+ builtins.sum(c.start // chunks[i] * np.prod(ratio[i + 1 :]) for i, c in enumerate(cslice)) + if 0 not in chunks + else 0 + ) + if cslice != () and _slice != (): + # get intersection of chunk and target + cslice = step_handler(cslice, _slice) + offset = tuple(s.start for s in cslice) # offset for the udf + cslice_shape = tuple(s.stop - s.start for s in cslice) + len_chunk = math.prod(cslice_shape) + # get local index of part of out that is to be updated + cslice_subidx = ( + ndindex.ndindex(cslice).as_subindex(_slice).raw + ) # in the case _slice=(), just gives cslice + + _get_chunk_operands(operands, cslice, chunk_operands, shape) + + if out is None: + shape_ = shape_slice if shape_slice is not None else shape + if where is not None and len(where) < 2: + # The result is a linear array + shape_ = math.prod(shape_) + if getitem or _order: + out = np.empty(shape_, dtype=dtype_) + if _order: + indices_ = np.empty(shape_, dtype=np.int64) + else: + # if "chunks" not in kwargs and (where is None or len(where) == 2): + # Let's use the same chunks as the first operand (it could have been automatic too) + # out = blosc2.empty(shape_, chunks=chunks, dtype=dtype_, **kwargs) + # out = blosc2.empty(shape_, dtype=dtype_, **kwargs) + if "chunks" in kwargs and (where is not None and len(where) < 2 and len(shape_) > 1): + # Remove the chunks argument if the where condition is not a tuple with two elements + kwargs.pop("chunks") + fp_accuracy = kwargs.pop("fp_accuracy", None) + out = blosc2.empty(shape_, dtype=dtype_, **kwargs) + if fp_accuracy is not None: + kwargs["fp_accuracy"] = fp_accuracy + # Check if the in out partitions are well-behaved (i.e. no padding) + behaved = blosc2.are_partitions_behaved(out.shape, out.chunks, out.blocks) + # Evaluate the expression using chunks of operands + + if callable(expression): + if _is_dsl_kernel_expression(expression): + _raise_dsl_miniexpr_required( + "internal sliced fallback attempted to execute the DSL kernel directly in Python." 
+ ) + if _in_place: # presumably the user knows what they're doing + # edit out in-place + expression(tuple(chunk_operands.values()), out, offset=offset) + else: + result = np.empty(cslice_shape, dtype=out.dtype) # raises error if out is None + # cslice should be equal to cslice_subidx + # Call the udf directly and use result as the output array + expression(tuple(chunk_operands.values()), result, offset=offset) + out[cslice_subidx] = result + continue + + if _indices or _order: + indices = np.arange(leninputs, leninputs + len_chunk, dtype=np.int64).reshape(cslice_shape) + leninputs += len_chunk + result, chunk_indices = _get_result(expression, chunk_operands, ne_args, where, indices, _order) + else: + result, _ = _get_result(expression, chunk_operands, ne_args, where) + # Enforce contiguity of result (necessary to fill the out array) + # but avoid copy if already contiguous + result = np.require(result, requirements="C") + + if where is None or len(where) == 2: + if behaved and result.shape == out.chunks and result.dtype == out.dtype: + # Fast path + # TODO: Check this only works when slice is () + out.schunk.update_data(nchunk, result, copy=False) + else: + try: + out[cslice_subidx] = result + except ComplexWarning: + # The result is a complex number, so we need to convert it to real. + # This is a workaround for rigidness of numpy with type casting. 
+ result = result.real.astype(out.dtype) + out[cslice_subidx] = result + elif len(where) == 1: + lenres = len(result) + out[lenout : lenout + lenres] = result + if _order is not None: + indices_[lenout : lenout + lenres] = chunk_indices + lenout += lenres + else: + raise ValueError("The where condition must be a tuple with one or two elements") + + if where is not None and len(where) < 2: # Don't need to take final_slice since filled up from 0 index + if _order is not None: + # argsort the result following _order + new_order = np.argsort(out[:lenout]) + # And get the corresponding indices in array + out = indices_[new_order] + # Cap the output array to the actual length + if isinstance(out, np.ndarray): + out = out[:lenout] + else: + out.resize((lenout,)) + + else: # Need to take final_slice since filled up array according to slice_ for each chunk + if need_final_slice: # only called if out was None + if isinstance(out, np.ndarray): + squeeze_axis = np.where(mask_slice)[0] + squeeze_axis = np.squeeze(squeeze_axis) # handle 1d mask_slice + out = np.squeeze(out, squeeze_axis) + elif isinstance(out, blosc2.NDArray): + # It *seems* better to choose an automatic chunks and blocks for the output array + # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks) + out = out.squeeze(np.where(mask_slice)[0]) + else: + raise ValueError("The output array is not a NumPy array or a NDArray") + + return out + + +def slices_eval_getitem( + expression: str, + operands: dict, + _slice=NDINDEX_EMPTY_TUPLE, + **kwargs, +) -> np.ndarray: + """Evaluate the expression in slices of operands. + + This function can handle operands with different chunk shapes and + can evaluate only a slice of the output array if needed. + + This is a special (and much simplified) version of slices_eval() that + only works for the case we are returning a NumPy array, where is + either None or has two args, and expression is not callable. 
+ + One inconvenient of this function is that it tries to evaluate + the whole slice in one go. For small slices, this is good, as it + is normally way more efficient. However, for larger slices this + can require large amounts of memory per operand. + + Parameters + ---------- + expression: str or callable + The expression or user-defined (udf) to evaluate. + operands: dict + A dictionary containing the operands for the expression. + _slice: ndindex.Tuple sequence of slices and ints. Default = ndindex.Tuple(), optional + If provided, this slice will be evaluated. + kwargs: Any, optional + Additional keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + :ref:`NDArray` or np.ndarray + The output array. + """ + out: np.ndarray | None = kwargs.pop("_output", None) + ne_args: dict = kwargs.pop("_ne_args", {}) + _in_place = kwargs.pop("in_place", False) + if ne_args is None: + ne_args = {} + where: dict | None = kwargs.pop("_where_args", None) + + dtype = kwargs.pop("dtype", None) + shape = kwargs.pop("shape", None) + if shape is None: + if out is None: + # Compute the shape and chunks of the output array, including broadcasting + shape = compute_broadcast_shape(operands.values()) + else: + shape = out.shape + + # compute the shape of the output array + _slice = _slice.raw + _slice_bcast = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice) + slice_shape = ndindex.ndindex(_slice_bcast).newshape(shape) # includes dummy dimensions + + # Get the slice of each operand + slice_operands = {} + for key, value in operands.items(): + if np.isscalar(value): + slice_operands[key] = value + continue + if value.shape == (): + slice_operands[key] = value[()] + continue + if check_smaller_shape(value.shape, shape, slice_shape, _slice_bcast): + # We need to fetch the part of the value that broadcasts with the operand + smaller_slice = compute_smaller_slice(shape, value.shape, _slice) + slice_operands[key] = value[smaller_slice] + 
def infer_reduction_dtype(dtype, operation):
    """Return the dtype of the result of reducing data of ``dtype`` with ``operation``.

    Parameters
    ----------
    dtype: dtype-like
        The dtype of the data being reduced.
    operation: ReduceOp
        The reduction to be performed.

    Returns
    -------
    The dtype of the reduction result.

    Raises
    ------
    ValueError
        If ``operation`` is not a supported reduction.
    """
    # It may change in the future, but mostly array-api compliant
    single_precision = dtype in (np.float32, np.complex64)
    my_float = np.result_type(dtype, np.float32 if single_precision else blosc2.DEFAULT_FLOAT)

    if operation in {ReduceOp.SUM, ReduceOp.PROD, ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}:
        # Accumulating reductions widen booleans and integers to 64 bits
        if np.issubdtype(dtype, np.bool_):
            return np.int64
        if np.issubdtype(dtype, np.unsignedinteger):
            return np.result_type(dtype, np.uint64)
        if np.issubdtype(dtype, np.integer):
            return np.result_type(dtype, np.int64)
        return np.result_type(dtype, my_float)
    if operation in {ReduceOp.MEAN, ReduceOp.STD, ReduceOp.VAR}:
        return my_float
    if operation in {ReduceOp.MIN, ReduceOp.MAX}:
        return dtype
    if operation in {ReduceOp.ANY, ReduceOp.ALL}:
        return np.bool_
    if operation in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}:
        return np.int64
    raise ValueError(f"Unsupported operation: {operation}")
+def step_handler(cslice, _slice): + out = () + for s1, s2 in zip(cslice, _slice, strict=True): + s1start, s1stop = s1.start, s1.stop + s2start, s2stop, s2step = s2.start, s2.stop, s2.step + # assume s1step = 1 + newstart = builtins.max(s1start, s2start) + newstop = builtins.min(s1stop, s2stop) + rem = (newstart - s2start) % s2step + if rem != 0: # only pass through here if s2step is not 1 + newstart += s2step - rem + # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual + # so n = (stop - start - 1) // step + newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1 + out += (slice(newstart, newstop, s2step),) + return out + + +def reduce_slices( # noqa: C901 + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + reduce_args, + _slice=NDINDEX_EMPTY_TUPLE, + **kwargs, +) -> blosc2.NDArray | np.ndarray: + """Evaluate the expression in chunks of operands. + + This function can handle operands with different chunk shapes. + Also, it can be used when only a slice of the output array is needed. + + Parameters + ---------- + expression: str or callable + The expression or user-defined function (udf) to evaluate. + operands: dict + A dictionary containing the operands for the operands. + reduce_args: dict + A dictionary with arguments to be passed to the reduction function. + _slice: ndindex.Tuple sequence of slices and ints. Default = ndindex.Tuple(), optional + If provided, only the chunks that intersect with this slice + will be evaluated. + kwargs: Any, optional + Additional keyword arguments supported by the :func:`empty` constructor. + + Returns + ------- + :ref:`NDArray` or np.ndarray + The resulting output array. + """ + global try_miniexpr + + # Use a local copy so we don't modify the global + use_miniexpr = try_miniexpr # & False + + if blosc2.IS_WASM: + # Reduction miniexpr on wasm is currently unstable for scalar reductions (axis=None). 
+ # Keep wasm reduction evaluation on the regular chunked path until stabilized. + use_miniexpr = False + + out = kwargs.pop("_output", None) + res_out_ = None # temporary required to store max/min for argmax/argmin + ne_args: dict = kwargs.pop("_ne_args", {}) + if ne_args is None: + ne_args = {} + fp_accuracy = kwargs.pop("fp_accuracy", blosc2.FPAccuracy.DEFAULT) + jit = kwargs.pop("jit", None) + jit_backend = kwargs.pop("jit_backend", None) + where: dict | None = kwargs.pop("_where_args", None) + reduce_op = reduce_args.pop("op") + reduce_op_str = reduce_args.pop("op_str", None) + axis = reduce_args["axis"] + keepdims = reduce_args.get("keepdims", False) + include_initial = reduce_args.pop("include_initial", False) + dtype = reduce_args.get("dtype", None) + if dtype is None: + dtype = kwargs.pop("dtype", None) + dtype = infer_reduction_dtype(dtype, reduce_op) + else: + del kwargs["dtype"] + + # Compute the shape and chunks of the output array, including broadcasting + shape = compute_broadcast_shape(operands.values()) + + _slice = _slice.raw + shape_slice = shape + mask_slice = np.array([isinstance(i, int) for i in _slice], dtype=np.bool_) + if out is None and _slice != (): + _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) + shape_slice = ndindex.ndindex(_slice).newshape(shape) + # shape_slice in general not equal to final shape: + # dummy dims (due to ints) will be dealt with by taking final_slice + + # after slicing, we reduce to calculate shape of output + if axis is None: + axis = tuple(range(len(shape_slice))) + elif np.isscalar(axis): + axis = (axis,) + axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis) + if np.any(mask_slice): + add_idx = np.cumsum(mask_slice) + axis = tuple(a + add_idx[a] for a in axis) # axis now refers to new shape with dummy dims + if reduce_args["axis"] is not None: + # conserve as integer if was not tuple originally + reduce_args["axis"] = axis[0] if np.isscalar(reduce_args["axis"]) else 
axis + if reduce_op in {ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}: + reduced_shape = (np.prod(shape_slice),) if reduce_args["axis"] is None else shape_slice + # if reduce_args["axis"] is None, have to have 1D input array; otherwise, ensure positive scalar + reduce_args["axis"] = 0 if reduce_args["axis"] is None else axis[0] + if include_initial: + reduced_shape = tuple( + s + 1 if i == reduce_args["axis"] else s for i, s in enumerate(shape_slice) + ) + else: + if keepdims: + reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice)) + else: + reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis) + mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]] + + if out is not None and reduced_shape != out.shape: + raise ValueError("Provided output shape does not match the reduced shape.") + + # Choose the array with the largest shape as the reference for chunks + # Note: we could have expr = blosc2.lazyexpr('numpy_array + 1') (i.e. no choice for chunks) + blosc2_arrs = tuple(o for o in operands.values() if hasattr(o, "chunks")) + fast_path = False + all_ndarray = False + any_persisted = False + chunks = None + blocks = None + if blosc2_arrs: # fast path only relevant if there are blosc2 arrays + operand = max(blosc2_arrs, key=lambda x: len(x.shape)) + + # Check if the partitions are aligned (i.e. all operands have the same shape, + # chunks and blocks, and have no padding). This will allow us to take the fast path. 
+ same_shape = all(operand.shape == o.shape for o in operands.values() if hasattr(o, "shape")) + same_chunks = all(operand.chunks == o.chunks for o in operands.values() if hasattr(o, "chunks")) + same_blocks = all(operand.blocks == o.blocks for o in operands.values() if hasattr(o, "blocks")) + fast_path = same_shape and same_chunks and same_blocks and (0 not in operand.chunks) + aligned = dict.fromkeys(operands.keys(), False) + iter_disk = False + if fast_path: + chunks = operand.chunks + blocks = operand.blocks + # Check that all operands are NDArray for fast path + all_ndarray = all( + isinstance(value, blosc2.NDArray) and value.shape != () for value in operands.values() + ) + # Check that there is some NDArray that is persisted in the disk + any_persisted = any( + ( + isinstance(value, blosc2.NDArray) + and value.shape != () + and value.schunk.urlpath is not None + ) + for value in operands.values() + ) + if not blosc2.IS_WASM: + iter_disk = all_ndarray and any_persisted + # Experiments say that iter_disk is faster than the regular path for reductions + # even when all operands are in memory, so no need to check any_persisted + # New benchmarks are saying the contrary (> 10% slower), so this needs more + # investigation + # iter_disk = all_ndarray + else: + # WebAssembly does not support threading, so we cannot use the iter_disk option + iter_disk = False + else: + for arr in blosc2_arrs: + if arr.shape == shape: + chunks = arr.chunks + break + if chunks is None: # have to calculate chunks (this is cheap as empty just creates a thin metalayer) + temp = blosc2.empty(shape, dtype=dtype) + chunks = temp.chunks + del temp + + # miniexpr reduction path only supported for some cases so far + if not (fast_path and all_ndarray and reduced_shape == () and _slice == ()): + use_miniexpr = False + + # Some reductions are not supported yet in miniexpr + if reduce_op in (ReduceOp.ARGMAX, ReduceOp.ARGMIN, ReduceOp.CUMULATIVE_PROD, ReduceOp.CUMULATIVE_SUM): + use_miniexpr = 
False + + # Check whether we can use miniexpr + if use_miniexpr and isinstance(expression, str): + has_complex = any( + isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating") + for op in operands.values() + ) + if has_complex and (sys.platform == "win32" or blosc2.IS_WASM): + # On Windows and WebAssembly, miniexpr has issues with complex numbers + use_miniexpr = False + if has_complex and any(tok in expression for tok in ("!=", "==", "<=", ">=", "<", ">")): + use_miniexpr = False + if where is not None and len(where) != 2: + use_miniexpr = False + + if use_miniexpr: + # Experiments say that not splitting is best (at least on Apple Silicon M4 Pro) + cparams = kwargs.pop("cparams", blosc2.CParams(splitmode=blosc2.SplitMode.NEVER_SPLIT)) + # Create a fake NDArray just to drive the miniexpr evaluation (values won't be used) + res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs) + # Compute the number of blocks in the result + nblocks = res_eval.nbytes // res_eval.blocksize + # Initialize aux_reduc based on the reduction operation + # Padding blocks won't be written, so initial values matter for the final reduction + if reduce_op in {ReduceOp.SUM, ReduceOp.ANY, ReduceOp.CUMULATIVE_SUM}: + aux_reduc = np.zeros(nblocks, dtype=dtype) + elif reduce_op in {ReduceOp.PROD, ReduceOp.ALL, ReduceOp.CUMULATIVE_PROD}: + aux_reduc = np.ones(nblocks, dtype=dtype) + elif reduce_op == ReduceOp.MIN: + if np.issubdtype(dtype, np.integer): + aux_reduc = np.full(nblocks, np.iinfo(dtype).max, dtype=dtype) + else: + aux_reduc = np.full(nblocks, np.inf, dtype=dtype) + elif reduce_op == ReduceOp.MAX: + if np.issubdtype(dtype, np.integer): + aux_reduc = np.full(nblocks, np.iinfo(dtype).min, dtype=dtype) + else: + aux_reduc = np.full(nblocks, -np.inf, dtype=dtype) + else: + # For other operations, zeros should be safe + aux_reduc = np.zeros(nblocks, dtype=dtype) + prefilter_set = False + try: + if where is not None: + 
expression_miniexpr = f"{reduce_op_str}(where({expression}, _where_x, _where_y))" + else: + expression_miniexpr = f"{reduce_op_str}({expression})" + expression_miniexpr = _apply_jit_backend_pragma(expression_miniexpr, operands, jit_backend) + res_eval._set_pref_expr(expression_miniexpr, operands, fp_accuracy, aux_reduc, jit=jit) + prefilter_set = True + # print("expr->miniexpr:", expression, reduce_op, fp_accuracy) + # Data won't even try to be compressed, so buffers can be unitialized and reused + data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8) + chunk_data = np.empty(res_eval.schunk.chunksize + blosc2.MAX_OVERHEAD, dtype=np.uint8) + # Exercise prefilter for each chunk + for nchunk in range(res_eval.schunk.nchunks): + res_eval.schunk._prefilter_data(nchunk, data, chunk_data) + except Exception: + use_miniexpr = False + finally: + if prefilter_set: + res_eval.schunk.remove_prefilter("miniexpr") + global iter_chunks + # Ensure any background reading thread is closed + iter_chunks = None + + if not use_miniexpr: + # If miniexpr failed, fallback to regular evaluation + # (continue to the manual chunked evaluation below) + pass + else: + if reduce_op in {ReduceOp.ANY, ReduceOp.ALL}: + result = reduce_op.value(aux_reduc, **reduce_args) + else: + result = reduce_op.value.reduce(aux_reduc, **reduce_args) + return result + + # Iterate over the operands and get the chunks + chunk_operands = {} + # Check which chunks intersect with _slice + if np.isscalar(reduce_args["axis"]): # iterate over chunks incrementing along reduction axis + intersecting_chunks = get_intersecting_chunks(_slice, shape, chunks, axis=reduce_args["axis"]) + else: # iterate over chunks incrementing along last axis + intersecting_chunks = get_intersecting_chunks(_slice, shape, chunks) + out_init = False + res_out_init = False + ratio = ( + np.ceil(np.asarray(shape) / np.asarray(chunks)).astype(np.int64) + if 0 not in chunks + else np.asarray(shape) + ) + + for chunk_slice in 
intersecting_chunks: + cslice = chunk_slice.raw + nchunk = ( + builtins.sum(c.start // chunks[i] * np.prod(ratio[i + 1 :]) for i, c in enumerate(cslice)) + if 0 not in chunks + else 0 + ) + # Check whether current cslice intersects with _slice + if cslice != () and _slice != (): + # get intersection of chunk and target + cslice = step_handler(cslice, _slice) + offset = tuple(s.start for s in cslice) # offset for the udf + starts = [s.start if s.start is not None else 0 for s in cslice] + unit_steps = np.all([s.step == 1 for s in cslice]) + cslice_shape = tuple(s.stop - s.start for s in cslice) + # get local index of part of out that is to be updated + cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw # if _slice is (), just gives cslice + if _slice == () and fast_path and unit_steps: + # Fast path + full_chunk = cslice_shape == chunks + fill_chunk_operands( + operands, + cslice, + cslice_shape, + full_chunk, + aligned, + nchunk, + iter_disk, + chunk_operands, + reduc=True, + axis=reduce_args["axis"] if np.isscalar(reduce_args["axis"]) else None, + ) + else: + _get_chunk_operands(operands, cslice, chunk_operands, shape) + + if reduce_op in {ReduceOp.CUMULATIVE_PROD, ReduceOp.CUMULATIVE_SUM}: + reduced_slice = ( + tuple( + slice(sl.start + 1, sl.stop + 1, sl.step) if i == reduce_args["axis"] else sl + for i, sl in enumerate(cslice_subidx) + ) + if include_initial + else cslice_subidx + ) + else: + reduced_slice = ( + tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx)) + if keepdims + else tuple(sl for i, sl in enumerate(cslice_subidx) if i not in axis) + ) + + # Evaluate and reduce the expression using chunks of operands + + if callable(expression): + if _is_dsl_kernel_expression(expression): + _raise_dsl_miniexpr_required( + "internal reduction fallback attempted to execute the DSL kernel directly in Python." 
+ ) + # TODO: Implement the reductions for UDFs (and test them) + result = np.empty(cslice_shape, dtype=out.dtype) + expression(tuple(chunk_operands.values()), result, offset=offset) + # Reduce the result + result = reduce_op.value.reduce(result, **reduce_args) + # Update the output array with the result + out[reduced_slice] = reduce_op.value(out[reduced_slice], result) + continue + + result, _ = _get_result(expression, chunk_operands, ne_args, where) + # Enforce contiguity of result (necessary to fill the out array) + # but avoid copy if already contiguous + result = np.require(result, requirements="C") + + # Reduce the result + if result.shape == (): + if reduce_op == ReduceOp.SUM and result[()] == 0: + # Avoid a reduction when result is a zero scalar. Faster for sparse data. + continue + # Note that cslice_shape refers to slice of operand chunks, not reduced_slice + result = np.full(cslice_shape, result[()]) + if reduce_op in {ReduceOp.ANY, ReduceOp.ALL, ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}: + result = reduce_op.value(result, **reduce_args) + elif reduce_op in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}: + # offset for start of slice + slice_ref = ( + starts + if _slice == () + else [ + (s - sl.start - np.sign(sl.step)) // sl.step + 1 + for s, sl in zip(starts, _slice, strict=True) + ] + ) + result_idx = reduce_op.value(result, **reduce_args) + if reduce_args["axis"] is None: # indexing into flattened array + result = result[np.unravel_index(result_idx, shape=result.shape)] + idx_within_cslice = np.unravel_index(result_idx, shape=cslice_shape) + result_idx = np.ravel_multi_index( + tuple(o + i for o, i in zip(slice_ref, idx_within_cslice, strict=True)), shape_slice + ) + else: # axis is an integer + result = np.take_along_axis( + result, + np.expand_dims(result_idx, axis=reduce_args["axis"]) if not keepdims else result_idx, + axis=reduce_args["axis"], + ) + result = result if keepdims else result.squeeze(axis=reduce_args["axis"]) + result_idx += 
slice_ref[reduce_args["axis"]] + else: + result = reduce_op.value.reduce(result, **reduce_args) + + if not out_init: + # if cumsum/cumprod and arrays large, return blosc2 array with same chunks + chunks_out = ( + chunks + if np.prod(reduced_shape) * np.dtype(dtype).itemsize > 4 * blosc2.MAX_FAST_PATH_SIZE + else None + ) + chunks_out = chunks_out if _slice == () else None + out_ = convert_none_out(result.dtype, reduce_op, reduced_shape, chunks=chunks_out) + if out is not None: + out[:] = out_ + del out_ + else: + out = out_ + behaved = ( + False + if not hasattr(out, "chunks") + else blosc2.are_partitions_behaved(out.shape, out.chunks, out.blocks) + ) + out_init = True + + # res_out only used be argmin/max and cumulative_sum/prod which only accept axis=int argument + if (not res_out_init) or ( + np.isscalar(reduce_args["axis"]) and cslice_subidx[reduce_args["axis"]].start == 0 + ): # starting reduction again along axis + res_out_ = _get_res_out(result.shape, reduce_args["axis"], dtype, reduce_op) + res_out_init = True + + # Update the output array with the result + if reduce_op == ReduceOp.ANY: + out[reduced_slice] += result + elif reduce_op == ReduceOp.ALL: + out[reduced_slice] *= result + elif res_out_ is not None: + # need lowest index for which optimum attained + if reduce_op in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}: + cond = (res_out_ == result) & (result_idx < out[reduced_slice]) + cond |= res_out_ < result if reduce_op == ReduceOp.ARGMAX else res_out_ > result + out[reduced_slice] = np.where(cond, result_idx, out[reduced_slice]) + res_out_ = np.where(cond, result, res_out_) + else: # CUMULATIVE_SUM or CUMULATIVE_PROD + idx_lastval = tuple( + slice(-1, None) if i == reduce_args["axis"] else slice(None, None) + for i, c in enumerate(reduced_slice) + ) + if reduce_op == ReduceOp.CUMULATIVE_SUM: + result += res_out_ + else: # CUMULATIVE_PROD + result *= res_out_ + res_out_ = result[idx_lastval] + if behaved and result.shape == out.chunks and result.dtype == 
out.dtype and _slice == (): + # Fast path + # TODO: Check this only works when slice is () as nchunk is incorrect for out otherwise + out.schunk.update_data(nchunk, result, copy=False) + else: + out[reduced_slice] = result + else: + out[reduced_slice] = reduce_op.value(out[reduced_slice], result) + + # No longer need res_out_ + del res_out_ + + if out is None: + if reduce_op in (ReduceOp.MIN, ReduceOp.MAX, ReduceOp.ARGMIN, ReduceOp.ARGMAX): + raise ValueError("zero-size array in (arg-)min/max reduction operation is not supported") + if dtype is None: + # We have no hint here, so choose a default dtype + dtype = np.float64 + out = convert_none_out(dtype, reduce_op, reduced_shape) + + if reduced_shape == (): + # convert_none_out() may allocate shape (1,) as an internal buffer for scalar reductions. + # Collapse it to a numpy scalar while handling both 0-d and 1-d singleton arrays. + if isinstance(out, np.ndarray): + out = out[()] if out.ndim == 0 else out[0] + else: + out = out[()] + final_mask = tuple(np.where(mask_slice)[0]) + if np.any(mask_slice): # remove dummy dims + out = np.squeeze(out, axis=final_mask) + # Check if the output array needs to be converted into a blosc2.NDArray + if kwargs != {} and not np.isscalar(out): + out = blosc2.asarray(out, **kwargs) + return out + + +def _get_res_out(reduced_shape, axis, dtype, reduce_op): + reduced_shape = (1,) if reduced_shape == () else reduced_shape + # Get res_out to hold running sums along axes for chunks when doing cumulative sums/prods with axis not None + if reduce_op in {ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}: + temp_shape = tuple(1 if i == axis else s for i, s in enumerate(reduced_shape)) + res_out_ = ( + np.zeros(temp_shape, dtype=dtype) + if reduce_op == ReduceOp.CUMULATIVE_SUM + else np.ones(temp_shape, dtype=dtype) + ) + elif reduce_op in {ReduceOp.ARGMIN, ReduceOp.ARGMAX}: + temp_shape = reduced_shape + res_out_ = np.ones(temp_shape, dtype=dtype) + if np.issubdtype(dtype, np.integer): + 
def convert_none_out(dtype, reduce_op, reduced_shape, chunks=None):
    """Allocate the accumulator array for a reduction, pre-filled with a neutral start value.

    Parameters
    ----------
    dtype: dtype
        Data type requested for the accumulator. It is not used for ANY/ALL
        (``np.bool_``) nor for ARGMIN/ARGMAX (``blosc2.DEFAULT_INDEX``).
    reduce_op: ReduceOp
        The reduction the accumulator is meant for.
    reduced_shape: tuple
        Shape of the reduced result. An empty shape ``()`` is allocated as ``(1,)``.
    chunks: tuple, optional
        Only honored for cumulative sum/prod: when given, the accumulator is created
        with ``blosc2.zeros`` / ``blosc2.ones`` using these chunks instead of NumPy.

    Returns
    -------
    out
        The freshly allocated accumulator.

    Raises
    ------
    ValueError
        If ``reduce_op`` is not one of the operations handled here. (Previously this
        fell through to ``return out`` with ``out`` unbound.)
    """
    reduced_shape = (1,) if reduced_shape == () else reduced_shape
    # Unless chunks are requested for a cumulative op, out will be a proper numpy.ndarray
    if reduce_op in {ReduceOp.SUM, ReduceOp.CUMULATIVE_SUM, ReduceOp.PROD, ReduceOp.CUMULATIVE_PROD}:
        if reduce_op in (ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD) and chunks is not None:
            out = (
                blosc2.zeros(reduced_shape, dtype=dtype, chunks=chunks)
                if reduce_op == ReduceOp.CUMULATIVE_SUM
                else blosc2.ones(reduced_shape, dtype=dtype, chunks=chunks)
            )
        else:
            # Additive identity for sums, multiplicative identity for products
            out = (
                np.zeros(reduced_shape, dtype=dtype)
                if reduce_op in {ReduceOp.SUM, ReduceOp.CUMULATIVE_SUM}
                else np.ones(reduced_shape, dtype=dtype)
            )
    elif reduce_op == ReduceOp.MIN:
        # Start from the largest representable value so that any element replaces it
        if np.issubdtype(dtype, np.integer):
            out = np.iinfo(dtype).max * np.ones(reduced_shape, dtype=dtype)
        else:
            # NOTE(review): the product follows NumPy promotion, so a bool dtype ends up
            # as a float array here -- confirm this is intended.
            out = np.inf * np.ones(reduced_shape, dtype=dtype)
    elif reduce_op == ReduceOp.MAX:
        # Start from the smallest representable value so that any element replaces it
        if np.issubdtype(dtype, np.integer):
            out = np.iinfo(dtype).min * np.ones(reduced_shape, dtype=dtype)
        else:
            out = -np.inf * np.ones(reduced_shape, dtype=dtype)
    elif reduce_op == ReduceOp.ANY:
        out = np.zeros(reduced_shape, dtype=np.bool_)
    elif reduce_op == ReduceOp.ALL:
        out = np.ones(reduced_shape, dtype=np.bool_)
    elif reduce_op in {ReduceOp.ARGMIN, ReduceOp.ARGMAX}:
        out = np.zeros(reduced_shape, dtype=blosc2.DEFAULT_INDEX)
    else:
        # Fail with the same error used by infer_reduction_dtype() instead of an
        # UnboundLocalError on the return statement below.
        raise ValueError(f"Unsupported operation: {reduce_op}")
    return out
def _eval_zero_input_dsl_if_needed(
    expression,
    operands: dict,
    where,
    getitem: bool,
    item,
    shape,
    jit,
    jit_backend,
    kwargs: dict,
):
    """Evaluate an operand-less DSL kernel through ``fast_eval`` when that shortcut applies.

    The shortcut is taken only when there are no operands, ``expression`` is a
    ``DSLKernel`` whose ``dsl_source`` is not None, and no ``where`` arguments are given.

    Returns
    -------
    tuple
        ``(False, None)`` when the shortcut does not apply; otherwise ``(True, result)``,
        where ``result`` is the full evaluation, indexed with ``item.raw`` if ``getitem``
        is true.
    """
    # Same short-circuit order as a chained `and` over the positive conditions
    if (
        operands
        or not isinstance(expression, DSLKernel)
        or expression.dsl_source is None
        or where is not None
    ):
        return False, None

    # The kernel is always evaluated in full; slicing (if any) is applied afterwards
    evaluated = fast_eval(
        expression,
        operands,
        getitem=False,
        shape=shape,
        jit=jit,
        jit_backend=jit_backend,
        **kwargs,
    )
    return True, (evaluated[item.raw] if getitem else evaluated)
+ """ + try: + # standardise slice to be ndindex.Tuple + item = () if item == slice(None, None, None) else item + item = item if isinstance(item, tuple) else (item,) + item = tuple( + slice(s.start, s.stop, 1 if s.step is None else s.step) if isinstance(s, slice) else s + for s in item + ) + item = ndindex.ndindex(item) + shape = kwargs.pop("shape", None) + if item.raw != () and shape is not None: + item = item.expand(shape) # converts to standard tuple form + + getitem = kwargs.pop("_getitem", False) + out = kwargs.get("_output") + # Execution policy for miniexpr JIT paths only; never forward to array constructors. + jit = kwargs.pop("jit", None) + jit_backend = kwargs.pop("jit_backend", None) + + where: dict | None = kwargs.get("_where_args") + if where: + # Make the where arguments part of the operands + operands = {**operands, **where} + + reduce_args = kwargs.pop("_reduce_args", {}) + fast_path = _validate_chunked_eval_inputs(operands, out, shape, reduce_args) + + # Activate last read cache for NDField instances + for op in operands: + if isinstance(operands[op], blosc2.NDField): + operands[op].ndarr.keep_last_read = True + + if reduce_args: + # Eval and reduce the expression in a single step + return reduce_slices( + expression, + operands, + reduce_args=reduce_args, + _slice=item, + jit=jit, + jit_backend=jit_backend, + **kwargs, + ) + + handled, result = _eval_zero_input_dsl_if_needed( + expression, operands, where, getitem, item, shape, jit, jit_backend, kwargs + ) + if handled: + return result + + full_slice = is_full_slice(item.raw) + if not full_slice or (where is not None and len(where) < 2): + # The fast path is possible under a few conditions + if getitem and (where is None or len(where) == 2): + # Compute the size of operands for the fast path + unit_steps = np.all([s.step == 1 for s in item.raw if isinstance(s, slice)]) + # shape of slice, if non-unit steps have to decompress full array into memory + shape_operands = item.newshape(shape) if 
unit_steps else shape + _dtype = kwargs.get("dtype", np.float64) + size_operands = math.prod(shape_operands) * len(operands) * _dtype.itemsize + # Only take the fast path if the size of operands is relatively small + if size_operands < blosc2.MAX_FAST_PATH_SIZE: + return slices_eval_getitem(expression, operands, _slice=item, shape=shape, **kwargs) + return slices_eval(expression, operands, getitem=getitem, _slice=item, shape=shape, **kwargs) + + fast_path = full_slice and fast_path + if fast_path: # necessarily item is () + if getitem: + # When using getitem, taking the fast path is always possible + return fast_eval( + expression, + operands, + getitem=True, + jit=jit, + jit_backend=jit_backend, + shape=shape, + **kwargs, + ) + elif (kwargs.get("chunks") is None and kwargs.get("blocks") is None) and ( + out is None or isinstance(out, blosc2.NDArray) + ): + # If not, the conditions to use the fast path are a bit more restrictive + # e.g. the user cannot specify chunks or blocks, or an output that is not + # a blosc2.NDArray + return fast_eval( + expression, + operands, + getitem=False, + jit=jit, + jit_backend=jit_backend, + shape=shape, + **kwargs, + ) + elif _is_dsl_kernel_expression(expression) and (out is None or isinstance(out, blosc2.NDArray)): + # DSL kernels require miniexpr and must not fall back to Python execution. 
def fuse_expressions(expr, new_base, dup_op):
    """Rename the ``o<N>`` operands of ``expr`` so it can be merged into another expression.

    An ``o`` starts an operand token only at the beginning of ``expr`` or right after a
    space or an opening parenthesis; the token extends up to (not including) the next
    delimiter (space, ``)``, ``[`` or ``,``) or to the end of the string.

    Parameters
    ----------
    expr: str
        The expression whose operands have to be renamed.
    new_base: int
        First index used for renamed operands; distinct operands get consecutive
        indices in order of first appearance.
    dup_op: dict
        Maps operand names of ``expr`` (e.g. ``"o1"``) to the name that must replace
        them verbatim (operands already present in the other expression).

    Returns
    -------
    str
        The expression with its operands renamed.

    Raises
    ------
    ValueError
        If the text following an operand-starting ``o`` is not an integer.
    """
    delimiters = " )[,"
    length = len(expr)
    pieces = []
    renamed = {}  # old operand index -> new operand index
    n_renamed = 0
    pos = 0
    while pos < length:
        char = expr[pos]
        starts_operand = char == "o" and (pos == 0 or expr[pos - 1] in {" ", "("})
        if not starts_operand:
            # Regular character (including an 'o' inside a function name): copy verbatim
            pieces.append(char)
            pos += 1
            continue
        # Find the end of the operand token. Scanning stops at the end of the string too,
        # so a trailing operand (e.g. "o0 + o1") or a bare multi-digit one ("o12") is
        # parsed completely instead of raising IndexError or being truncated.
        end = pos + 1
        while end < length and expr[end] not in delimiters:
            end += 1
        old_pos = int(expr[pos + 1 : end])
        old_op = f"o{old_pos}"
        if old_op in dup_op:
            pieces.append(dup_op[old_op])
        else:
            if old_pos not in renamed:
                # First time this operand shows up: give it the next free index
                renamed[old_pos] = n_renamed + new_base
                n_renamed += 1
            pieces.append(f"o{renamed[old_pos]}")
        pos = end
    return "".join(pieces)
def can_cast(from_: blosc2.dtype | blosc2.NDArray, to: blosc2.dtype) -> bool:
    """
    Determines if one data type can be cast to another data type according to (NumPy) type promotion rules.

    Parameters
    ----------
    from_: dtype | NDArray
        Input data type or array from which to cast.

    to: dtype
        Desired data type.

    Returns
    -------
    out:bool
        True if the cast can occur according to type promotion rules; otherwise, False.
    """
    # Dtype instances and scalar type classes are passed through as they are; any other
    # object exposing a `dtype` (i.e. an array) is represented by that dtype.
    if isinstance(from_, (type, np.dtype)):
        from_dtype = from_
    else:
        from_dtype = getattr(from_, "dtype", from_)
    # The previous implementation returned np.result_type(from_), i.e. a dtype (always
    # truthy) and never looked at `to`.
    return bool(np.can_cast(from_dtype, to))
+ """ + + def __init__(self, new_op): # noqa: C901 + if new_op is None: + self.expression = "" + self.operands = {} + return + value1, op, value2 = new_op + dtype_ = check_dtype(op, value1, value2) # perform some checks + # Check that operands are proper Operands, LazyArray or scalars; if not, convert to NDArray objects + value1 = ( + blosc2.SimpleProxy(value1) + if not (isinstance(value1, (blosc2.Operand, np.ndarray)) or np.isscalar(value1)) + else value1 + ) + # Reset values represented as np.int64 etc. to be set as Python natives + value1 = value1.item() if np.isscalar(value1) and hasattr(value1, "item") else value1 + if value2 is None: + if isinstance(value1, LazyExpr): + self.expression = value1.expression if op is None else f"{op}({value1.expression})" + # handle constructors which can give empty operands + self._dtype = ( + value1.dtype + if op is None + else _numpy_eval_expr(f"{op}(o0)", {"o0": value1}, prefer_blosc=False).dtype + ) + self.operands = value1.operands + else: + if np.isscalar(value1): + value1 = ne_evaluate(f"{op}({value1!r})") + op = None + self.operands = {"o0": value1} + self.expression = "o0" if op is None else f"{op}(o0)" + return + value2 = ( + blosc2.SimpleProxy(value2) + if not (isinstance(value2, (blosc2.Operand, np.ndarray)) or np.isscalar(value2)) + else value2 + ) + # Reset values represented as np.int64 etc. 
to be set as Python natives + value2 = value2.item() if np.isscalar(value2) and hasattr(value2, "item") else value2 + + if isinstance(value1, LazyExpr) or isinstance(value2, LazyExpr): + if isinstance(value1, LazyExpr): + newexpr = value1.update_expr(new_op) + else: + newexpr = value2.update_expr(new_op) + self.expression = newexpr.expression + self.operands = newexpr.operands + self._dtype = newexpr.dtype + return + elif op in funcs_2args: + if np.isscalar(value1) and np.isscalar(value2): + self.expression = "o0" + svalue1 = _format_expr_scalar(value1) + svalue2 = _format_expr_scalar(value2) + self.operands = {"o0": ne_evaluate(f"{op}({svalue1}, {svalue2})")} # eager evaluation + elif np.isscalar(value2): + self.operands = {"o0": value1} + self.expression = f"{op}(o0, {_format_expr_scalar(value2)})" + elif np.isscalar(value1): + self.operands = {"o0": value2} + self.expression = f"{op}({_format_expr_scalar(value1)}, o0)" + else: + self.operands = {"o0": value1, "o1": value2} + self.expression = f"{op}(o0, o1)" + return + + self._dtype = dtype_ + if np.isscalar(value1) and np.isscalar(value2): + self.expression = "o0" + self.operands = {"o0": ne_evaluate(f"({value1!r} {op} {value2!r})")} # eager evaluation + elif np.isscalar(value2): + self.operands = {"o0": value1} + self.expression = f"(o0 {op} {value2!r})" + elif hasattr(value2, "shape") and value2.shape == (): + self.operands = {"o0": value1} + self.expression = f"(o0 {op} {value2[()]})" + elif np.isscalar(value1): + self.operands = {"o0": value2} + self.expression = f"({value1!r} {op} o0)" + elif hasattr(value1, "shape") and value1.shape == (): + self.operands = {"o0": value2} + self.expression = f"({value1[()]} {op} o0)" + else: + if value1 is value2: + self.operands = {"o0": value1} + self.expression = f"(o0 {op} o0)" + else: + # This is the very first time that a LazyExpr is formed from two operands + # that are not LazyExpr themselves + self.operands = {"o0": value1, "o1": value2} + self.expression = f"(o0 
def update_expr(self, new_op):  # noqa: C901
    """Combine two operands with an operator into a new lazy expression.

    Parameters
    ----------
    new_op: tuple
        A ``(value1, op, value2)`` triple. ``op`` is either an operator that is
        spliced between the operands or the name of a two-argument function
        listed in ``funcs_2args``.

    Returns
    -------
    out
        The object built by ``self._new_expr()`` for the combined expression.

    Notes
    -----
    ``blosc2._disable_overloaded_equal`` is forced to True while the expression
    is being built and is restored to its previous value on exit, also when an
    exception is raised.
    """

    def as_term(value, known_operands):
        """Return the expression text for ``value`` and the operands it adds."""
        if np.isscalar(value):
            return _format_expr_scalar(value), {}
        if hasattr(value, "shape") and value.shape == ():
            return _format_expr_scalar(value[()]), {}
        # Reuse the key of an operand that is already registered (identity match)
        operand_to_key = {id(v): k for k, v in known_operands.items()}
        try:
            return operand_to_key[id(value)], {}
        except KeyError:
            key = f"o{len(known_operands)}"
            return key, {key: value}

    prev_flag = blosc2._disable_overloaded_equal
    # We use a lot of the original NDArray.__eq__ as 'is', so deactivate the overloaded one
    blosc2._disable_overloaded_equal = True
    try:
        value1, op, value2 = new_op
        dtype_ = check_dtype(op, value1, value2)  # conserve dtype
        is_func = op in funcs_2args
        # where() handling requires evaluating the expression prior to merge.
        # This is different from reductions, where the expression is evaluated
        # and returned a NumPy array (for usability convenience).
        # We do things like this to enable the fusion of operations like
        # `a.where(0, 1).sum()`.
        # Another possibility would have been to always evaluate where() and produce
        # an NDArray, but that would have been less efficient for the case above.
        if hasattr(value1, "_where_args"):
            value1 = value1.compute()
        if hasattr(value2, "_where_args"):
            value2 = value2.compute()

        lazy1 = isinstance(value1, LazyExpr)
        lazy2 = isinstance(value2, LazyExpr)
        if not lazy1 and not lazy2:
            # We converted some of the operands to NDArray (where() handling above)
            new_operands = {"o0": value1, "o1": value2}
            # The function name has to be interpolated; the literal text
            # "op(o0, o1)" would refer to a function called `op`.
            expression = f"{op}(o0, o1)" if is_func else f"(o0 {op} o1)"
            return self._new_expr(expression, new_operands, guess=False, out=None, where=None)

        if lazy1 and lazy2:
            # Expression fusion
            # Fuse operands in expressions and detect duplicates
            new_operands, dup_op = fuse_operands(value1.operands, value2.operands)
            # Take expression 2 and rebase the operands while removing duplicates
            new_expr = fuse_expressions(value2.expression, len(value1.operands), dup_op)
            expression = (
                f"{op}({value1.expression}, {new_expr})"
                if is_func
                else f"({value1.expression} {op} {new_expr})"
            )
            def_operands = value1.operands
        elif lazy1:
            v2, new_operands = as_term(value2, value1.operands)
            if op == "~":
                expression = f"({op}{value1.expression})"
            elif is_func:
                expression = f"{op}({value1.expression}, {v2})"
            else:
                expression = f"({value1.expression} {op} {v2})"
            def_operands = value1.operands
        else:
            v1, new_operands = as_term(value1, value2.operands)
            if op == "[]":  # syntactic sugar for slicing
                expression = f"({v1}[{value2.expression}])"
            elif is_func:
                expression = f"{op}({v1}, {value2.expression})"
            else:
                expression = f"({v1} {op} {value2.expression})"
            def_operands = value2.operands

        # Return a new expression
        operands = def_operands | new_operands
        expr = self._new_expr(expression, operands, guess=False, out=None, where=None)
        expr._dtype = dtype_  # override dtype with preserved dtype
        return expr
    finally:
        blosc2._disable_overloaded_equal = prev_flag
@property
def shape(self):
    """Shape of the result of the expression.

    An explicitly stored ``_shape`` always wins. Otherwise the value cached in
    ``_shape_`` is reused as long as the expression text has not changed since
    it was cached. ``None`` is returned when any operand is ``None``.
    """
    # Honor self._shape; it can be set during the building of the expression
    if hasattr(self, "_shape"):
        return self._shape

    cache_is_fresh = (
        hasattr(self, "_shape_")
        and hasattr(self, "_expression_")
        and self._expression_ == self.expression
    )
    if cache_is_fresh:
        return self._shape_

    # A missing operand (e.g. a removed file on disk) means there is no shape
    for operand in self.operands.values():
        if operand is None:
            return None

    # Operands shape can change, so we always need to recompute this
    uses_constructor = False
    for constructor in constructors:
        if _has_constructor_call(self.expression, constructor):
            uses_constructor = True
            break

    if uses_constructor:
        # might have an expression with pure constructors
        opshapes = {}
        for name, operand in self.operands.items():
            opshapes[name] = getattr(operand, "shape", operand)
        result = infer_shape(self.expression, opshapes)  # infer shape, includes constructors
    else:
        result, chunks, blocks, fast_path = validate_inputs(self.operands, getattr(self, "_out", None))
        if fast_path:
            # fast_path ensure that all the operands have the same partitions
            self._chunks = chunks
            self._blocks = blocks

    # Remember the result together with the expression it belongs to
    self._shape_ = result
    self._expression_ = self.expression
    return result
@property
def blocks(self):
    """Block shape for the result; computed once and stored in ``_blocks``.

    ``_chunks`` (and ``_shape``, when it is not set yet) are filled in as a
    side effect of the first access.
    """
    if hasattr(self, "_blocks"):
        return self._blocks

    out = getattr(self, "_out", None)
    operands_shape, self._chunks, self._blocks, fast_path = validate_inputs(self.operands, out)
    if not hasattr(self, "_shape"):
        self._shape = operands_shape

    # validate inputs only works for elementwise funcs so the returned shape might
    # be incompatible with the true output shape; the fast path is not usable then
    if self._shape != operands_shape or not fast_path:
        # Not using the fast path, so we need to compute the chunks/blocks automatically
        partitions = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype)
        self._chunks, self._blocks = partitions
    return self._blocks
def get_num_elements(self, axis, item):
    """Return how many elements take part in a reduction along ``axis``.

    Parameters
    ----------
    axis: int, tuple of ints or None
        Axis or axes being reduced; ``None`` means all of them. Negative
        values count from the last axis.
    item: slice-like
        Selection applied before reducing; ``()`` means the whole array.

    Returns
    -------
    out
        With a single-argument where() condition attached, the outcome of
        summing ones over the condition. Otherwise the product of the lengths
        of the reduced axes.
    """
    if hasattr(self, "_where_args") and len(self._where_args) == 1:
        # We have a where condition, so we need to count the number of elements
        # fulfilling the condition
        orig_where_args = self._where_args
        self._where_args = {"_where_x": blosc2.ones(self.shape, dtype=np.int8)}
        try:
            return self.sum(axis=axis, dtype=np.int64, item=item)
        finally:
            # Put the original condition back even when the sum fails
            self._where_args = orig_where_args

    # Compute the number of elements in the array
    shape = self.shape
    if np.isscalar(axis):
        axis = (axis,)
    if item != ():
        # Compute the shape of the slice
        shape = ndindex.ndindex(item).newshape(shape)
    ndim = len(shape)
    if axis is None:
        axis = tuple(range(ndim))
    # handle negative indexing
    return math.prod(shape[a if a >= 0 else a + ndim] for a in axis)
def var(
    self,
    axis=None,
    dtype=None,
    keepdims=False,
    ddof=0,
    fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT,
    **kwargs,
):
    """Variance along ``axis``: the mean of the squared deviations from the mean.

    ``item`` (taken from ``kwargs``) restricts the computation to a slice.
    A non-zero ``ddof`` rescales the result by ``n / (n - ddof)``, with ``n``
    given by ``get_num_elements()``. When ``out`` is passed in ``kwargs`` the
    result is written into it and ``out`` is returned; any remaining kwargs are
    forwarded to ``blosc2.asarray`` for non-scalar results.
    """
    item = kwargs.pop("item", ())
    mean_opts = {"axis": axis, "dtype": dtype, "keepdims": True, "fp_accuracy": fp_accuracy}
    if item == ():  # fast path
        deviations = self - self.mean(**mean_opts)
    else:
        center = self.mean(item=item, **mean_opts)
        # TODO: Not optimal because we load the whole slice in memory. Would have to write
        # a bespoke var function that executed within slice_eval to avoid this probably.
        deviations = self.slice(item) - center

    out = (deviations**2).mean(axis=axis, dtype=dtype, keepdims=keepdims, fp_accuracy=fp_accuracy)
    if ddof != 0:
        count = self.get_num_elements(axis, item)
        out = out * count / (count - ddof)

    destination = kwargs.pop("out", None)
    if destination is not None:
        destination[:] = out
        return destination
    if kwargs and not np.isscalar(out):
        out = blosc2.asarray(out, **kwargs)
    return out
def _eval_constructor(self, expression, constructor, operands):
    """Evaluate a constructor function inside a string expression.

    Parameters
    ----------
    expression: str
        Expression text containing at least one call to ``constructor``.
    constructor: str
        Name of the constructor; it is looked up as an attribute of ``blosc2``.
    operands: dict
        Mapping used as the local namespace when evaluating the call.

    Returns
    -------
    tuple
        ``(value, text)`` where ``value`` is the evaluated call and ``text`` is
        the slice of ``expression`` that was evaluated (including a directly
        following ``.reshape(...)``, if any).

    Raises
    ------
    ValueError
        If the constructor is not found or its parentheses are unbalanced.
    """

    def find_args(expr):
        # Return the text between the first "(" and its matching ")",
        # plus the index just past that closing parenthesis
        idx = expr.find("(") + 1
        count = 1
        for i, c in enumerate(expr[idx:], start=idx):
            if c == "(":
                count += 1
            elif c == ")":
                count -= 1
            if count == 0:
                return expr[idx:i], i + 1
        raise ValueError("Unbalanced parenthesis in expression")

    # Find the index of the first constructor call.
    match = _find_constructor_call(expression, constructor)
    if match is None:
        raise ValueError(f"Constructor '{constructor}' not found in expression: {expression}")
    idx = match.start()
    # Find the arguments of the constructor function
    try:
        args, idx2 = find_args(expression[idx + len(constructor) :])
    except ValueError as err:
        raise ValueError(f"Unbalanced parenthesis in expression: {expression}") from err
    # find_args() returned an offset relative to the sliced text; make it absolute
    idx2 = idx + len(constructor) + idx2

    # Give a chance to a possible .reshape() method
    if expression[idx2 : idx2 + len(".reshape(")] == ".reshape(":
        # len("reshape(") equals len(".reshape"), so this slice starts at the
        # opening parenthesis of the reshape call
        args2, idx3 = find_args(expression[idx2 + len("reshape(") :])
        # Remove a possible shape= from the reshape call (due to rewriting the expression
        # via extract_numpy_scalars(), other variants like .reshape(shape = shape_) work too)
        args2 = args2.replace("shape=", "")
        args = f"{args}, shape={args2}"
        idx2 += len(".reshape") + idx3

    # Evaluate the constructor function
    constructor_func = getattr(blosc2, constructor)
    _globals = {constructor: constructor_func}
    # Add the blosc2 constructors and dtype symbols to the globals
    _globals |= {k: getattr(blosc2, k) for k in constructors}
    _globals |= dtype_symbols
    evalcons = f"{constructor}({args})"

    # Internal constructors will be cached for avoiding multiple computations
    if not hasattr(self, "cons_cache"):
        self.cons_cache = {}
    if evalcons in self.cons_cache:
        return self.cons_cache[evalcons], expression[idx:idx2]
    # NOTE(review): eval() runs text taken from the expression string; this
    # presumably relies on the expression having been validated beforehand -- confirm
    value = eval(evalcons, _globals, operands)
    self.cons_cache[evalcons] = value

    return value, expression[idx:idx2]
lazy_expr.shape) + # An immediate evaluation happened (e.g. all operands are numpy arrays) + if hasattr(self, "_where_args"): + # We need to apply the where() operation + if len(self._where_args) == 1: + # We have a single argument + where_x = self._where_args["_where_x"] + return (where_x[:][lazy_expr])[key] + if len(self._where_args) == 2: + # We have two arguments + where_x = self._where_args["_where_x"] + where_y = self._where_args["_where_y"] + return np.where(lazy_expr, where_x, where_y)[key] + out = kwargs.get("_output", None) + if out is not None: + # This is not exactly optimized, but it works for now + out[:] = lazy_expr[key] + return out + arr = lazy_expr[key] + if builtins.sum(mask) > 0: + # Correct shape to adjust to NumPy convention + arr.shape = tuple(arr.shape[i] for i in range(len(mask)) if not mask[i]) + return arr + + return chunked_eval(lazy_expr.expression, lazy_expr.operands, item, **kwargs) + + if any(_has_constructor_call(self.expression, constructor) for constructor in constructors): + expression = self.expression + newexpr = expression + newops = self.operands.copy() + # We have constructors in the expression (probably coming from a string lazyexpr) + # Let's replace the constructors with the actual NDArray objects + for constructor in constructors: + if not _has_constructor_call(newexpr, constructor): + continue + while _has_constructor_call(newexpr, constructor): + # Get the constructor function and replace it by an NDArray object in the operands + # Find the constructor call and its arguments + value, constexpr = self._eval_constructor(newexpr, constructor, newops) + # Add the new operand to the operands; its name will be temporary + newop = f"_c{len(newops)}" + newops[newop] = value + # Replace the constructor call by the new operand + newexpr = newexpr.replace(constexpr, newop) + + _globals = get_expr_globals(newexpr) + lazy_expr = eval(newexpr, _globals, newops) + if isinstance(lazy_expr, blosc2.NDArray): + # Almost done (probably 
# TODO: indices and sort are repeated in LazyUDF; refactor
def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
    """Return a shallow copy of this object flagged to yield indices.

    The copy gets ``_indices = True``, an int64 ``_dtype`` and, when ``order``
    is truthy, ``_order``. The object itself is left untouched.

    Raises
    ------
    NotImplementedError
        If the dtype has no fields (not a structured array).
    ValueError
        If there is no where() condition with exactly one argument.
    """
    is_structured = self.dtype.fields is not None
    if not is_structured:
        raise NotImplementedError("indices() can only be used with structured arrays")
    has_condition = hasattr(self, "_where_args") and len(self._where_args) == 1
    if not has_condition:
        raise ValueError("indices() can only be used with conditions")

    clone = copy.copy(self)
    clone._indices = True
    if order:
        clone._order = order
    # indices are reported as int64
    clone._dtype = np.dtype(np.int64)
    return clone
def compute(
    self,
    item=(),
    fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT,
    jit=None,
    jit_backend: str | None = None,
    **kwargs,
) -> blosc2.NDArray:
    """Evaluate the expression, optionally restricted to ``item``.

    Parameters
    ----------
    item: slice-like
        Selection to evaluate; ``()`` means everything.
    fp_accuracy: blosc2.FPAccuracy
        Forwarded as ``fp_accuracy`` to the evaluation.
    jit, jit_backend:
        Forwarded to the evaluation only when not None.
    kwargs: dict
        ``out`` and ``ne_args`` are renamed to ``_output`` and ``_ne_args``;
        a value passed here takes precedence over the one stored on the
        expression. Everything else is forwarded as is.

    Returns
    -------
    out
        The outcome of ``self._compute_expr()``. It is wrapped with
        ``blosc2.asarray`` unless it already is an NDArray or the call carries
        ``_getitem``, ``_output`` or ``_reduce_args``.
    """
    # When NumPy ufuncs are called, the user may add an `out` parameter to kwargs
    if "out" in kwargs:  # use provided out preferentially
        kwargs["_output"] = kwargs.pop("out")
    elif hasattr(self, "_output"):
        kwargs["_output"] = self._output

    # Same precedence for ne_args: the stored value is only a fallback, so an
    # explicit ne_args is not discarded by a stored `_ne_args` (possibly None)
    if "ne_args" in kwargs:
        kwargs["_ne_args"] = kwargs.pop("ne_args")
    elif hasattr(self, "_ne_args"):
        kwargs["_ne_args"] = self._ne_args

    if hasattr(self, "_where_args"):
        kwargs["_where_args"] = self._where_args
    kwargs.setdefault("fp_accuracy", fp_accuracy)
    if jit is not None:
        kwargs["jit"] = jit
    if jit_backend is not None:
        kwargs["jit_backend"] = jit_backend
    kwargs["dtype"] = self.dtype
    kwargs["shape"] = self.shape
    if hasattr(self, "_indices"):
        kwargs["_indices"] = self._indices
    if hasattr(self, "_order"):
        kwargs["_order"] = self._order

    result = self._compute_expr(item, kwargs)
    if "_order" in kwargs and "_indices" not in kwargs:
        # We still need to apply the index in result
        x = self._where_args["_where_x"]
        result = x[result]  # always a numpy array; TODO: optimize this for _getitem not in kwargs

    needs_wrapping = (
        "_getitem" not in kwargs
        and "_output" not in kwargs
        and "_reduce_args" not in kwargs
        and not isinstance(result, blosc2.NDArray)
    )
    if needs_wrapping:
        # Get rid of all the extra kwargs that are not accepted by blosc2.asarray
        kwargs_not_accepted = {
            "_where_args",
            "_indices",
            "_order",
            "_ne_args",
            "dtype",
            "shape",
            "fp_accuracy",
        }
        kwargs = {key: value for key, value in kwargs.items() if key not in kwargs_not_accepted}
        result = blosc2.asarray(result, **kwargs)
    return result
@property
def info_items(self):
    """List of ``(name, value)`` pairs describing this expression.

    The entries are, in order: ``type``, ``expression``, ``operands``,
    ``shape`` and ``dtype``. Each operand is reported by the ``urlpath`` of
    its ``schunk`` when it has one, and by ``str(operand)`` otherwise; that
    includes operands that have no ``schunk`` at all.
    """

    def describe(operand):
        # Operands without a `schunk` attribute fall back to their string form
        # instead of raising AttributeError
        urlpath = getattr(getattr(operand, "schunk", None), "urlpath", None)
        return str(operand) if urlpath is None else urlpath

    opsinfo = {key: describe(value) for key, value in self.operands.items()}
    return [
        ("type", f"{self.__class__.__name__}"),
        ("expression", self.expression),
        ("operands", opsinfo),
        ("shape", self.shape),
        ("dtype", self.dtype),
    ]
hasattr(value, "schunk"): + raise ValueError( + "To save a LazyArray, all operands must be blosc2.NDArray or blosc2.C2Array objects" + ) + if value.schunk.urlpath is None: + raise ValueError("To save a LazyArray, all operands must be stored on disk/network") + operands[key] = value.schunk.urlpath + array.schunk.vlmeta["_LazyArray"] = { + "expression": expression, + "UDF": None, + "operands": operands, + } + + @classmethod + def _new_expr(cls, expression, operands, guess, out=None, where=None, ne_args=None): + # Validate the expression + validate_expr(expression) + expression = convert_to_slice(expression) + chunks, blocks = None, None + if guess: + # The expression has been validated, so we can evaluate it + # in guessing mode to avoid computing reductions + # Extract possible numpy scalars + _expression, local_vars = extract_numpy_scalars(expression) + _operands = operands | local_vars + # Check that operands are proper Operands, LazyArray or scalars; if not, convert to NDArray objects + for op, val in _operands.items(): + if not (isinstance(val, (blosc2.Operand, np.ndarray)) or np.isscalar(val)): + _operands[op] = blosc2.SimpleProxy(val) + # for scalars just return value (internally converts to () if necessary) + opshapes = {k: v if not hasattr(v, "shape") else v.shape for k, v in _operands.items()} + _shape = infer_shape(_expression, opshapes) # infer shape, includes constructors + # have to handle slices since a[10] on a dummy variable of shape (1,1) doesn't work + desliced_expr, desliced_ops = extract_and_replace_slices(_expression, _operands) + # substitutes with dummy operands (cheap for reductions) and + # defaults to blosc2 functions (cheap for constructors) + new_expr = _numpy_eval_expr(desliced_expr, desliced_ops, prefer_blosc=True) + _dtype = new_expr.dtype if hasattr(new_expr, "dtype") else np.dtype(type(new_expr)) + if isinstance(new_expr, blosc2.LazyExpr): + # DO NOT restore the original expression and operands + # Instead rebase operands and restore 
only constructors + expression_, operands_ = conserve_functions( + _expression, _operands, new_expr.operands | local_vars + ) + elif _shape == () and not _operands: # passed scalars + expression_ = "o0" + operands_ = {"o0": ne_evaluate(_expression)} + else: + # An immediate evaluation happened + # (e.g. all operands are numpy arrays or constructors) + # or passed "a", "a[:10]", 'sum(a)' + expression_, operands_ = conserve_functions(_expression, _operands, local_vars) + if hasattr(new_expr, "chunks") and new_expr.chunks != (1,) * len(_shape): + # for constructors with chunks in kwargs, chunks will be specified + # for general expression new_expr is just with dummy scalar variables (so ignore) + chunks = new_expr.chunks + blocks = new_expr.blocks + new_expr = cls(None) + new_expr.expression = f"({expression_})" # force parenthesis + new_expr.operands = operands_ + new_expr.expression_tosave = expression + new_expr.operands_tosave = operands + # Cache the dtype and shape (should be immutable) + new_expr._dtype = _dtype + new_expr._shape = _shape + if chunks is not None and blocks is not None: + new_expr._chunks, new_expr._blocks = chunks, blocks + else: + # Create a new LazyExpr object + new_expr = cls(None) + new_expr.expression = expression + new_expr.operands = operands + if out is not None: + new_expr._output = out + if where is not None: + new_expr._where_args = where + new_expr._ne_args = ne_args + return new_expr + + +class LazyUDF(LazyArray): + def __init__( + self, func, inputs, dtype, shape=None, chunked_eval=True, jit=None, jit_backend=None, **kwargs + ): + # After this, all the inputs should be np.ndarray or NDArray objects + self.inputs = convert_inputs(inputs) + # Get res shape + if shape is None: + self._shape = compute_broadcast_shape(self.inputs) + if self._shape is None: + raise ValueError( + "If all inputs are scalars, pass a `shape` argument to indicate the output shape" + ) + else: + self._shape = shape + + self.kwargs = kwargs + 
self.kwargs["dtype"] = dtype + self.kwargs["shape"] = self._shape + self.kwargs["jit"] = jit + self.kwargs["jit_backend"] = jit_backend + in_place = kwargs.get("in_place", False) + self.kwargs["in_place"] = in_place + self._dtype = dtype + self.func = func + if isinstance(self.func, DSLKernel) and self.func.dsl_error is not None: + udf_name = getattr(self.func.func, "__name__", self.func.__name__) + raise DSLSyntaxError(f"Invalid DSL kernel '{udf_name}'.\n{self.func.dsl_error}") from None + + # Prepare internal array for __getitem__ + # Deep copy the kwargs to avoid modifying them + kwargs_getitem = copy.deepcopy(self.kwargs) + # Cannot use multithreading when applying a postfilter, dparams['nthreads'] ignored + dparams = kwargs_getitem.get("dparams", {}) + if isinstance(dparams, dict): + dparams["nthreads"] = 1 + else: + raise TypeError("dparams should be a dictionary") + kwargs_getitem["dparams"] = dparams + + if isinstance(self.func, DSLKernel) and self.func.input_names: + # DSL kernels are using input names that are extracted from params as a list, + # and we need to use them for matching variables in miniexpr + # (instead of the 'o{%d}' notation). 
@property
def chunks(self):
    """Chunk shape for the result; computed once and stored in ``_chunks``.

    Without inputs, the ``chunks``/``blocks`` entries of ``self.kwargs`` are
    used as the requested partitions. ``_blocks`` is filled in as a side
    effect of the first access.
    """
    if hasattr(self, "_chunks"):
        return self._chunks

    if not self.inputs_dict:
        requested_chunks = self.kwargs.get("chunks")
        requested_blocks = self.kwargs.get("blocks")
        partitions = compute_chunks_blocks(
            self.shape, requested_chunks, requested_blocks, dtype=self.dtype
        )
        self._chunks, self._blocks = partitions
        return self._chunks

    out = getattr(self, "_out", None)
    inputs_shape, self._chunks, self._blocks, fast_path = validate_inputs(self.inputs_dict, out)
    if not hasattr(self, "_shape"):
        self._shape = inputs_shape

    # validate inputs only works for elementwise funcs so the returned shape might
    # be incompatible with the true output shape; the fast path is not usable then
    if self._shape != inputs_shape or not fast_path:
        # Not using the fast path, so we need to compute the chunks/blocks automatically
        partitions = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype)
        self._chunks, self._blocks = partitions
    return self._chunks
self._chunks, self._blocks, fast_path = validate_inputs( + self.inputs_dict, getattr(self, "_out", None) + ) + if not hasattr(self, "_shape"): + self._shape = shape + if self._shape != shape: # validate inputs only works for elementwise funcs so returned shape might + fast_path = False # be incompatible with true output shape + if not fast_path: + # Not using the fast path, so we need to compute the chunks/blocks automatically + self._chunks, self._blocks = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype) + return self._blocks + + # TODO: indices and sort are repeated in LazyExpr; refactor + def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray: + if self.dtype.fields is None: + raise NotImplementedError("indices() can only be used with structured arrays") + if not hasattr(self, "_where_args") or len(self._where_args) != 1: + raise ValueError("indices() can only be used with conditions") + # Build a new lazy array + lazy_expr = copy.copy(self) + # ... and assign the new attributes + lazy_expr._indices = True + if order: + lazy_expr._order = order + # dtype changes to int64 + lazy_expr._dtype = np.dtype(np.int64) + return lazy_expr + + def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray: + if self.dtype.fields is None: + raise NotImplementedError("sort() can only be used with structured arrays") + if not hasattr(self, "_where_args") or len(self._where_args) != 1: + raise ValueError("sort() can only be used with conditions") + # Build a new lazy expression + lazy_expr = copy.copy(self) + # ... 
and assign the new attributes + if order: + lazy_expr._order = order + return lazy_expr + + def compute( + self, + item=(), + fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, + jit=None, + jit_backend=None, + **kwargs, + ): + # Get kwargs + if kwargs is None: + kwargs = {} + # Do copy to avoid modifying the original parameters + aux_kwargs = copy.deepcopy(self.kwargs) + + # Update is not recursive + aux_cparams = aux_kwargs.get("cparams", {}) + if isinstance(aux_cparams, blosc2.CParams): + # Convert to dictionary + aux_cparams = asdict(aux_cparams) + cparams = kwargs.get("cparams", {}) + if isinstance(cparams, blosc2.CParams): + # Convert to dictionary + cparams = asdict(cparams) + aux_cparams.update(cparams) + aux_kwargs["cparams"] = aux_cparams + + aux_dparams = aux_kwargs.get("dparams", {}) + if isinstance(aux_dparams, blosc2.DParams): + # Convert to dictionary + aux_dparams = asdict(aux_dparams) + dparams = kwargs.get("dparams", {}) + if isinstance(dparams, blosc2.DParams): + # Convert to dictionary + dparams = asdict(dparams) + aux_dparams.update(dparams) + aux_kwargs["dparams"] = aux_dparams + + _ = kwargs.pop("cparams", None) + _ = kwargs.pop("dparams", None) + if jit is not None: + aux_kwargs["jit"] = jit + if jit_backend is not None: + aux_kwargs["jit_backend"] = jit_backend + urlpath = kwargs.get("urlpath") + if urlpath is not None and urlpath == aux_kwargs.get( + "urlpath", + ): + raise ValueError("Cannot use the same urlpath for LazyArray and eval NDArray") + _ = aux_kwargs.pop("urlpath", None) + + if "out" in kwargs: # use provided out preferentially + aux_kwargs["_output"] = kwargs.pop("out") + elif hasattr(self, "_output"): + aux_kwargs["_output"] = self._output + aux_kwargs.update(kwargs) + + # aux_kwargs includes self.shape and self.dtype + return chunked_eval(self.func, self.inputs_dict, item, _getitem=False, **aux_kwargs) + + def __getitem__(self, item): + return chunked_eval(self.func, self.inputs_dict, item, _getitem=True, 
**self.kwargs) + + def save(self, urlpath=None, **kwargs): + """ + Save the :ref:`LazyUDF` on disk. + + Parameters + ---------- + urlpath: str + The path to the file where the LazyUDF will be stored. + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: None + + Notes + ----- + * All operands must be :ref:`NDArray` or :ref:`C2Array` objects stored on + disk or a remote server (i.e. they must have a ``urlpath``). + * When the :ref:`LazyUDF` wraps a :func:`blosc2.dsl_kernel`-decorated + function, the DSL source is preserved verbatim in the saved metadata. + On reload via :func:`blosc2.open`, the function is restored as a full + :class:`~blosc2.dsl_kernel.DSLKernel` so the miniexpr JIT fast path + remains available without any extra work from the caller. + """ + if urlpath is None: + raise ValueError("To save a LazyArray you must provide an urlpath") + + meta = kwargs.get("meta", {}) + meta["LazyArray"] = LazyArrayEnum.UDF.value + kwargs["urlpath"] = urlpath + kwargs["meta"] = meta + kwargs["mode"] = "w" # always overwrite the file in urlpath + + # Create an empty array; useful for providing the shape and dtype of the outcome + array = blosc2.empty(shape=self.shape, dtype=self.dtype, **kwargs) + + # Save the expression and operands in the metadata + operands = {} + operands_ = self.inputs_dict + for i, (_key, value) in enumerate(operands_.items()): + pos_key = f"o{i}" # always use positional keys for consistent loading + if isinstance(value, blosc2.C2Array): + operands[pos_key] = { + "path": str(value.path), + "urlbase": value.urlbase, + } + continue + if isinstance(value, blosc2.Proxy): + # Take the required info from the Proxy._cache container + value = value._cache + if not hasattr(value, "schunk"): + raise ValueError( + "To save a LazyArray, all operands must be blosc2.NDArray or blosc2.C2Array objects" + ) + if value.schunk.urlpath is None: + raise ValueError("To save a LazyArray, all 
operands must be stored on disk/network") + operands[pos_key] = value.schunk.urlpath + udf_func = self.func.func if isinstance(self.func, DSLKernel) else self.func + udf_name = getattr(udf_func, "__name__", self.func.__name__) + try: + udf_source = textwrap.dedent(inspect.getsource(udf_func)).lstrip() + except Exception: + udf_source = None + meta = { + "UDF": udf_source, + "operands": operands, + "name": udf_name, + } + if isinstance(self.func, DSLKernel) and self.func.dsl_source is not None: + meta["dsl_source"] = self.func.dsl_source + array.schunk.vlmeta["_LazyArray"] = meta + + +def _numpy_eval_expr(expression, operands, prefer_blosc=False): + npops = { + key: np.ones(np.ones(len(value.shape), dtype=int), dtype=value.dtype) + if hasattr(value, "shape") + else value + for key, value in operands.items() + } + if prefer_blosc: + # convert blosc arrays to small dummies + ops = { + key: blosc2.ones((1,) * len(value.shape), dtype=value.dtype) + if hasattr(value, "chunks") + else value # some of these could be numpy arrays + for key, value in operands.items() + } + # change numpy arrays + ops = { + key: np.ones((1,) * len(value.shape), dtype=value.dtype) + if isinstance(value, np.ndarray) + else value + for key, value in ops.items() + } + else: # wasm pathway assumes numpy arrs + ops = npops + + # Create a globals dict with blosc2 version of functions preferentially + # (default to numpy func if not implemented in blosc2) + if prefer_blosc: + _globals = get_expr_globals(expression) + _globals |= dtype_symbols + else: + _globals = safe_numpy_globals + try: + _out = eval(expression, _globals, ops) + except RuntimeWarning: + # Sometimes, numpy gets a RuntimeWarning when evaluating expressions + # with synthetic operands (1's). Let's try with numexpr, which is not so picky + # about this. 
+ ops = npops if blosc2.IS_WASM else ops + _out = ne_evaluate(expression, local_dict=ops) + return _out + + +def lazyudf( + func: Callable[[tuple, np.ndarray, tuple[int]], None], + inputs: Sequence[Any] | None, + dtype: np.dtype, + shape: tuple | list | None = None, + chunked_eval: bool = True, + jit: bool | None = None, + jit_backend: str | None = None, + **kwargs: Any, +) -> LazyUDF: + """ + Get a LazyUDF from a python user-defined function. + + Parameters + ---------- + func: Python function + The user-defined function to apply to each block. This function will + always receive the following parameters: + - `inputs_tuple`: A tuple containing the corresponding slice for the block of each input + in :paramref:`inputs`. + - `output`: The buffer to be filled as a multidimensional numpy.ndarray. + - `offset`: The multidimensional offset corresponding to the start of the block being computed: + ``` + def myudf(inputs_tuple, output, offset): + x, y = inputs_tuple + ... + output[:] = result + ``` + inputs: Sequence[Any] or None + The sequence of inputs. Besides objects compliant with the blosc2.Array protocol, + any other object is supported too, and it will be passed as-is to the + user-defined function. If not needed, this can be empty, but `shape` must + be provided. + dtype: np.dtype + The resulting ndarray dtype in NumPy format. + shape: tuple, optional + The shape of the resulting array. If None, the shape will be guessed from inputs. + chunked_eval: bool, optional + Whether to evaluate the function in chunks or not (blocks). + jit: bool or None, optional + JIT policy for miniexpr-backed execution: + ``None`` uses default behavior (currently, JIT is tried out), ``True`` prefers JIT, ``False`` disables JIT. + jit_backend: {"tcc", "cc"} or None, optional + JIT backend selection for miniexpr-backed execution: + ``None`` uses backend defaults (currently "tcc"), ``"tcc"`` forces libtcc, ``"cc"`` forces C compiler backend. 
+ kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + These arguments will be used by the :meth:`LazyArray.__getitem__` and + :meth:`LazyArray.compute` methods. The + last one will ignore the `urlpath` parameter passed in this function. + In addition, one may provide ``in_place``, a bool (default False), which indicates whether + the function should modify the output directly, (rather than chunks of the output, which are later written to output): + ``` + def inplace_udf(inputs_tuple, output, offset): + x, y = inputs_tuple + ... + out[3] += 1 + ``` + + Returns + ------- + out: :ref:`LazyUDF` + A :ref:`LazyUDF` is returned. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.float64 + >>> shape = [3, 3] + >>> size = shape[0] * shape[1] + >>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape) + >>> b = np.linspace(10, 20, num=size, dtype=dtype).reshape(shape) + >>> a1 = blosc2.asarray(a) + >>> b1 = blosc2.asarray(b) + >>> # Define a user-defined function that will be applied to each block of data + >>> def my_function(inputs_tuple, output, offset): + >>> a, b = inputs_tuple + >>> output[:] = a + b + >>> # Create a LazyUDF object using the user-defined function + >>> lazy_udf = blosc2.lazyudf(my_function, [a1, b1], dtype) + >>> type(lazy_udf) + + >>> f"Result of LazyUDF evaluation: {lazy_udf[:]}" + Result of LazyUDF evaluation: + [[10. 12.5 15. ] + [17.5 20. 22.5] + [25. 27.5 30. ]] + """ + if isinstance(func, DSLKernel) and func.dsl_error is not None: + udf_name = getattr(func.func, "__name__", func.__name__) + raise DSLSyntaxError(f"Invalid DSL kernel '{udf_name}'.\n{func.dsl_error}") from None + return LazyUDF(func, inputs, dtype, shape, chunked_eval, jit, jit_backend, **kwargs) + + +def seek_operands(names, local_dict=None, global_dict=None, _frame_depth: int = 2): + """ + Get the arguments based on the names. 
+ """ + call_frame = sys._getframe(_frame_depth) + + clear_local_dict = False + if local_dict is None: + local_dict = call_frame.f_locals + clear_local_dict = True + try: + frame_globals = call_frame.f_globals + if global_dict is None: + global_dict = frame_globals + + # If `call_frame` is the top frame of the interpreter we can't clear its + # `local_dict`, because it is actually the `global_dict`. + clear_local_dict = clear_local_dict and frame_globals is not local_dict + + op_dict = {} + for name in names: + try: + a = local_dict[name] + except KeyError: + a = global_dict[name] + op_dict[name] = a + finally: + # If we generated local_dict via an explicit reference to f_locals, + # clear the dict to prevent creating extra ref counts in the caller's scope + if clear_local_dict and hasattr(local_dict, "clear"): + local_dict.clear() + + return op_dict + + +def lazyexpr( + expression: str | bytes | LazyArray | blosc2.NDArray, + operands: dict | None = None, + out: blosc2.Array = None, + where: tuple | list | None = None, + local_dict: dict | None = None, + global_dict: dict | None = None, + ne_args: dict | None = None, + _frame_depth: int = 2, +) -> LazyExpr: + """ + Get a LazyExpr from an expression. + + Parameters + ---------- + expression: str or bytes or LazyExpr or NDArray + The expression to evaluate. This can be any valid expression that numexpr + can ingest. If a LazyExpr is passed, the expression will be + updated with the new operands. + operands: dict[blosc2.Array], optional + The dictionary with operands. Supported values are Python scalars, + or any instance that is blosc2.Array compliant. + If None, the operands will be seeked in the local and global dictionaries. + out: blosc2.Array, optional + The output array where the result will be stored. If not provided, + a new NumPy array will be created and returned. + where: tuple, list, optional + A sequence of arguments for the where clause in the expression. 
+ local_dict: dict, optional + The local dictionary to use when looking for operands in the expression. + If not provided, the local dictionary of the caller will be used. + global_dict: dict, optional + The global dictionary to use when looking for operands in the expression. + If not provided, the global dictionary of the caller will be used. + ne_args: dict, optional + Additional arguments to be passed to `numexpr.evaluate()` function. + _frame_depth: int, optional + The depth of the frame to use when looking for operands in the expression. + The default value is 2. + + + Returns + ------- + out: :ref:`LazyExpr` + A :ref:`LazyExpr` is returned. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.float64 + >>> shape = [3, 3] + >>> size = shape[0] * shape[1] + >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> a1 = blosc2.asarray(a) + >>> a1[:] + [[0. 0.625 1.25 ] + [1.875 2.5 3.125] + [3.75 4.375 5. ]] + >>> b1 = blosc2.asarray(b) + >>> expr = 'a * b + 2' + >>> operands = { 'a': a1, 'b': b1 } + >>> lazy_expr = blosc2.lazyexpr(expr, operands=operands) + >>> f"Lazy expression created: {lazy_expr}" + Lazy expression created: a * b + 2 + >>> lazy_expr[:] + [[ 2. 2.390625 3.5625 ] + [ 5.515625 8.25 11.765625] + [16.0625 21.140625 27. 
]] + """ + if isinstance(expression, LazyExpr): + if operands is not None: + expression.operands.update(operands) + if out is not None: + expression._output = out + expression._ne_args = ne_args + if where is not None: + where_args = {"_where_x": where[0], "_where_y": where[1]} + expression._where_args = where_args + return expression + elif isinstance(expression, blosc2.NDArray): + operands = {"o0": expression} + return LazyExpr._new_expr("o0", operands, guess=False, out=out, where=where, ne_args=ne_args) + + if operands is None: + # Try to get operands from variables in the stack + operand_set = get_expr_operands(expression) + # If no operands are found, raise an error + if operand_set: + # Look for operands in the stack + operands = seek_operands(operand_set, local_dict, global_dict, _frame_depth=_frame_depth) + else: + # No operands found in the expression. Maybe a constructor? + constructor = any(_has_constructor_call(expression, constructor) for constructor in constructors) + if not constructor: + raise ValueError("No operands nor constructors found in the expression") + # _new_expr will take care of the constructor, but needs an empty dict in operands + operands = {} + + return LazyExpr._new_expr(expression, operands, guess=True, out=out, where=where, ne_args=ne_args) + + +def _reconstruct_lazyudf(expr, lazyarray, operands_dict, array): + """Reconstruct a LazyUDF (including DSL kernels) from saved metadata.""" + local_ns = {} + name = lazyarray["name"] + filename = f"<{name}>" # any unique name + SAFE_GLOBALS = { + "__builtins__": {k: v for k, v in builtins.__dict__.items() if k != "__import__"}, + "np": np, + "blosc2": blosc2, + } + if blosc2._HAS_NUMBA: + SAFE_GLOBALS["numba"] = numba + + # Register the source so inspect can find it + linecache.cache[filename] = (len(expr), None, expr.splitlines(True), filename) + + exec(compile(expr, filename, "exec"), SAFE_GLOBALS, local_ns) + func = local_ns[name] + # If the saved LazyUDF was a DSL kernel, re-wrap and 
restore the dsl_source + if "dsl_source" in lazyarray: + if not isinstance(func, DSLKernel): + func = DSLKernel(func) + if func.dsl_source is None: + # Re-extraction from linecache failed; use the saved verbatim dsl_source + func.dsl_source = lazyarray["dsl_source"] + # TODO: make more robust for general kwargs (not just cparams) + return blosc2.lazyudf( + func, + tuple(operands_dict[f"o{n}"] for n in range(len(operands_dict))), + shape=array.shape, + dtype=array.dtype, + cparams=array.cparams, + ) + + +def _open_lazyarray(array): + value = array.schunk.meta["LazyArray"] + lazyarray = array.schunk.vlmeta["_LazyArray"] + if value == LazyArrayEnum.Expr.value: + expr = lazyarray["expression"] + elif value == LazyArrayEnum.UDF.value: + expr = lazyarray["UDF"] + else: + raise ValueError("Argument `array` is not LazyExpr or LazyUDF instance.") + + operands = lazyarray["operands"] + parent_path = Path(array.schunk.urlpath).parent + operands_dict = {} + missing_ops = {} + for key, v in operands.items(): + if isinstance(v, str): + v = parent_path / v + try: + op = blosc2.open(v) + except FileNotFoundError: + missing_ops[key] = v + else: + operands_dict[key] = op + elif isinstance(v, dict): + # C2Array + operands_dict[key] = blosc2.C2Array( + pathlib.Path(v["path"]).as_posix(), + urlbase=v["urlbase"], + ) + else: + raise TypeError("Error when retrieving the operands") + + if missing_ops: + exc = exceptions.MissingOperands(expr, missing_ops) + exc.expr = expr + exc.missing_ops = missing_ops + raise exc + + # LazyExpr + if value == LazyArrayEnum.Expr.value: + new_expr = LazyExpr._new_expr(expr, operands_dict, guess=True, out=None, where=None) + elif value == LazyArrayEnum.UDF.value: + new_expr = _reconstruct_lazyudf(expr, lazyarray, operands_dict, array) + + # Make the array info available for the user (only available when opened from disk) + new_expr.array = array + # We want to expose schunk too, so that .info() can be used on the LazyArray + new_expr.schunk = array.schunk + 
return new_expr + + +# Mimim numexpr's evaluate function +def evaluate( + ex: str, + local_dict: dict | None = None, + global_dict: dict | None = None, + out: blosc2.Array = None, + **kwargs: Any, +) -> blosc2.Array: + """ + Evaluate a string expression using the Blosc2 compute engine. + + This is a drop-in replacement for `numexpr.evaluate()`, but using the + Blosc2 compute engine. This allows for: + + 1) Use more functionality (e.g. reductions) than numexpr. + 2) Follow casting rules of NumPy more closely. + 3) Use both NumPy arrays and Blosc2 NDArrays in the same expression. + + As NDArrays can be on-disk, the expression can be evaluated without loading + the whole array into memory (i.e. using an out-of-core approach). + + Parameters + ---------- + ex: str + The expression to evaluate. + local_dict: dict, optional + The local dictionary to use when looking for operands in the expression. + If not provided, the local dictionary of the caller will be used. + global_dict: dict, optional + The global dictionary to use when looking for operands in the expression. + If not provided, the global dictionary of the caller will be used. + out: blosc2.Array, optional + The output array where the result will be stored. If not provided, + a new NumPy array will be created and returned. + kwargs: Any, optional + Additional arguments to be passed to `numexpr.evaluate()` function. + + Returns + ------- + out: blosc2.Array + The result of the expression evaluation. If out is provided, the result + will be stored in out and returned at the same time. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.float64 + >>> shape = [3, 3] + >>> size = shape[0] * shape[1] + >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) + >>> b = blosc2.linspace(0, 5, num=size, dtype=dtype, shape=shape) + >>> expr = 'a * b + 2' + >>> out = blosc2.evaluate(expr) + >>> out + [[ 2. 2.390625 3.5625 ] + [ 5.515625 8.25 11.765625] + [16.0625 21.140625 27. 
]] + """ + lexpr = lazyexpr( + ex, local_dict=local_dict, global_dict=global_dict, out=out, ne_args=kwargs, _frame_depth=3 + ) + if out is not None: + # The user specified an output array + return lexpr.compute() + # The user did not specify an output array, so return a NumPy array + return lexpr[()] + + +if __name__ == "__main__": + from time import time + + # Create initial containers + na1 = np.linspace(0, 10, 10_000_000, dtype=np.float64) + a1 = blosc2.asarray(na1) + na2 = np.copy(na1) + a2 = blosc2.asarray(na2) + na3 = np.copy(na1) + a3 = blosc2.asarray(na3) + na4 = np.copy(na1) + a4 = blosc2.asarray(na4) + # Interesting slice + # sl = None + sl = slice(0, 10_000) + # Create a simple lazy expression + expr = a1 + a2 + print(expr) + t0 = time() + nres = na1 + na2 + print(f"Elapsed time (numpy, [:]): {time() - t0:.3f} s") + t0 = time() + nres = ne_evaluate("na1 + na2") + print(f"Elapsed time (numexpr, [:]): {time() - t0:.3f} s") + nres = nres[sl] if sl is not None else nres + t0 = time() + res = expr.compute(item=sl) + print(f"Elapsed time (evaluate): {time() - t0:.3f} s") + res = res[sl] if sl is not None else res[:] + t0 = time() + res2 = expr[sl] + print(f"Elapsed time (getitem): {time() - t0:.3f} s") + np.testing.assert_allclose(res, nres) + np.testing.assert_allclose(res2, nres) + + # Complex lazy expression + expr = blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) + blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) + # expr = blosc2.sin(a1) + 2 * a1 + 1 + expr += 2 + print(expr) + t0 = time() + nres = np.tan(na1) * (np.sin(na2) * np.sin(na2) + np.cos(na3)) + (np.sqrt(na4) * 2) + 2 + # nres = np.sin(na1[:]) + 2 * na1[:] + 1 + 2 + print(f"Elapsed time (numpy, [:]): {time() - t0:.3f} s") + t0 = time() + nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") + print(f"Elapsed time (numexpr, [:]): {time() - t0:.3f} s") + nres = nres[sl] if sl is not None else nres + t0 = time() + res = expr.compute(sl) + print(f"Elapsed time (evaluate): 
{time() - t0:.3f} s") + res = res[sl] if sl is not None else res[:] + t0 = time() + res2 = expr[sl] + print(f"Elapsed time (getitem): {time() - t0:.3f} s") + np.testing.assert_allclose(res, nres) + np.testing.assert_allclose(res2, nres) + print("Everything is working fine") diff --git a/venv/Lib/site-packages/blosc2/lib/blosc2.lib b/venv/Lib/site-packages/blosc2/lib/blosc2.lib new file mode 100644 index 0000000..fd17771 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/lib/blosc2.lib differ diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Config.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Config.cmake new file mode 100644 index 0000000..f0c8fa6 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Config.cmake @@ -0,0 +1,133 @@ +# only add PUBLIC dependencies as well +# https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html#creating-a-package-configuration-file +include(CMakeFindDependencyMacro) + +# Search in _ROOT: +# https://cmake.org/cmake/help/v3.12/policy/CMP0074.html +if(POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif() + +# locate the installed FindABC.cmake modules +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules") + +# this section stores which configuration options were set +set(HAVE_THREADS ON) +set(HAVE_IPP ) +set(HAVE_LZ4_CONFIG ) +set(HAVE_ZLIB_NG TRUE) +set(HAVE_ZLIB_NG_CONFIG ) +set(HAVE_ZSTD_CONFIG ) +set(DEACTIVATE_IPP ON) +set(DEACTIVATE_ZLIB OFF) +set(DEACTIVATE_ZSTD OFF) +set(PREFER_EXTERNAL_LZ4 OFF) +set(PREFER_EXTERNAL_ZLIB OFF) +set(PREFER_EXTERNAL_ZSTD OFF) + +# find dependencies and their targets, which are used in our Blosc2Targets.cmake +# additionally, the Blosc2_..._FOUND variables are used to support +# find_package(Blosc2 ... COMPONENTS ... ...) +# this enables downstream projects to express the need for specific features. 
+set(CMAKE_THREAD_PREFER_PTHREAD TRUE) # pre 3.1 +set(THREADS_PREFER_PTHREAD_FLAG TRUE) # CMake 3.1+ +if(HAVE_THREADS) + find_dependency(Threads) + set(Blosc2_THREADS_FOUND TRUE) +else() + set(Blosc2_THREADS_FOUND FALSE) +endif() + +if(NOT DEACTIVATE_IPP AND HAVE_IPP) + find_dependency(IPP) + set(Blosc2_IPP_FOUND FALSE) +else() + set(Blosc2_IPP_FOUND TRUE) +endif() + +if(PREFER_EXTERNAL_LZ4 AND HAVE_LZ4_CONFIG) + find_dependency(lz4 CONFIG) +endif() +set(Blosc2_LZ4_FOUND TRUE) + +if(DEACTIVATE_ZLIB) + set(Blosc2_ZLIB_FOUND FALSE) +elseif(NOT DEACTIVATE_ZLIB AND PREFER_EXTERNAL_ZLIB) + if(HAVE_ZLIB_NG) + if (HAVE_ZLIB_NG_CONFIG) + find_dependency(zlib-ng CONFIG) + endif() + else() + find_dependency(ZLIB) + endif() + set(Blosc2_ZLIB_FOUND TRUE) +endif() + +if(DEACTIVATE_ZSTD) + set(Blosc2_ZSTD_FOUND FALSE) +elseif(NOT DEACTIVATE_ZSTD AND PREFER_EXTERNAL_ZSTD) + if(HAVE_ZSTD_CONFIG) + find_dependency(zstd CONFIG) + endif() + set(Blosc2_ZSTD_FOUND TRUE) +endif() + +# define central Blosc2::blosc2_shared/static targets +include("${CMAKE_CURRENT_LIST_DIR}/Blosc2Targets.cmake") + +# check if components are fulfilled and set Blosc2__FOUND vars +# Blosc2_FIND_COMPONENTS is a list set by find_package(... COMPONENTS ... ...) +# likewise Blosc2_FIND_REQUIRED_... 
per component specified +foreach(comp ${Blosc2_FIND_COMPONENTS}) + if(NOT Blosc2_${comp}_FOUND) + if(Blosc2_FIND_REQUIRED_${comp}) + set(Blosc2_FOUND FALSE) + endif() + endif() +endforeach() + +# Defines imported targets for Blosc2 inside a Python wheel + +# ------------------------------ +# Shared library target +# ------------------------------ +if(NOT TARGET Blosc2::blosc2_shared) + add_library(Blosc2::blosc2_shared SHARED IMPORTED GLOBAL) + + if(WIN32) + # MSVC: import library (.lib) + runtime DLL (.dll) + set_target_properties(Blosc2::blosc2_shared PROPERTIES + IMPORTED_IMPLIB "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.lib" + IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.dll" + INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include" + ) + else() + # Linux/macOS + set_target_properties(Blosc2::blosc2_shared PROPERTIES + IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_shared.so" + INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include" + ) + endif() +endif() + +# ------------------------------ +# Static library target +# ------------------------------ +if(NOT TARGET Blosc2::blosc2_static) + add_library(Blosc2::blosc2_static STATIC IMPORTED GLOBAL) + + if(MSVC) + # Windows static library uses .lib + set_target_properties(Blosc2::blosc2_static PROPERTIES + IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.lib" + INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include" + ) + else() + # Linux/macOS static library uses .a + set_target_properties(Blosc2::blosc2_static PROPERTIES + IMPORTED_LOCATION "${CMAKE_CURRENT_LIST_DIR}/../blosc2_static.a" + INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/../../include" + ) + endif() +endif() + diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2ConfigVersion.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2ConfigVersion.cmake new file mode 100644 index 0000000..cbd66bc --- /dev/null +++ 
b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2ConfigVersion.cmake @@ -0,0 +1,65 @@ +# This is a basic version file for the Config-mode of find_package(). +# It is used by write_basic_package_version_file() as input file for configure_file() +# to create a version-file which can be installed along a config.cmake file. +# +# The created file sets PACKAGE_VERSION_EXACT if the current version string and +# the requested version string are exactly the same and it sets +# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, +# but only if the requested major version is the same as the current one. +# The variable CVF_VERSION must be set before calling configure_file(). + + +set(PACKAGE_VERSION "2.23.1") + +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + + if("2.23.1" MATCHES "^([0-9]+)\\.") + set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}") + if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0) + string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}") + endif() + else() + set(CVF_VERSION_MAJOR "2.23.1") + endif() + + if(PACKAGE_FIND_VERSION_RANGE) + # both endpoints of the range must have the expected major version + math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1") + if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT))) + set(PACKAGE_VERSION_COMPATIBLE FALSE) + elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX))) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + 
set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + else() + if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + + if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() + endif() +endif() + + +# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: +if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "") + return() +endif() + +# check that the installed version has the same 32/64bit-ness as the one which is currently searching: +if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8") + math(EXPR installedBits "8 * 8") + set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") + set(PACKAGE_VERSION_UNSUITABLE TRUE) +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Targets-release.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Targets-release.cmake new file mode 100644 index 0000000..43d669f --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Targets-release.cmake @@ -0,0 +1,29 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. 
+set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "Blosc2::blosc2_shared" for configuration "Release" +set_property(TARGET Blosc2::blosc2_shared APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(Blosc2::blosc2_shared PROPERTIES + IMPORTED_IMPLIB_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/blosc2.lib" + IMPORTED_LOCATION_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.dll" + ) + +list(APPEND _cmake_import_check_targets Blosc2::blosc2_shared ) +list(APPEND _cmake_import_check_files_for_Blosc2::blosc2_shared "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/blosc2.lib" "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.dll" ) + +# Import target "Blosc2::blosc2_static" for configuration "Release" +set_property(TARGET Blosc2::blosc2_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(Blosc2::blosc2_static PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C" + IMPORTED_LOCATION_RELEASE "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.lib" + ) + +list(APPEND _cmake_import_check_targets Blosc2::blosc2_static ) +list(APPEND _cmake_import_check_files_for_Blosc2::blosc2_static "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib/libblosc2.lib" ) + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Targets.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Targets.cmake new file mode 100644 index 0000000..d915f19 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Blosc2Targets.cmake @@ -0,0 +1,116 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 3.0.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "3.0.0") + message(FATAL_ERROR "CMake >= 3.0.0 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 3.0.0...3.29) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS Blosc2::blosc2_shared Blosc2::blosc2_static Blosc2::blosc2) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not 
all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# The installation prefix configured by this project. +set(_IMPORT_PREFIX "C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib") + +# Create imported target Blosc2::blosc2_shared +add_library(Blosc2::blosc2_shared SHARED IMPORTED) + +set_target_properties(Blosc2::blosc2_shared PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include" +) + +# Create imported target Blosc2::blosc2_static +add_library(Blosc2::blosc2_static STATIC IMPORTED) + +set_target_properties(Blosc2::blosc2_static PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include" + INTERFACE_LINK_LIBRARIES "\$" +) + +# Create imported target Blosc2::blosc2 +add_library(Blosc2::blosc2 INTERFACE IMPORTED) + +set_target_properties(Blosc2::blosc2 PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include" + INTERFACE_LINK_LIBRARIES "Blosc2::blosc2_static" +) + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/Blosc2Targets-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. 
+set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + if(CMAKE_VERSION VERSION_LESS "3.28" + OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target} + OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}") + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + endif() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindIPP.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindIPP.cmake new file mode 100644 index 0000000..3010ae9 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindIPP.cmake @@ -0,0 +1,74 @@ +# Find the Intel IPP (Integrated Performance Primitives) +# +# IPP_FOUND - System has IPP +# IPP_INCLUDE_DIRS - IPP include files directories +# IPP_LIBRARIES - The IPP libraries +# +# The environment variable IPPROOT is used to find the installation location. +# If the environment variable is not set we'll look for it in the default installation locations. 
+# +# Usage: +# +# find_package(IPP) +# if(IPP_FOUND) +# target_link_libraries(TARGET ${IPP_LIBRARIES}) +# endif() + +find_path(IPP_ROOT_DIR + include/ipp.h + PATHS + $ENV{IPPROOT} + /opt/intel/compilers_and_libraries/linux/ipp + /opt/intel/compilers_and_libraries/mac/ipp + "C:/IntelSWTools/compilers_and_libraries/windows/ipp/" + "C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/ipp" + $ENV{HOME}/intel/ipp + $ENV{HOME}/miniconda3 + $ENV{USERPROFILE}/miniconda3/Library + "C:/Miniconda37-x64/Library" # Making AppVeyor happy +) + +find_path(IPP_INCLUDE_DIR + ipp.h + PATHS + ${IPP_ROOT_DIR}/include + ) + +if(WIN32) + set(IPP_SEARCH_LIB ippcoremt.lib) + set(IPP_LIBS ippcoremt.lib ippsmt.lib ippdcmt.lib) +elseif(APPLE) + set(IPP_SEARCH_LIB libippcore.a) + set(IPP_LIBS libipps.a libippdc.a libippcore.a) +else() # Linux + set(IPP_SEARCH_LIB libippcore.so) + set(IPP_LIBS ipps ippdc ippcore) +endif() + + +find_path(IPP_LIB_SEARCHPATH + ${IPP_SEARCH_LIB} + PATHS + ${IPP_ROOT_DIR}/lib/intel64 + ${IPP_ROOT_DIR}/lib +) + +foreach(LIB ${IPP_LIBS}) + find_library(${LIB}_PATH ${LIB} PATHS ${IPP_LIB_SEARCHPATH}) + if(${LIB}_PATH) + set(IPP_LIBRARIES ${IPP_LIBRARIES} ${${LIB}_PATH}) + set(IPP_FOUND TRUE) + else() + # message(STATUS "Could not find ${LIB}: disabling IPP") + set(IPP_NOTFOUND TRUE) + endif() +endforeach() + +if(IPP_FOUND AND NOT IPP_NOTFOUND) + set(IPP_INCLUDE_DIRS ${IPP_INCLUDE_DIR}) + include_directories(${IPP_INCLUDE_DIRS}) + message(STATUS "Found IPP libraries in: ${IPP_LIBRARIES}") +else() + message(STATUS "No IPP libraries found.") + set(IPP_FOUND FALSE) +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindLZ4.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindLZ4.cmake new file mode 100644 index 0000000..b95d316 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindLZ4.cmake @@ -0,0 +1,10 @@ +find_path(LZ4_INCLUDE_DIR lz4.h) + +find_library(LZ4_LIBRARY NAMES lz4 liblz4) + 
+if(LZ4_INCLUDE_DIR AND LZ4_LIBRARY) + set(LZ4_FOUND TRUE) + message(STATUS "Found LZ4 library: ${LZ4_LIBRARY}") +else() + message(STATUS "No LZ4 library found. Using internal sources.") +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindSIMD.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindSIMD.cmake new file mode 100644 index 0000000..1ce5209 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindSIMD.cmake @@ -0,0 +1,58 @@ +# Check if SSE/AVX instructions are available on the machine where +# the project is compiled. + +if(CMAKE_SYSTEM_NAME MATCHES "Linux") + exec_program(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) + + string(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}") + string(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE) + if(SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + else() + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + endif() + + string(REGEX REPLACE "^.*(avx2).*$" "\\1" SSE_THERE "${CPUINFO}") + string(COMPARE EQUAL "avx2" "${SSE_THERE}" AVX2_TRUE) + if(AVX2_TRUE) + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + else() + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + endif() + +elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") + exec_program("/usr/sbin/sysctl -a | grep machdep.cpu.features" OUTPUT_VARIABLE CPUINFO) + string(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}") + string(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE) + if(SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + else() + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + endif() + + exec_program("/usr/sbin/sysctl -a | grep machdep.cpu.leaf7_features" OUTPUT_VARIABLE CPUINFO) + string(REGEX REPLACE "^.*(AVX2).*$" "\\1" SSE_THERE "${CPUINFO}") + string(COMPARE EQUAL "AVX2" "${SSE_THERE}" AVX2_TRUE) + if(AVX2_TRUE) + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + else() + 
set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + endif() + +elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") + # TODO. For now supposing SSE2 is safe enough + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") +else() + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") +endif() + +if(NOT SSE2_FOUND) + message(STATUS "Could not find hardware support for SSE2 on this machine.") +endif() +if(NOT AVX2_FOUND) + message(STATUS "Could not find hardware support for AVX2 on this machine.") +endif() + +mark_as_advanced(SSE2_FOUND AVX2_FOUND) diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindZLIB_NG.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindZLIB_NG.cmake new file mode 100644 index 0000000..80ae84d --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindZLIB_NG.cmake @@ -0,0 +1,54 @@ +find_path(ZLIB_NG_INCLUDE_DIR NAMES zlib-ng.h) + +if(ZLIB_INCLUDE_DIRS) + set(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_INCLUDE_DIR}) + + if("${ZLIB_NG_LIBRARY_DIRS}" MATCHES "/include$") + # Strip off the trailing "/include" in the path. 
+ GET_FILENAME_COMPONENT(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_LIBRARY_DIRS} PATH) + endif("${ZLIB_NG_LIBRARY_DIRS}" MATCHES "/include$") + + if(EXISTS "${ZLIB_NG_LIBRARY_DIRS}/lib") + set(ZLIB_NG_LIBRARY_DIRS ${ZLIB_NG_LIBRARY_DIRS}/lib) + endif(EXISTS "${ZLIB_NG_LIBRARY_DIRS}/lib") +endif() + +find_library(ZLIB_NG_LIBRARY NAMES z-ng libz-ng zlib-ng libz-ng.a) + +set(ZLIB_NG_LIBRARIES ${ZLIB_NG_LIBRARY}) +set(ZLIB_NG_INCLUDE_DIR ${ZLIB_NG_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ZLIB_NG DEFAULT_MSG ZLIB_NG_LIBRARY ZLIB_NG_INCLUDE_DIR) + +if(ZLIB_NG_INCLUDE_DIR AND ZLIB_NG_LIBRARIES) + set(ZLIB_NG_FOUND TRUE) +else(ZLIB_NG_INCLUDE_DIR AND ZLIB_NG_LIBRARIES) + set(ZLIB_NG_FOUND FALSE) +endif(ZLIB_NG_INCLUDE_DIR AND ZLIB_NG_LIBRARIES) + +if(ZLIB_NG_FOUND) + message(STATUS "Found zlib-ng: ${ZLIB_NG_LIBRARIES}, ${ZLIB_NG_INCLUDE_DIR}") +endif() + +#[[ +Copyright https://github.com/zlib-ng/minizip-ng, 2021 + +Condition of use and distribution are the same as zlib: + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgement in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+]]# diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindZSTD.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindZSTD.cmake new file mode 100644 index 0000000..2dcc844 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/FindZSTD.cmake @@ -0,0 +1,8 @@ +find_path(ZSTD_INCLUDE_DIR zstd.h) + +find_library(ZSTD_LIBRARY NAMES zstd) + +if(ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) + set(ZSTD_FOUND TRUE) + message(STATUS "Found ZSTD library: ${ZSTD_LIBRARY}") +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-aarch64.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-aarch64.cmake new file mode 100644 index 0000000..0bb1903 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-aarch64.cmake @@ -0,0 +1,26 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) +set(CMAKE_SYSTEM_VERSION 1) + +message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}") + +set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu") +set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu") + +set(CMAKE_CROSSCOMPILING TRUE) +set(CMAKE_CROSSCOMPILING_EMULATOR qemu-aarch64 -L /usr/${CMAKE_C_COMPILER_TARGET}/) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) +if(NOT C_COMPILER_FULL_PATH) + message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") +endif() +set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) +if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-armhf.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-armhf.cmake 
new file mode 100644 index 0000000..85db326 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-armhf.cmake @@ -0,0 +1,27 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR arm) +set(CMAKE_SYSTEM_VERSION 1) + +message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}") + +set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf) +set(CMAKE_CXX_COMPILER_TARGET arm-linux-gnueabihf) + +set(CMAKE_CROSSCOMPILING TRUE) +set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) +if(NOT C_COMPILER_FULL_PATH) + message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") +endif() +set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) +if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-armsf.cmake b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-armsf.cmake new file mode 100644 index 0000000..84cd3e5 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/cmake/blosc2/Modules/toolchain-armsf.cmake @@ -0,0 +1,31 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR arm) +set(CMAKE_SYSTEM_VERSION 1) + +message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}") + +if(NOT DEFINED CMAKE_C_COMPILER_TARGET) + set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabi) +endif() +if(NOT DEFINED CMAKE_CXX_COMPILER_TARGET) + set(CMAKE_CXX_COMPILER_TARGET arm-linux-gnueabi) +endif() + +set(CMAKE_CROSSCOMPILING TRUE) +set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/) + 
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) +if(NOT C_COMPILER_FULL_PATH) + message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") +endif() +set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) +if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) +endif() diff --git a/venv/Lib/site-packages/blosc2/lib/libblosc2.dll b/venv/Lib/site-packages/blosc2/lib/libblosc2.dll new file mode 100644 index 0000000..9c42d1b Binary files /dev/null and b/venv/Lib/site-packages/blosc2/lib/libblosc2.dll differ diff --git a/venv/Lib/site-packages/blosc2/lib/libblosc2.lib b/venv/Lib/site-packages/blosc2/lib/libblosc2.lib new file mode 100644 index 0000000..f53ae81 Binary files /dev/null and b/venv/Lib/site-packages/blosc2/lib/libblosc2.lib differ diff --git a/venv/Lib/site-packages/blosc2/lib/pkgconfig/blosc2.pc b/venv/Lib/site-packages/blosc2/lib/pkgconfig/blosc2.pc new file mode 100644 index 0000000..571edd1 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/lib/pkgconfig/blosc2.pc @@ -0,0 +1,11 @@ +libdir=C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/lib +includedir=C:/Users/runneradmin/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/C:/Users/RUNNER~1/AppData/Local/Temp/tmpz5scj3ge/wheel/platlib/blosc2/include + +Name: blosc2 +Description: A blocking, shuffling and lossless compression library +URL: https://blosc.org/ +Version: 2.23.1 + +Requires: +Libs: -L${libdir} -lblosc2 +Cflags: -I${includedir} diff --git a/venv/Lib/site-packages/blosc2/lib/tcc.dll b/venv/Lib/site-packages/blosc2/lib/tcc.dll new file mode 100644 index 0000000..4864106 
Binary files /dev/null and b/venv/Lib/site-packages/blosc2/lib/tcc.dll differ diff --git a/venv/Lib/site-packages/blosc2/linalg.py b/venv/Lib/site-packages/blosc2/linalg.py new file mode 100644 index 0000000..b1bda5e --- /dev/null +++ b/venv/Lib/site-packages/blosc2/linalg.py @@ -0,0 +1,822 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from __future__ import annotations + +import builtins +import math +import warnings +from itertools import product +from typing import TYPE_CHECKING, Any + +import numpy as np + +import blosc2 + +from .utils import get_intersecting_chunks, nptranspose, npvecdot, slice_to_chunktuple + +if TYPE_CHECKING: + from collections.abc import Sequence + + +def matmul(x1: blosc2.Array, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray: + """ + Computes the matrix product between two Blosc2 NDArrays. + + Parameters + ---------- + x1: :ref:`NDArray` | np.ndarray + The first input array. + x2: :ref:`NDArray` | np.ndarray + The second input array. + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + The matrix product of the inputs. This is a scalar only when both x1, + x2 are 1-d vectors. + + Raises + ------ + ValueError + If the last dimension of ``x1`` is not the same size as + the second-to-last dimension of ``x2``. + + If a scalar value is passed in. + + References + ---------- + `numpy.matmul `_ + + Examples + -------- + For 2-D arrays it is the matrix product: + + >>> import numpy as np + >>> import blosc2 + >>> a = np.array([[1, 2], + ... [3, 4]]) + >>> nd_a = blosc2.asarray(a) + >>> b = np.array([[2, 3], + ... 
[2, 1]]) + >>> nd_b = blosc2.asarray(b) + >>> blosc2.matmul(nd_a, nd_b) + array([[ 6, 5], + [14, 13]]) + + For 2-D mixed with 1-D, the result is the usual. + + >>> a = np.array([[1, 3], + ... [0, 1]]) + >>> nd_a = blosc2.asarray(a) + >>> v = np.array([1, 2]) + >>> nd_v = blosc2.asarray(v) + >>> blosc2.matmul(nd_a, nd_v) + array([7, 2]) + >>> blosc2.matmul(nd_v, nd_a) + array([1, 5]) + + """ + # Validate arguments are not scalars + if np.isscalar(x1) or np.isscalar(x2): + raise ValueError("Arguments can't be scalars.") + + # Makes a SimpleProxy if inputs are not blosc2 arrays + x1, x2 = blosc2.as_simpleproxy(x1, x2) + + # Validate matrix multiplication compatibility + if x1.shape[builtins.max(-1, -len(x2.shape))] != x2.shape[builtins.max(-2, -len(x2.shape))]: + raise ValueError("Shapes are not aligned for matrix multiplication.") + + # Promote 1D arrays to 2D if necessary + x1_is_vector = False + x2_is_vector = False + if x1.ndim == 1: + x1 = blosc2.expand_dims(x1, axis=0) # (N,) -> (1, N) + x1_is_vector = True + if x2.ndim == 1: + x2 = blosc2.expand_dims(x2, axis=1) # (M,) -> (M, 1) + x2_is_vector = True + + n, k = x1.shape[-2:] + m = x2.shape[-1] + result_shape = np.broadcast_shapes(x1.shape[:-2], x2.shape[:-2]) + (n, m) + # For matmul, we don't want to reduce the chunksize, as experiments show that + # the larger, the better (as long as some limits are not exceeded). 
+ kwargs["_chunksize_reduc_factor"] = 1 + result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs) + + if 0 not in result.shape + x1.shape + x2.shape: # if any array is empty, return array of 0s + p, q = result.chunks[-2:] + r = x2.chunks[-1] + + intersecting_chunks = get_intersecting_chunks((), result.shape[:-2], result.chunks[:-2]) + for chunk in intersecting_chunks: + chunk = chunk.raw + for row in range(0, n, p): + row_end = builtins.min(row + p, n) + for col in range(0, m, q): + col_end = builtins.min(col + q, m) + for aux in range(0, k, r): + aux_end = builtins.min(aux + r, k) + bx1 = ( + x1[chunk[-x1.ndim + 2 :] + (slice(row, row_end), slice(aux, aux_end))] + if x1.ndim > 2 + else x1[row:row_end, aux:aux_end] + ) + bx2 = ( + x2[chunk[-x2.ndim + 2 :] + (slice(aux, aux_end), slice(col, col_end))] + if x2.ndim > 2 + else x2[aux:aux_end, col:col_end] + ) + result[chunk + (slice(row, row_end), slice(col, col_end))] += np.matmul(bx1, bx2) + + if x1_is_vector: + result = result.squeeze(axis=-2) + if x2_is_vector: + result = result.squeeze(axis=-1) + + return result + + +def tensordot( + x1: blosc2.NDArray, + x2: blosc2.NDArray, + axes: int | tuple[Sequence[int], Sequence[int]] = 2, + **kwargs: Any, +) -> blosc2.NDArray: + """ + Returns a tensor contraction of x1 and x2 over specific axes. The tensordot function corresponds to the + generalized matrix product. Note: Neither argument is complex-conjugated or transposed. If conjugation and/or transposition is desired, these operations should be explicitly + performed prior to computing the generalized matrix product. + + Parameters + ---------- + x1: blosc2.NDArray + First input array. Should have a numeric data type. + + x2: blosc2.NDArray + Second input array. Should have a numeric data type. Corresponding contracted axes of x1 and x2 + must be equal. 
+ + axes: int | tuple[Sequence[int], Sequence[int]] + Number of axes (dimensions) to contract or explicit sequences of axis (dimension) indices for x1 and x2, + respectively. + + * If axes is an int equal to N, then contraction is performed over the last N axes of x1 and the first N axes of x2 in order. The size of each corresponding axis (dimension) must match. Must be nonnegative. + + * If N equals 0, the result is the tensor (outer) product. + + * If N equals 1, the result is the tensor dot product. + + * If N equals 2, the result is the tensor double contraction (default). + + * If axes is a tuple of two sequences (x1_axes, x2_axes), the first sequence applies to x1 and the second sequence to x2. + Both sequences must have the same length. Each axis (dimension) x1_axes[i] for x1 must have the same size as the respective + axis (dimension) x2_axes[i] for x2. Each index referred to in a sequence must be unique. If x1 has rank (i.e, number of dimensions) N, + a valid x1 axis must reside on the half-open interval [-N, N). If x2 has rank M, a valid x2 axis must reside on the half-open interval [-M, M). + + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: blosc2.NDArray + An array containing the tensor contraction whose shape consists of the non-contracted axes (dimensions) of the first array x1, followed by + the non-contracted axes (dimensions) of the second array x2. 
+ """ + fast_path = kwargs.pop("fast_path", None) # for testing purposes + # TODO: add fast path for when don't need to change chunkshapes + + # Makes a SimpleProxy if inputs are not blosc2 arrays + x1, x2 = blosc2.as_simpleproxy(x1, x2) + + if isinstance(axes, tuple): + a_axes, b_axes = axes + a_axes = list(a_axes) + b_axes = list(b_axes) + if len(a_axes) != len(b_axes): + raise ValueError("Lengths of reduction axes for x1 and x2 must be equal!") + # need to track order of b_axes; later we cycle through a_axes sorted for op_chunk + # a_sorted[inv_sort][b_sort] matches b_sorted since b_axes matches a_axes + inv_sort = np.argsort(np.argsort(a_axes)) + b_sort = np.argsort(b_axes) + order = inv_sort[b_sort] + a_keep, b_keep = [True] * x1.ndim, [True] * x2.ndim + for i, j in zip(a_axes, b_axes, strict=False): + i = x1.ndim + i if i < 0 else i + j = x2.ndim + j if j < 0 else j + a_keep[i] = False + b_keep[j] = False + a_axes = [] if a_axes == () else a_axes # handle no reduction + b_axes = [] if b_axes == () else b_axes # handle no reduction + elif isinstance(axes, int): + if axes < 0: + raise ValueError("Integer axes argument must be nonnegative!") + order = np.arange(axes, dtype=int) # no reordering required + a_axes = list(range(x1.ndim - axes, x1.ndim)) + b_axes = list(range(0, axes)) + a_keep = [i + axes < x1.ndim for i in range(x1.ndim)] + b_keep = [i >= axes for i in range(x2.ndim)] + else: + raise ValueError("Axes argument must be two element tuple of sequences or an integer.") + x1shape = np.array(x1.shape) + x2shape = np.array(x2.shape) + a_chunks_red = tuple(c for i, c in enumerate(x1.chunks) if not a_keep[i]) + a_shape_red = tuple(c for i, c in enumerate(x1.shape) if not a_keep[i]) + + if np.any(x1shape[a_axes] != x2shape[b_axes]): + raise ValueError("x1 and x2 must have same shapes along reduction dimensions") + + result_shape = tuple(x1shape[a_keep]) + tuple(x2shape[b_keep]) + result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs) 
+ + op_chunks = [ + slice_to_chunktuple(slice(0, s, 1), c) for s, c in zip(x1shape[a_axes], a_chunks_red, strict=True) + ] + res_chunks = [ + slice_to_chunktuple(s, c) + for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True) + ] + a_selection = (slice(None, None, 1),) * x1.ndim + b_selection = (slice(None, None, 1),) * x2.ndim + + chunk_memory = np.prod(result.chunks) * ( + np.prod(x1shape[a_axes]) * x1.dtype.itemsize + np.prod(x2shape[b_axes]) * x2.dtype.itemsize + ) + if chunk_memory < blosc2.MAX_FAST_PATH_SIZE: + fast_path = True if fast_path is None else fast_path + fast_path = False if fast_path is None else fast_path # fast_path set via kwargs for testing + + # adapted from numpy.tensordot + a_keep_axes = [i for i, k in enumerate(a_keep) if k] + b_keep_axes = [i for i, k in enumerate(b_keep) if k] + newaxes_a = a_keep_axes + a_axes + newaxes_b = b_axes + b_keep_axes + + for rchunk in product(*res_chunks): + res_chunk = tuple( + slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1) + for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True) + ) + rchunk_iter = iter(res_chunk) + a_selection = tuple(next(rchunk_iter) if a else slice(None, None, 1) for a in a_keep) + b_selection = tuple(next(rchunk_iter) if b else slice(None, None, 1) for b in b_keep) + res_chunks = tuple(s.stop - s.start for s in res_chunk) + for ochunk in product(*op_chunks): + if not fast_path: # operands too big, have to go chunk-by-chunk + op_chunk = tuple( + slice(rc * rcs, builtins.min((rc + 1) * rcs, x1s), 1) + for rc, rcs, x1s in zip(ochunk, a_chunks_red, a_shape_red, strict=True) + ) # use x1 chunk shape to iterate over reduction axes + ochunk_iter = iter(op_chunk) + a_selection = tuple( + next(ochunk_iter) if not a else as_ for as_, a in zip(a_selection, a_keep, strict=True) + ) + # have to permute to match order of a_axes + order_iter = iter(order) + b_selection = tuple( + op_chunk[next(order_iter)] if not b else bs_ + for bs_, b in 
zip(b_selection, b_keep, strict=True) + ) + bx1 = x1[a_selection] + bx2 = x2[b_selection] + # adapted from numpy tensordot + newshape_a = ( + math.prod([bx1.shape[i] for i in a_keep_axes]), + math.prod([bx1.shape[a] for a in a_axes]), + ) + newshape_b = ( + math.prod([bx2.shape[b] for b in b_axes]), + math.prod([bx2.shape[i] for i in b_keep_axes]), + ) + at = nptranspose(bx1, newaxes_a).reshape(newshape_a) + bt = nptranspose(bx2, newaxes_b).reshape(newshape_b) + res = np.dot(at, bt) + result[res_chunk] += res.reshape(res_chunks) + if fast_path: # already done everything + break + return result + + +def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) -> blosc2.NDArray: + """ + Computes the (vector) dot product of two arrays. Complex conjugates x1. + + Parameters + ---------- + x1: blosc2.NDArray + First input array. Must have floating-point data type. + + x2: blosc2.NDArray + Second input array. Must be compatible with x1 for all non-contracted axes (via broadcasting). + The size of the axis over which to compute the dot product must be the same size as the respective axis in x1. + Must have a floating-point data type. + + axis: int + The axis (dimension) of x1 and x2 containing the vectors for which to compute the dot product. + Should be an integer on the interval [-N, -1], where N is min(x1.ndim, x2.ndim). Default: -1. + + Returns + ------- + out: blosc2.NDArray + If x1 and x2 are both one-dimensional arrays, a zero-dimensional containing the dot product; + otherwise, a non-zero-dimensional array containing the dot products and having rank N-1, + where N is the rank (number of dimensions) of the shape determined according to broadcasting + along the non-contracted axes. 
+ """ + fast_path = kwargs.pop("fast_path", None) # for testing purposes + # Added this to pass array-api tests (which use internal getitem to check results) + if isinstance(x1, np.ndarray) and isinstance(x2, np.ndarray): + return npvecdot(x1, x2, axis=axis) + + # Makes a SimpleProxy if inputs are not blosc2 arrays + x1, x2 = blosc2.as_simpleproxy(x1, x2) + + N = builtins.min(x1.ndim, x2.ndim) + if axis < -N or axis > -1: + raise ValueError("axis must be on interval [-N,-1].") + a_axes = axis + x1.ndim + b_axes = axis + x2.ndim + a_keep = [True] * x1.ndim + a_keep[a_axes] = False + b_keep = [True] * x2.ndim + b_keep[b_axes] = False + + x1shape = np.array(x1.shape) + x2shape = np.array(x2.shape) + a_chunks_red = x1.chunks[a_axes] + a_shape_red = x1.shape[a_axes] + + if np.any(x1shape[a_axes] != x2shape[b_axes]): + raise ValueError("x1 and x2 must have same shapes along reduction dimensions") + + result_shape = np.broadcast_shapes(x1shape[a_keep], x2shape[b_keep]) + result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs) + + res_chunks = [ + slice_to_chunktuple(s, c) + for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True) + ] + a_selection = (slice(None, None, 1),) * x1.ndim + b_selection = (slice(None, None, 1),) * x2.ndim + + chunk_memory = np.prod(result.chunks) * ( + x1shape[a_axes] * x1.dtype.itemsize + x2shape[b_axes] * x2.dtype.itemsize + ) + if chunk_memory < blosc2.MAX_FAST_PATH_SIZE: + fast_path = True if fast_path is None else fast_path + fast_path = False if fast_path is None else fast_path # fast_path set via kwargs for testing + + for rchunk in product(*res_chunks): + res_chunk = tuple( + slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1) + for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True) + ) + # handle broadcasting - if x1, x2 different ndim, could have to prepend 1s + rchunk_iter = ( + slice(0, 1, 1) if s == 1 else r + for r, s in zip(res_chunk[-x1.ndim + 1 :], 
def permute_dims(
    arr: blosc2.Array, axes: tuple[int] | list[int] | None = None, **kwargs: Any
) -> blosc2.NDArray:
    """
    Permutes the axes (dimensions) of an array.

    Parameters
    ----------
    arr: :ref:`blosc2.NDArray` | np.ndarray
        The input array.
    axes: tuple[int], list[int], optional
        The desired permutation of axes. If None, the axes are reversed by default.
        If specified, axes must be a tuple or list representing a permutation of
        ``[0, 1, ..., N-1]``, where ``N`` is the number of dimensions of the input array.
        Negative indices are also supported. The *i*-th axis of the result will correspond
        to the axis numbered ``axes[i]`` of the input.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.
        When ``chunks`` is absent or None, the input chunks permuted by ``axes`` are used.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        A Blosc2 :ref:`blosc2.NDArray` with axes transposed. Scalars and inputs with
        fewer than two dimensions are returned unchanged.

    Raises
    ------
    ValueError
        If ``axes`` is not a valid permutation of the dimensions of ``arr``.

    References
    ----------
    `numpy.transpose <https://numpy.org/doc/stable/reference/generated/numpy.transpose.html>`_

    `permute_dims <https://data-apis.org/array-api/latest/API_specification/generated/array_api.permute_dims.html>`_

    Examples
    --------
    For 2-D arrays it is the matrix transposition as usual:

    >>> import blosc2
    >>> a = blosc2.arange(1, 10).reshape((3, 3))
    >>> a[:]
    array([[1, 2, 3],
           [4, 5, 6],
           [7, 8, 9]])
    >>> at = blosc2.permute_dims(a)
    >>> at[:]
    array([[1, 4, 7],
           [2, 5, 8],
           [3, 6, 9]])

    For 3-D arrays:

    >>> import blosc2
    >>> a = blosc2.arange(1, 25).reshape((2, 3, 4))
    >>> a[:]
    array([[[ 1,  2,  3,  4],
            [ 5,  6,  7,  8],
            [ 9, 10, 11, 12]],
           [[13, 14, 15, 16],
            [17, 18, 19, 20],
            [21, 22, 23, 24]]])

    >>> at = blosc2.permute_dims(a, axes=(1, 0, 2))
    >>> at[:]
    array([[[ 1,  2,  3,  4],
            [13, 14, 15, 16]],
           [[ 5,  6,  7,  8],
            [17, 18, 19, 20]],
           [[ 9, 10, 11, 12],
            [21, 22, 23, 24]]])
    """
    # Nothing to permute for scalars and 0-D/1-D inputs.
    if np.isscalar(arr) or arr.ndim < 2:
        return arr

    # Makes a SimpleProxy if input is not blosc2 array
    arr = blosc2.as_simpleproxy(arr)
    rank = arr.ndim

    if axes is None:
        axes = tuple(range(rank))[::-1]
    else:
        # Normalise negative entries before validating the permutation.
        axes = tuple(ax if ax >= 0 else rank + ax for ax in axes)
        if sorted(axes) != list(range(rank)):
            raise ValueError(f"axes {axes} is not a valid permutation of {rank} dimensions")

    src_shape = arr.shape
    src_chunks = arr.chunks
    if kwargs.get("chunks") is None:
        kwargs["chunks"] = tuple(src_chunks[ax] for ax in axes)

    out = blosc2.empty(shape=tuple(src_shape[ax] for ax in axes), dtype=arr.dtype, **kwargs)

    # handle SimpleProxy which doesn't have iterchunks_info
    if hasattr(arr, "iterchunks_info"):
        chunk_infos = arr.iterchunks_info()
        coords_attr = "coords"
    else:
        chunk_infos = get_intersecting_chunks((), src_shape, src_chunks)
        coords_attr = "raw"

    for info in chunk_infos:
        # (start, stop) of this source chunk along every source axis, clipped to the shape.
        bounds = [
            (coord * size, builtins.min(size * (coord + 1), extent))
            for coord, size, extent in zip(
                getattr(info, coords_attr), src_chunks, src_shape, strict=False
            )
        ]
        src_slice = tuple(slice(lo, hi) for lo, hi in bounds)
        # The destination window is the source window with its axes permuted.
        dst_slice = tuple(slice(bounds[ax][0], bounds[ax][1]) for ax in axes)
        out[dst_slice] = np.ascontiguousarray(nptranspose(arr[src_slice], axes=axes))

    return out
def transpose(x, **kwargs: Any) -> blosc2.NDArray:
    """
    Returns a Blosc2 blosc2.NDArray with axes transposed.

    Only 2D arrays are supported for now. Other dimensions raise an error.

    .. deprecated::
        Use :func:`matrix_transpose` or :func:`permute_dims` instead.

    Parameters
    ----------
    x: :ref:`blosc2.NDArray`
        The input array.
    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        The Blosc2 blosc2.NDArray with axes transposed. Scalars and inputs with fewer
        than two dimensions are returned unchanged.

    Raises
    ------
    ValueError
        If the input has more than two dimensions.

    References
    ----------
    `numpy.transpose <https://numpy.org/doc/stable/reference/generated/numpy.transpose.html>`_
    """
    deprecation_msg = (
        "transpose is deprecated and will be removed in a future version. "
        "Use matrix_transpose or permute_dims instead."
    )
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)

    # If arguments are dimension < 2, they are returned
    nothing_to_do = np.isscalar(x) or x.ndim < 2
    if nothing_to_do:
        return x

    # Makes a SimpleProxy if input is not blosc2 array
    proxied = blosc2.as_simpleproxy(x)
    if proxied.ndim <= 2:
        return permute_dims(proxied, **kwargs)
    raise ValueError("Transposing arrays with dimension greater than 2 is not supported yet.")


def matrix_transpose(arr: blosc2.Array, **kwargs: Any) -> blosc2.NDArray:
    """
    Transposes a matrix (or a stack of matrices).

    Parameters
    ----------
    arr: :ref:`blosc2.NDArray` | np.ndarray
        The input blosc2.NDArray having shape ``(..., M, N)`` and whose innermost two
        dimensions form ``MxN`` matrices.
    kwargs: Any, optional
        Forwarded unchanged to :func:`permute_dims`.

    Returns
    -------
    out: :ref:`blosc2.NDArray`
        A new :ref:`blosc2.NDArray` containing the transpose for each matrix and having
        shape ``(..., N, M)``.
    """
    # Makes a SimpleProxy if input is not blosc2 array
    arr = blosc2.as_simpleproxy(arr)
    if np.isscalar(arr) or arr.ndim <= 2:
        # permute_dims' default (axes=None) already covers the plain 2-D case.
        return permute_dims(arr, None, **kwargs)
    # Keep the leading (stack) axes in place and swap only the last two.
    swapped = list(range(arr.ndim - 2)) + [arr.ndim - 1, arr.ndim - 2]
    return permute_dims(arr, swapped, **kwargs)
def diagonal(x: blosc2.NDArray, offset: int = 0) -> blosc2.NDArray:
    """
    Returns the specified diagonals of a matrix (or a stack of matrices) x.

    Parameters
    ----------
    x: blosc2.NDArray
        Input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices.

    offset: int
        Offset specifying the off-diagonal relative to the main diagonal.

        * offset = 0: the main diagonal.
        * offset > 0: off-diagonal above the main diagonal.
        * offset < 0: off-diagonal below the main diagonal.

        Default: 0.

    Returns
    -------
    out: blosc2.NDArray
        An array containing the diagonals and whose shape is determined by
        removing the last two dimensions and appending a dimension equal to the size of the
        resulting diagonals.

    Raises
    ------
    ValueError
        If ``x`` has fewer than two dimensions.

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.diagonal.html#diagonal
    """
    # Makes a SimpleProxy if input is not blosc2 array
    x = blosc2.as_simpleproxy(x)
    if x.ndim < 2:
        # Previously surfaced as an opaque tuple-unpacking ValueError.
        raise ValueError("diagonal requires an input with at least two dimensions.")
    n_rows, n_cols = x.shape[-2:]
    if offset < 0:
        # Below the main diagonal: start |offset| rows down, at column 0.
        start = -offset
        rows = np.arange(start, builtins.min(start + n_cols, n_rows))
        cols = np.arange(len(rows))
    elif offset > 0:
        # Above the main diagonal: start `offset` columns right, at row 0.
        cols = np.arange(offset, builtins.min(offset + n_rows, n_cols))
        rows = np.arange(len(cols))
    else:
        rows = cols = np.arange(builtins.min(n_rows, n_cols))
    # Full slices over the leading (stack) axes, paired index arrays over the last two.
    key = (slice(None, None, 1),) * (x.ndim - 2) + (rows, cols)
    # TODO: change to use slice to give optimised compressing
    return blosc2.asarray(x[key])


def outer(x1: blosc2.NDArray, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray:
    """
    Returns the outer product of two vectors x1 and x2.

    Parameters
    ----------
    x1: blosc2.NDArray
        First one-dimensional input array of size N. Must have a numeric data type.

    x2: blosc2.NDArray
        Second one-dimensional input array of size M. Must have a numeric data type.

    kwargs: Any, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    out: blosc2.NDArray
        A two-dimensional array containing the outer product and whose shape is (N, M).

    Raises
    ------
    ValueError
        If either input is not one-dimensional.
    """
    x1, x2 = blosc2.as_simpleproxy(x1, x2)
    if (x1.ndim != 1) or (x2.ndim != 1):
        raise ValueError("outer only valid for 1D inputs.")
    # A tensordot with no contracted axes is the outer product.
    return tensordot(x1, x2, ((), ()), **kwargs)
def cholesky(x: blosc2.NDArray, upper: bool = False) -> blosc2.NDArray:
    """
    Cholesky decomposition (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cholesky.html#cholesky

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("cholesky is not implemented yet.")


def cross(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1) -> blosc2.NDArray:
    """
    Cross product of 3-element vectors (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cross.html#cross

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("cross is not implemented yet.")


def det(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Determinant of a square matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.det.html#det

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("det is not implemented yet.")


def eigh(x: blosc2.NDArray) -> tuple[blosc2.NDArray, blosc2.NDArray]:
    """
    Eigenvalue decomposition of a symmetric/Hermitian matrix (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigh.html#eigh

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("eigh is not implemented yet.")


def eigvalsh(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Eigenvalues of a symmetric/Hermitian matrix (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigvalsh.html#eigvalsh

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("eigvalsh is not implemented yet.")


def inv(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Multiplicative inverse of a square matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.inv.html#inv

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("inv is not implemented yet.")
def matrix_norm(
    x: blosc2.NDArray, keepdims: bool = False, ord: int | float | str | None = "fro"
) -> blosc2.NDArray:
    """
    Matrix norm of a matrix or stack of matrices (not implemented, but could be doable).

    ``ord`` may take values:

    * 'fro' - Frobenius norm
    * 'nuc' - nuclear norm
    * 1 - max(sum(abs(x), axis=-2))
    * 2 - largest singular value
    * inf - max(sum(abs(x), axis=-1))
    * -1 - min(sum(abs(x), axis=-2))
    * -2 - smallest singular value
    * -inf - min(sum(abs(x), axis=-1))

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_norm.html#matrix_norm

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("matrix_norm is not implemented yet.")


def matrix_power(x: blosc2.NDArray, n: int) -> blosc2.NDArray:
    """
    Raise a square matrix (or stack of matrices) to an integer power (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_power.html#matrix_power

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("matrix_power is not implemented yet.")


def matrix_rank(x: blosc2.NDArray, rtol: float | blosc2.NDArray | None = None) -> blosc2.NDArray:
    """
    Rank of a matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_rank.html#matrix_rank

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("matrix_rank is not implemented yet.")


def pinv(x: blosc2.NDArray, rtol: float | blosc2.NDArray | None = None) -> blosc2.NDArray:
    """
    Moore-Penrose pseudo-inverse of a matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.pinv.html#pinv

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("pinv is not implemented yet.")


def qr(x: blosc2.NDArray, mode: str = "reduced") -> tuple[blosc2.NDArray, blosc2.NDArray]:
    """
    QR decomposition of a matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.qr.html#qr

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("qr is not implemented yet.")
def slogdet(x: blosc2.NDArray) -> tuple[blosc2.NDArray, blosc2.NDArray]:
    """
    Sign and natural logarithm of the absolute value of the determinant (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.slogdet.html#slogdet

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("slogdet is not implemented yet.")


def solve(x1: blosc2.NDArray, x2: blosc2.NDArray) -> blosc2.NDArray:
    """
    Solution of the linear system ``x1 @ X = x2`` (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.solve.html#solve

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("solve is not implemented yet.")


def svd(
    x: blosc2.NDArray, full_matrices: bool = True
) -> tuple[blosc2.NDArray, blosc2.NDArray, blosc2.NDArray]:
    """
    Singular value decomposition of a matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svd.html#svd

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("svd is not implemented yet.")


def svdvals(x: blosc2.NDArray) -> blosc2.NDArray:
    """
    Singular values of a matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svdvals.html#svdvals

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("svdvals is not implemented yet.")


def trace(x: blosc2.NDArray, offset: int = 0, dtype: np.dtype | None = None) -> blosc2.NDArray:
    """
    Sum along the specified diagonals of a matrix or stack of matrices (not implemented).

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.trace.html#trace

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("trace is not implemented yet.")


def vector_norm(
    x: blosc2.NDArray,
    axis: int | tuple[int] | None = None,
    keepdims: bool = False,
    ord: int | float = 2,
) -> blosc2.NDArray:
    """
    Vector norm of a vector or batch of vectors (not implemented, but could be doable).

    ``ord`` may take values:

    * p: int - p-norm
    * inf - max(abs(x))
    * -inf - min(abs(x))

    Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.vector_norm.html#vector_norm

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("vector_norm is not implemented yet.")
ord may take values: + # * p: int - p-norm + # * inf - max(x) + # * -inf - min(abs(x)) + + # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.vector_norm.html#vector_norm + # """ + raise NotImplementedError diff --git a/venv/Lib/site-packages/blosc2/me_jit_glue.js b/venv/Lib/site-packages/blosc2/me_jit_glue.js new file mode 100644 index 0000000..65ae823 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/me_jit_glue.js @@ -0,0 +1,462 @@ +/* Runtime-agnostic wasm32 JIT JS glue for miniexpr. + * Callers provide runtime facilities via the `runtime` object. + */ +(function(root) { + 'use strict'; + + function _meJitInstantiate(runtime, wasmBytes, bridgeLookupFnIdx) { + if (!runtime || !wasmBytes) { + return 0; + } + + var HEAPF64 = runtime.HEAPF64; + var HEAPF32 = runtime.HEAPF32; + var wasmMemory = runtime.wasmMemory; + var wasmTable = runtime.wasmTable; + var stackSave = runtime.stackSave; + var stackAlloc = runtime.stackAlloc; + var stackRestore = runtime.stackRestore; + var lengthBytesUTF8 = runtime.lengthBytesUTF8; + var stringToUTF8 = runtime.stringToUTF8; + var addFunction = runtime.addFunction; + var err = runtime.err || function(message) { + if (typeof console !== 'undefined' && typeof console.error === 'function') { + console.error(message); + } + }; + + if (!HEAPF64 || !HEAPF32 || !wasmMemory || !wasmTable || + typeof stackSave !== 'function' || typeof stackAlloc !== 'function' || + typeof stackRestore !== 'function' || typeof lengthBytesUTF8 !== 'function' || + typeof stringToUTF8 !== 'function' || typeof addFunction !== 'function') { + err('[me-wasm-jit] invalid runtime object'); + return 0; + } + + var src = wasmBytes; + var enc = new TextEncoder(); + var dec = new TextDecoder(); + /* --- LEB128 helpers ------------------------------------------------- */ + function readULEB(buf, pos) { + var r = 0, s = 0, b; + do { b = buf[pos++]; r |= (b & 0x7f) << s; s += 7; } while (b & 0x80); + return [r, pos]; + } + function 
encULEB(v) { + var a = []; + do { var b = v & 0x7f; v >>>= 7; if (v) b |= 0x80; a.push(b); } while (v); + return a; + } + function encStr(s) { + var b = enc.encode(s); + return encULEB(b.length).concat(Array.from(b)); + } + function readName(buf, pos) { + var t = readULEB(buf, pos); + var n = t[0]; + pos = t[1]; + var s = dec.decode(buf.subarray(pos, pos + n)); + return [s, pos + n]; + } + function skipLimits(buf, pos) { + var t = readULEB(buf, pos); + var flags = t[0]; + pos = t[1]; + t = readULEB(buf, pos); + pos = t[1]; + if (flags & 0x01) { + t = readULEB(buf, pos); + pos = t[1]; + } + return pos; + } + function encMemoryImport() { + var imp = []; + imp = imp.concat(encStr("env"), encStr("memory")); + imp.push(0x02, 0x00); /* memory, limits-flag: no-max */ + imp = imp.concat(encULEB(256)); + return imp; + } + function buildImportSecWithMemory() { + var body = encULEB(1); + body = body.concat(encMemoryImport()); + var sec = [0x02]; + sec = sec.concat(encULEB(body.length)); + return sec.concat(body); + } + function patchImportSec(secData) { + var pos = 0; + var t = readULEB(secData, pos); + var nimports = t[0]; + pos = t[1]; + var entries = []; + var hasEnvMemory = false; + for (var i = 0; i < nimports; i++) { + var start = pos; + var moduleName = ""; + var fieldName = ""; + t = readName(secData, pos); + moduleName = t[0]; + pos = t[1]; + t = readName(secData, pos); + fieldName = t[0]; + pos = t[1]; + var kind = secData[pos++]; + if (kind === 0x00) { + t = readULEB(secData, pos); + pos = t[1]; + } + else if (kind === 0x01) { + pos++; /* elem type */ + pos = skipLimits(secData, pos); + } + else if (kind === 0x02) { + pos = skipLimits(secData, pos); + if (moduleName === "env" && fieldName === "memory") { + hasEnvMemory = true; + } + } + else if (kind === 0x03) { + pos += 2; /* valtype + mutability */ + } + else { + throw new Error("unsupported wasm import kind " + kind); + } + entries.push(Array.from(secData.subarray(start, pos))); + } + if (!hasEnvMemory) { + 
entries.push(encMemoryImport()); + } + var body = encULEB(entries.length); + for (var ei = 0; ei < entries.length; ei++) { + body = body.concat(entries[ei]); + } + var sec = [0x02]; + sec = sec.concat(encULEB(body.length)); + return sec.concat(body); + } + function buildEnvImports() { + var bridgeLookup = null; + var bridgeCache = Object.create(null); + if (bridgeLookupFnIdx) { + bridgeLookup = wasmTable.get(bridgeLookupFnIdx); + } + function lookupBridge(name) { + if (!bridgeLookup) { + return null; + } + if (Object.prototype.hasOwnProperty.call(bridgeCache, name)) { + return bridgeCache[name]; + } + var sp = stackSave(); + try { + var nbytes = lengthBytesUTF8(name) + 1; + var namePtr = stackAlloc(nbytes); + stringToUTF8(name, namePtr, nbytes); + var fnIdx = bridgeLookup(namePtr) | 0; + bridgeCache[name] = fnIdx ? wasmTable.get(fnIdx) : null; + } finally { + stackRestore(sp); + } + return bridgeCache[name]; + } + function bindBridge(name, fallback) { + var fn = lookupBridge(name); + return fn ? fn : fallback; + } + function fdim(x, y) { return x > y ? (x - y) : 0.0; } + function copysign(x, y) { + if (y === 0) { + return (1 / y === -Infinity) ? -Math.abs(x) : Math.abs(x); + } + return y < 0 ? -Math.abs(x) : Math.abs(x); + } + function ldexp(x, e) { return x * Math.pow(2.0, e); } + function rint(x) { + if (!isFinite(x)) { + return x; + } + var n = Math.round(x); + if (Math.abs(x - n) === 0.5) { + n = 2 * Math.round(x / 2); + } + return n; + } + function remainder(x, y) { + if (!isFinite(x) || !isFinite(y) || y === 0.0) { + return NaN; + } + return x - y * Math.round(x / y); + } + function erfApprox(x) { + var sign = x < 0 ? 
-1.0 : 1.0; + x = Math.abs(x); + var a1 = 0.254829592; + var a2 = -0.284496736; + var a3 = 1.421413741; + var a4 = -1.453152027; + var a5 = 1.061405429; + var p = 0.3275911; + var t = 1.0 / (1.0 + p * x); + var y = 1.0 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t) * Math.exp(-x * x); + return sign * y; + } + function erfcApprox(x) { return 1.0 - erfApprox(x); } + function tgammaApprox(z) { + var p = [ + 676.5203681218851, -1259.1392167224028, 771.32342877765313, + -176.61502916214059, 12.507343278686905, -0.13857109526572012, + 9.9843695780195716e-6, 1.5056327351493116e-7 + ]; + if (z < 0.5) { + return Math.PI / (Math.sin(Math.PI * z) * tgammaApprox(1.0 - z)); + } + z -= 1.0; + var x = 0.99999999999980993; + for (var i = 0; i < p.length; i++) { + x += p[i] / (z + i + 1.0); + } + var t = z + p.length - 0.5; + return Math.sqrt(2.0 * Math.PI) * Math.pow(t, z + 0.5) * Math.exp(-t) * x; + } + function lgammaApprox(x) { + var g = tgammaApprox(x); + return Math.log(Math.abs(g)); + } + function nextafterApprox(x, y) { + if (isNaN(x) || isNaN(y)) { + return NaN; + } + if (x === y) { + return y; + } + if (x === 0.0) { + return y > 0.0 ? Number.MIN_VALUE : -Number.MIN_VALUE; + } + var buf = new ArrayBuffer(8); + var dv = new DataView(buf); + dv.setFloat64(0, x, true); + var bits = dv.getBigUint64(0, true); + if ((y > x) === (x > 0.0)) { + bits += 1n; + } + else { + bits -= 1n; + } + dv.setBigUint64(0, bits, true); + return dv.getFloat64(0, true); + } + function meJitExp10(x) { return Math.pow(10.0, x); } + function meJitSinpi(x) { return Math.sin(Math.PI * x); } + function meJitCospi(x) { return Math.cos(Math.PI * x); } + var mathExp2 = Math.exp2 ? Math.exp2 : function(x) { return Math.pow(2.0, x); }; + function meJitLogaddexp(a, b) { + var hi = a > b ? a : b; + var lo = a > b ? b : a; + return hi + Math.log1p(Math.exp(lo - hi)); + } + function meJitWhere(c, x, y) { return c !== 0.0 ? 
x : y; } + function vecUnaryF64(inPtr, outPtr, n, fn) { + var ii = inPtr >> 3; + var oo = outPtr >> 3; + for (var i = 0; i < n; i++) { + HEAPF64[oo + i] = fn(HEAPF64[ii + i]); + } + } + function vecBinaryF64(aPtr, bPtr, outPtr, n, fn) { + var aa = aPtr >> 3; + var bb = bPtr >> 3; + var oo = outPtr >> 3; + for (var i = 0; i < n; i++) { + HEAPF64[oo + i] = fn(HEAPF64[aa + i], HEAPF64[bb + i]); + } + } + function vecUnaryF32(inPtr, outPtr, n, fn) { + var ii = inPtr >> 2; + var oo = outPtr >> 2; + for (var i = 0; i < n; i++) { + HEAPF32[oo + i] = fn(HEAPF32[ii + i]); + } + } + function vecBinaryF32(aPtr, bPtr, outPtr, n, fn) { + var aa = aPtr >> 2; + var bb = bPtr >> 2; + var oo = outPtr >> 2; + for (var i = 0; i < n; i++) { + HEAPF32[oo + i] = fn(HEAPF32[aa + i], HEAPF32[bb + i]); + } + } + var env = { + memory: wasmMemory, + acos: Math.acos, acosh: Math.acosh, asin: Math.asin, asinh: Math.asinh, + atan: Math.atan, atan2: Math.atan2, atanh: Math.atanh, cbrt: Math.cbrt, + ceil: Math.ceil, copysign: copysign, cos: Math.cos, cosh: Math.cosh, + erf: erfApprox, erfc: erfcApprox, exp: Math.exp, exp2: mathExp2, + expm1: Math.expm1, fabs: Math.abs, fdim: fdim, floor: Math.floor, + fma: function(a, b, c) { return a * b + c; }, fmax: Math.max, fmin: Math.min, + fmod: function(a, b) { return a % b; }, hypot: Math.hypot, ldexp: ldexp, + lgamma: lgammaApprox, log: Math.log, log10: Math.log10, log1p: Math.log1p, + log2: Math.log2, nextafter: nextafterApprox, pow: Math.pow, remainder: remainder, + rint: rint, round: Math.round, sin: Math.sin, sinh: Math.sinh, sqrt: Math.sqrt, + tan: Math.tan, tanh: Math.tanh, tgamma: tgammaApprox, trunc: Math.trunc, + me_jit_exp10: meJitExp10, me_jit_sinpi: meJitSinpi, me_jit_cospi: meJitCospi, + me_jit_logaddexp: meJitLogaddexp, me_jit_where: meJitWhere + }; + env.me_wasm32_cast_int = function(x) { + return x < 0 ? 
Math.ceil(x) : Math.floor(x); + }; + env.me_wasm32_cast_float = function(x) { + return x; + }; + env.me_wasm32_cast_bool = function(x) { + return x !== 0 ? 1 : 0; + }; + env.memset = bindBridge("memset", function(ptr, value, n) { + if (n > 0) { + HEAPU8.fill(value & 255, ptr, ptr + n); + } + return ptr | 0; + }); + /* Prefer host wasm bridge symbols; keep JS fallbacks for robustness. */ + env.me_jit_exp10 = bindBridge("me_jit_exp10", env.me_jit_exp10); + env.me_jit_sinpi = bindBridge("me_jit_sinpi", env.me_jit_sinpi); + env.me_jit_cospi = bindBridge("me_jit_cospi", env.me_jit_cospi); + env.me_jit_logaddexp = bindBridge("me_jit_logaddexp", env.me_jit_logaddexp); + env.me_jit_where = bindBridge("me_jit_where", env.me_jit_where); + env.me_jit_vec_sin_f64 = bindBridge("me_jit_vec_sin_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.sin); }); + env.me_jit_vec_cos_f64 = bindBridge("me_jit_vec_cos_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.cos); }); + env.me_jit_vec_exp_f64 = bindBridge("me_jit_vec_exp_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.exp); }); + env.me_jit_vec_log_f64 = bindBridge("me_jit_vec_log_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log); }); + env.me_jit_vec_exp10_f64 = bindBridge("me_jit_vec_exp10_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, meJitExp10); }); + env.me_jit_vec_sinpi_f64 = bindBridge("me_jit_vec_sinpi_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, meJitSinpi); }); + env.me_jit_vec_cospi_f64 = bindBridge("me_jit_vec_cospi_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, meJitCospi); }); + env.me_jit_vec_atan2_f64 = bindBridge("me_jit_vec_atan2_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.atan2); }); + env.me_jit_vec_hypot_f64 = bindBridge("me_jit_vec_hypot_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.hypot); }); + 
env.me_jit_vec_pow_f64 = bindBridge("me_jit_vec_pow_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.pow); }); + env.me_jit_vec_fmax_f64 = bindBridge("me_jit_vec_fmax_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.max); }); + env.me_jit_vec_fmin_f64 = bindBridge("me_jit_vec_fmin_f64", function(aPtr, bPtr, outPtr, n) { vecBinaryF64(aPtr, bPtr, outPtr, n, Math.min); }); + env.me_jit_vec_expm1_f64 = bindBridge("me_jit_vec_expm1_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.expm1); }); + env.me_jit_vec_log10_f64 = bindBridge("me_jit_vec_log10_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log10); }); + env.me_jit_vec_sinh_f64 = bindBridge("me_jit_vec_sinh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.sinh); }); + env.me_jit_vec_cosh_f64 = bindBridge("me_jit_vec_cosh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.cosh); }); + env.me_jit_vec_tanh_f64 = bindBridge("me_jit_vec_tanh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.tanh); }); + env.me_jit_vec_asinh_f64 = bindBridge("me_jit_vec_asinh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.asinh); }); + env.me_jit_vec_acosh_f64 = bindBridge("me_jit_vec_acosh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.acosh); }); + env.me_jit_vec_atanh_f64 = bindBridge("me_jit_vec_atanh_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.atanh); }); + env.me_jit_vec_abs_f64 = bindBridge("me_jit_vec_abs_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.abs); }); + env.me_jit_vec_sqrt_f64 = bindBridge("me_jit_vec_sqrt_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.sqrt); }); + env.me_jit_vec_log1p_f64 = bindBridge("me_jit_vec_log1p_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log1p); }); + env.me_jit_vec_exp2_f64 = 
bindBridge("me_jit_vec_exp2_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, mathExp2); }); + env.me_jit_vec_log2_f64 = bindBridge("me_jit_vec_log2_f64", function(inPtr, outPtr, n) { vecUnaryF64(inPtr, outPtr, n, Math.log2); }); + env.me_jit_vec_sin_f32 = bindBridge("me_jit_vec_sin_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.sin); }); + env.me_jit_vec_cos_f32 = bindBridge("me_jit_vec_cos_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.cos); }); + env.me_jit_vec_exp_f32 = bindBridge("me_jit_vec_exp_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.exp); }); + env.me_jit_vec_log_f32 = bindBridge("me_jit_vec_log_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log); }); + env.me_jit_vec_exp10_f32 = bindBridge("me_jit_vec_exp10_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, meJitExp10); }); + env.me_jit_vec_sinpi_f32 = bindBridge("me_jit_vec_sinpi_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, meJitSinpi); }); + env.me_jit_vec_cospi_f32 = bindBridge("me_jit_vec_cospi_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, meJitCospi); }); + env.me_jit_vec_atan2_f32 = bindBridge("me_jit_vec_atan2_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.atan2); }); + env.me_jit_vec_hypot_f32 = bindBridge("me_jit_vec_hypot_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.hypot); }); + env.me_jit_vec_pow_f32 = bindBridge("me_jit_vec_pow_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.pow); }); + env.me_jit_vec_fmax_f32 = bindBridge("me_jit_vec_fmax_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.max); }); + env.me_jit_vec_fmin_f32 = bindBridge("me_jit_vec_fmin_f32", function(aPtr, bPtr, outPtr, n) { vecBinaryF32(aPtr, bPtr, outPtr, n, Math.min); }); + env.me_jit_vec_expm1_f32 = 
bindBridge("me_jit_vec_expm1_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.expm1); }); + env.me_jit_vec_log10_f32 = bindBridge("me_jit_vec_log10_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log10); }); + env.me_jit_vec_sinh_f32 = bindBridge("me_jit_vec_sinh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.sinh); }); + env.me_jit_vec_cosh_f32 = bindBridge("me_jit_vec_cosh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.cosh); }); + env.me_jit_vec_tanh_f32 = bindBridge("me_jit_vec_tanh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.tanh); }); + env.me_jit_vec_asinh_f32 = bindBridge("me_jit_vec_asinh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.asinh); }); + env.me_jit_vec_acosh_f32 = bindBridge("me_jit_vec_acosh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.acosh); }); + env.me_jit_vec_atanh_f32 = bindBridge("me_jit_vec_atanh_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.atanh); }); + env.me_jit_vec_abs_f32 = bindBridge("me_jit_vec_abs_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.abs); }); + env.me_jit_vec_sqrt_f32 = bindBridge("me_jit_vec_sqrt_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.sqrt); }); + env.me_jit_vec_log1p_f32 = bindBridge("me_jit_vec_log1p_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log1p); }); + env.me_jit_vec_exp2_f32 = bindBridge("me_jit_vec_exp2_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, mathExp2); }); + env.me_jit_vec_log2_f32 = bindBridge("me_jit_vec_log2_f32", function(inPtr, outPtr, n) { vecUnaryF32(inPtr, outPtr, n, Math.log2); }); + return env; + } + /* --- parse sections ------------------------------------------------- */ + var pos = 8, sections = []; + while (pos < src.length) { + var id = src[pos++]; + var tmp = readULEB(src, pos), len = tmp[0]; pos = 
/**
 * Release a function index previously obtained for a JIT kernel.
 *
 * Does nothing unless `runtime` exposes a callable `removeFunction` and
 * `idx` is truthy (an index of 0 is treated as "nothing to free").
 *
 * @param {object} runtime  Object expected to provide `removeFunction(idx)`.
 * @param {number} idx      Index to release.
 */
function _meJitFreeFn(runtime, idx) {
  var canRemove = !!runtime && typeof runtime.removeFunction === 'function';
  if (canRemove && idx) {
    runtime.removeFunction(idx);
  }
}
# Two-operand NumPy ufuncs, keyed by the ufunc object; each value is the operator
# symbol or function name (as a string) that stands in for it.
# NOTE(review): presumably these strings are spliced into the expression handed
# to numexpr -- confirm at the use site.
ufunc_map = {
    np.add: "+",
    np.subtract: "-",
    np.multiply: "*",
    np.divide: "/",
    np.true_divide: "/",
    np.floor_divide: "//",
    np.power: "**",
    np.less: "<",
    np.less_equal: "<=",
    np.greater: ">",
    np.greater_equal: ">=",
    np.equal: "==",
    np.not_equal: "!=",
    np.bitwise_and: "&",
    np.bitwise_or: "|",
    np.bitwise_xor: "^",
    np.arctan2: "arctan2",
    nplshift: "<<",  # nplshift selected above according to numpy version
    nprshift: ">>",  # nprshift selected above according to numpy version
    np.remainder: "%",
    np.nextafter: "nextafter",
    np.copysign: "copysign",
    np.hypot: "hypot",
    np.maximum: "maximum",
    np.minimum: "minimum",
}

# implemented in numexpr
# One-operand counterparts of the table above: NumPy callable -> function name
# (or operator symbol, for "~").
ufunc_map_1param = {
    np.sqrt: "sqrt",
    np.sin: "sin",
    np.cos: "cos",
    np.tan: "tan",
    np.arcsin: "arcsin",
    np.arccos: "arccos",
    np.arctan: "arctan",
    np.sinh: "sinh",
    np.cosh: "cosh",
    np.tanh: "tanh",
    np.arcsinh: "arcsinh",
    np.arccosh: "arccosh",
    np.arctanh: "arctanh",
    np.exp: "exp",
    np.expm1: "expm1",
    np.log: "log",
    np.log10: "log10",
    np.log1p: "log1p",
    np.log2: "log2",
    np.abs: "abs",
    np.conj: "conj",
    np.real: "real",
    np.imag: "imag",
    npbinvert: "~",  # npbinvert selected above according to numpy version
    np.isnan: "isnan",
    np.isfinite: "isfinite",
    np.isinf: "isinf",
    np.floor: "floor",
    np.ceil: "ceil",
    np.trunc: "trunc",
    np.signbit: "signbit",
    np.round: "round",
}
+ + @property + def shape(self) -> tuple[int, ...]: + """The shape of the array.""" + ... + + def __len__(self) -> int: + """The length of the array.""" + ... + + def __getitem__(self, key: Any) -> Any: + """Get items from the array.""" + ... + + +def is_documented_by(original): + def wrapper(target): + target.__doc__ = original.__doc__ + return target + + return wrapper + + +def is_inside_new_expr() -> bool: + """ + Whether the current code is being executed during the creation of new expression. + """ + # Get the current call stack + stack = inspect.stack() + return builtins.any(frame_info.function in {"_new_expr", "_open_lazyarray"} for frame_info in stack) + + +def make_key_hashable(key): + if isinstance(key, slice): + return (key.start, key.stop, key.step) + elif isinstance(key, tuple | list): + return tuple(make_key_hashable(k) for k in key) + elif isinstance(key, np.ndarray): + return tuple(key.tolist()) + else: + return key + + +def get_ndarray_start_stop(ndim, key, shape): + # key should be Nones and slices + none_mask, start, stop, step = [], [], [], [] + for i, s in enumerate(key): + none_mask.append(s is None) + if s is not None: + start.append(s.start if s.start is not None else 0) + stop.append(s.stop if s.stop is not None else shape[i - np.sum(none_mask)]) + step.append(s.step if s.step is not None else 1) + # Check that start and stop values do not exceed the shape + for i in range(ndim): + if start[i] < 0: + start[i] = shape[i] + start[i] + if start[i] > shape[i]: + start[i] = shape[i] + if stop[i] < 0: + stop[i] = shape[i] + stop[i] + if stop[i] > shape[i]: + stop[i] = shape[i] + + return start, stop, tuple(step), none_mask + + +def are_partitions_aligned(shape, chunks, blocks): + """ + Check if the partitions defined by chunks and blocks are aligned with the shape. + + This function verifies that the shape is aligned with the chunks and the chunks are aligned + with the blocks. 
def are_partitions_behaved(shape, chunks, blocks):
    """
    Check if the partitions defined by chunks and blocks are well-behaved with respect to the shape.

    This function verifies that partitions are C-contiguous with respect to the outer container.
    The same check is applied twice: blocks inside chunks, then chunks inside shape.

    Parameters
    ----------
    shape: sequence of int
        The shape of the outer container.
    chunks: sequence of int
        The chunk shape; must have as many entries as `shape`.
    blocks: sequence of int
        The block shape; must have as many entries as `chunks`.

    Returns
    -------
    bool
        True if the partitions are well-behaved, False otherwise.

    Raises
    ------
    ValueError
        If the sequences being compared differ in length (``zip(..., strict=True)``).
    """

    # Check C-contiguity among partitions
    def check_contiguity(shape, part):
        ndims = len(shape)
        # Index of the innermost dimension found so far that is split by `part`
        # (starts at the last dimension)
        inner_dim = ndims - 1
        # Walk the dimensions from the last one to the first one
        for i, size, unit in zip(reversed(range(ndims)), reversed(shape), reversed(part), strict=True):
            if size > unit:
                if i < inner_dim:
                    # An outer split dimension must be an exact multiple of the unit
                    if size % unit != 0:
                        return False
                else:
                    # NOTE(review): size > unit already holds in this branch, so this
                    # comparison is always true and the branch always rejects --
                    # confirm that is the intent.
                    if size != unit:
                        return False
                inner_dim = i
        return True

    # Check C-contiguity for blocks inside chunks
    if not check_contiguity(chunks, blocks):
        return False

    # Check C-contiguity for chunks inside shape
    return check_contiguity(shape, chunks)
def get_flat_slices(
    shape: tuple[int],
    s: tuple[slice, ...],
    c_order: bool = True,
) -> list[slice]:
    """
    Translate the N-dimensional selection `s` into slices over the flattened array.

    Parameters
    ----------
    shape: tuple
        Shape of the array being addressed.
    s: tuple
        One slice per dimension; only unit steps (``None`` or 1) are accepted.
    c_order: bool
        When True (default) the flat slices follow C (row-major) order.
        When False a single slice is produced: it starts at the flat offset of
        the selection's first element and is as long as the selection.

    Returns
    -------
    list
        The flat slices equivalent to `s`. For a 0-dim `shape` an empty tuple
        is returned instead.

    Raises
    ------
    ValueError
        If any slice in `s` has a step other than ``None`` or 1.
    """
    rank = len(shape)
    if rank == 0:
        # Known wart: callers expect slices here, but building the list below
        # for shape == () has been seen to kill the process, so bail out early.
        return ()

    dims = range(rank)
    lower = [0 if s[d].start is None else s[d].start for d in dims]
    # Stops are capped to the shape (missing stops default to the full extent)
    upper = [builtins.min(shape[d] if s[d].stop is None else s[d].stop, shape[d]) for d in dims]
    # Steps play no role in the computation, so anything but None/1 is rejected
    for d in dims:
        if s[d].step not in (None, 1):
            raise ValueError("steps are not supported in slices")

    # Row-major strides: product of all the trailing dimension sizes
    row_major = [math.prod(shape[d + 1 :]) for d in dims]

    lower = np.array(lower, dtype=np.int64)
    upper = np.array(upper, dtype=np.int64)
    row_major = np.array(row_major, dtype=np.int64)

    if c_order:
        # The cython iterator yields the 1-dimensional slices in C order
        return list(blosc2_ext.slice_flatter(lower, upper, row_major))

    # Plain order: one contiguous run starting at the flat offset of `lower`
    offset = np.sum(lower * row_major)
    extent = math.prod(upper - lower)
    return [slice(offset, offset + extent)]
+ c_order: bool + Whether to reshape the array in C order (row-major) or insertion order. + Insertion order means that values will be stored in the array + following the order of chunks in the source array. + Default is C order. + kwargs : dict, optional + Additional keyword arguments supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A new array with the requested shape. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [23 * 11] + >>> a = np.arange(np.prod(shape)) + >>> # Create an array + >>> b = blosc2.asarray(a) + >>> # Reshape the array + >>> c = blosc2.reshape(b, (11, 23)) + >>> print(c.shape) + (11, 23) + """ + + if src.ndim != 1: + raise ValueError("reshape only works when src.shape is 1-dimensional") + # Check if the new shape is valid + if math.prod(shape) != math.prod(src.shape): + raise ValueError("total size of new array must be unchanged") + + # Create the new array + dst = empty(shape, dtype=src.dtype, **kwargs) + + if is_inside_new_expr() or 0 in shape: + # We already have the dtype and shape, so return immediately + return dst + + if shape == (): # get_flat_slices fails for this case so just return directly + dst[()] = src[()] if src.shape == () else src[0] + return dst + + # Copy the data chunk by chunk + for dst_chunk in dst.iterchunks_info(): + dst_slice = tuple( + slice(c * s, (c + 1) * s) for c, s in zip(dst_chunk.coords, dst.chunks, strict=False) + ) + # Cap the stop indices in dst_slices to the dst.shape, and create a new list of slices + dst_slice = tuple( + slice(s.start, builtins.min(s.stop, sh)) for s, sh in zip(dst_slice, dst.shape, strict=False) + ) + size_dst_slice = math.prod([s.stop - s.start for s in dst_slice]) + # Find the series of slices in source array that correspond to the destination chunk + # (assuming the source array is 1-dimensional here) + # t0 = time() + # src_slices = get_flat_slices_orig(dst.shape, dst_slice) + # Use the get_flat_slices which 
uses a much faster iterator in cython + src_slices = get_flat_slices(dst.shape, dst_slice, c_order) + # print(f"Time to get slices: {time() - t0:.3f} s") + # Compute the size for slices in the source array + size_src_slices = builtins.sum(s.stop - s.start for s in src_slices) + if size_src_slices != size_dst_slice: + raise ValueError("source slice size is not equal to the destination chunk size") + # Now, assemble the slices for assignment in the destination array + dst_buf = np.empty(size_dst_slice, dtype=src.dtype) + dst_buf_len = 0 + for src_slice in src_slices: + slice_size = src_slice.stop - src_slice.start + dst_buf_slice = slice(dst_buf_len, dst_buf_len + slice_size) + dst_buf_len += slice_size + if hasattr(src, "res_getitem"): + # Fast path for lazy UDFs (important for e.g. arange or linspace) + # This essentially avoids the need to create a new, + # potentially large NumPy array in memory. + # This is not critical for Linux, but it is for Windows/Mac. + dst_buf[dst_buf_slice] = src.res_getitem[src_slice] + else: + dst_buf[dst_buf_slice] = src[src_slice] + # Compute the shape of dst_slice + dst_slice_shape = tuple(s.stop - s.start for s in dst_slice) + # ... 
and assign the buffer to the destination array + dst[dst_slice] = dst_buf.reshape(dst_slice_shape) + + return dst + + +def _check_allowed_dtypes( + value: bool | int | float | str | blosc2.Array, +): + def _is_array_like(v: Any) -> bool: + try: + # Try Protocol runtime check first (works when possible) + if isinstance(v, blosc2.Array): + return True + except Exception: + # Some runtime contexts may raise (or return False) — fall back to duck typing + pass + # Structural fallback: common minimal array interface + return hasattr(v, "shape") and hasattr(v, "dtype") and callable(getattr(v, "__getitem__", None)) + + if not (_is_array_like(value) or np.isscalar(value)): + raise RuntimeError( + f"Expected blosc2.Array or scalar instances and you provided a '{type(value)}' instance" + ) + + +def sum( + ndarr: blosc2.Array, + axis: int | tuple[int] | None = None, + dtype: np.dtype | str = None, + keepdims: bool = False, + **kwargs: Any, +) -> blosc2.Array | int | float | complex | bool: + """ + Return the sum of array elements over a given axis. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array or expression. + axis: int or tuple of ints, optional + Axis or axes along which a sum is performed. By default, axis=None, + sums all the elements of the input array. If axis is negative, + it counts from the last to the first axis. + dtype: np.dtype or list str, optional + The type of the returned array and of the accumulator in which the + elements are summed. The dtype of :paramref:`ndarr` is used by default unless it has + an integer dtype of less precision than the default platform integer. + keepdims: bool, optional + If set to True, the reduced axes are left in the result + as dimensions with size one. With this option, the result will broadcast + correctly against the input array. 
def cumulative_sum(
    ndarr: blosc2.Array,
    axis: int | tuple[int] | None = None,
    dtype: np.dtype | str = None,
    include_initial: bool = False,
    **kwargs: Any,
) -> blosc2.Array:
    """
    Compute the cumulative sum of the elements of `ndarr`.

    This is a thin functional front-end: the work is delegated to
    ``ndarr.cumulative_sum``.

    Parameters
    ----------
    ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr`
        The input array or expression.
    axis: int
        Axis along which the cumulative sum is computed. May be None for a
        1D array; otherwise it must be given.
    dtype: dtype
        Data type of the returned array.
    include_initial: bool
        Whether the initial value (zero) is emitted as the first element of
        the output. Default: False.
    fp_accuracy: :ref:`blosc2.FPAccuracy`, optional
        Floating-point accuracy for reductions on :ref:`LazyExpr`.
        Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr.
    kwargs: dict, optional
        Additional keyword arguments supported by the :func:`empty` constructor.

    Returns
    -------
    out: blosc2.Array
        The cumulative sums. With N the size of the scanned axis, the output
        has the shape of `ndarr`, except that the scanned axis has size N+1
        when `include_initial` is True.
    """
    scan_options = {"axis": axis, "dtype": dtype, "include_initial": include_initial}
    return ndarr.cumulative_sum(**scan_options, **kwargs)
def mean(
    ndarr: blosc2.Array,
    axis: int | tuple[int] | None = None,
    dtype: np.dtype | str = None,
    keepdims: bool = False,
    **kwargs: Any,
) -> blosc2.Array | int | float | complex | bool:
    """
    Return the arithmetic mean along the specified axis.

    The parameters are documented in the :func:`sum <blosc2.sum>`.
    The computation is delegated to ``ndarr.mean``.

    Returns
    -------
    mean_along_axis: np.ndarray or :ref:`NDArray` or scalar
        The mean of the elements along the axis.

    References
    ----------
    `np.mean <https://numpy.org/doc/stable/reference/generated/numpy.mean.html>`_

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> # Example array
    >>> array = np.array([[1, 2, 3], [4, 5, 6]])
    >>> nd_array = blosc2.asarray(array)
    >>> # Compute the mean of all elements in the array (axis=None)
    >>> overall_mean = blosc2.mean(nd_array)
    >>> print("Mean of all elements:", overall_mean)
    Mean of all elements: 3.5
    """
    return ndarr.mean(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs)
def var(
    ndarr: blosc2.Array,
    axis: int | tuple[int] | None = None,
    dtype: np.dtype | str = None,
    ddof: int = 0,
    keepdims: bool = False,
    **kwargs: Any,
) -> blosc2.Array | int | float | bool:
    """
    Compute the variance along the given axis.

    See :func:`std <blosc2.std>` for the description of the parameters.
    The computation is delegated to ``ndarr.var``.

    Returns
    -------
    var_along_axis: np.ndarray or :ref:`NDArray` or scalar
        The variance of the elements along the axis.

    References
    ----------
    `np.var <https://numpy.org/doc/stable/reference/generated/numpy.var.html>`_

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> # Create an instance of NDArray with some data
    >>> array = np.array([[1, 2, 3], [4, 5, 6]])
    >>> nd_array = blosc2.asarray(array)
    >>> # Compute the variance of the entire array
    >>> var_all = blosc2.var(nd_array)
    >>> print("Variance of the entire array:", var_all)
    Variance of the entire array: 2.9166666666666665
    >>> # Compute the variance along axis 0 (columns)
    >>> var_axis0 = blosc2.var(nd_array, axis=0)
    >>> print("Variance along axis 0:", var_axis0)
    Variance along axis 0: [2.25 2.25 2.25]
    """
    reduction_options = {"axis": axis, "dtype": dtype, "ddof": ddof, "keepdims": keepdims}
    return ndarr.var(**reduction_options, **kwargs)
def min(
    ndarr: blosc2.Array,
    axis: int | tuple[int] | None = None,
    keepdims: bool = False,
    **kwargs: Any,
) -> blosc2.Array | int | float | complex | bool:
    """
    Compute the minimum along the given axis.

    The computation is delegated to ``ndarr.min``.

    Parameters
    ----------
    ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr`
        The input array or expression.
    axis: int or tuple of ints, optional
        Axis or axes to reduce over. The flattened input is used by default.
    keepdims: bool, optional
        When True, the reduced axes are kept in the result as dimensions of
        size one, so that the result broadcasts correctly against the input.
    fp_accuracy: :ref:`blosc2.FPAccuracy`, optional
        Floating-point accuracy for reductions on :ref:`LazyExpr`.
        Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr.
    kwargs: dict, optional
        Keyword arguments that are supported by the :func:`empty` constructor.

    Returns
    -------
    min_along_axis: np.ndarray or :ref:`NDArray` or scalar
        The minimum of the elements along the axis.

    References
    ----------
    `np.min <https://numpy.org/doc/stable/reference/generated/numpy.min.html>`_

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> array = np.array([1, 3, 7, 8, 9, 31])
    >>> nd_array = blosc2.asarray(array)
    >>> min_all = blosc2.min(nd_array)
    >>> print("Minimum of all elements in the array:", min_all)
    Minimum of all elements in the array: 1
    >>> # Compute the minimum along axis 0 with keepdims=True
    >>> min_keepdims = blosc2.min(nd_array, axis=0, keepdims=True)
    >>> print("Minimum along axis 0 with keepdims=True:", min_keepdims)
    Minimum along axis 0 with keepdims=True: [1]
    """
    reduction_options = {"axis": axis, "keepdims": keepdims}
    return ndarr.min(**reduction_options, **kwargs)
def any(
    ndarr: blosc2.Array,
    axis: int | tuple[int] | None = None,
    keepdims: bool = False,
    **kwargs: Any,
) -> blosc2.Array | bool:
    """
    Check whether at least one element along the given axis evaluates to True.

    See :func:`min <blosc2.min>` for the description of the parameters.
    The computation is delegated to ``ndarr.any``.

    Returns
    -------
    any_along_axis: np.ndarray or :ref:`NDArray` or scalar
        The result of the evaluation along the axis.

    References
    ----------
    `np.any <https://numpy.org/doc/stable/reference/generated/numpy.any.html>`_

    Examples
    --------
    >>> import blosc2
    >>> import numpy as np
    >>> data = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]])
    >>> # Convert the NumPy array to a Blosc2 NDArray
    >>> ndarray = blosc2.asarray(data)
    >>> print("NDArray data:", ndarray[:])
    NDArray data: [[1 0 0]
     [0 1 0]
     [0 0 0]]
    >>> any_along_axis_0 = blosc2.any(ndarray, axis=0)
    >>> print("Any along axis 0:", any_along_axis_0)
    Any along axis 0: [ True  True False]
    >>> any_flattened = blosc2.any(ndarray)
    >>> print("Any in the flattened array:", any_flattened)
    Any in the flattened array: True
    """
    reduction_options = {"axis": axis, "keepdims": keepdims}
    return ndarr.any(**reduction_options, **kwargs)
def argmax(
    ndarr: blosc2.Array, axis: int | None = None, keepdims: bool = False, **kwargs
) -> blosc2.Array | int:
    """
    Find the indices of the maximum values along an axis.

    If the maximum occurs more than once, only the index of its first
    occurrence is reported. The computation is delegated to ``ndarr.argmax``.

    Parameters
    ----------
    ndarr: blosc2.Array
        Input array. Should have a real-valued data type.
    axis: int | None
        Axis along which to search. If None, the index of the maximum of the
        flattened array is returned. Default: None.
    keepdims: bool
        If True, the reduced axis is kept in the result as a singleton
        dimension; otherwise it is dropped. Default: False.
    fp_accuracy: :ref:`blosc2.FPAccuracy`, optional
        Floating-point accuracy for reductions on :ref:`LazyExpr`.
        Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr.
    kwargs: dict, optional
        Extra keyword arguments, forwarded untouched to ``ndarr.argmax``.

    Returns
    -------
    out: blosc2.Array
        If `axis` is None, a zero-dimensional array holding the index of the
        first occurrence of the maximum value; otherwise, a
        non-zero-dimensional array holding the indices of the maximum values.
    """
    search_options = {"axis": axis, "keepdims": keepdims}
    return ndarr.argmax(**search_options, **kwargs)
+ + References + ---------- + `np.all `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> data = np.array([True, True, False, True, True, True]) + >>> ndarray = blosc2.asarray(data) + >>> # Test if all elements are True along the default axis (flattened array) + >>> result_flat = blosc2.all(ndarray) + >>> print("All elements are True (flattened):", result_flat) + All elements are True (flattened): False + """ + return ndarr.all(axis=axis, keepdims=keepdims, **kwargs) + + +def sin(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the trigonometric sine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array containing angles in radians. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the sine of the input angles. The result can be evaluated. + + References + ---------- + `np.sin `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> angles = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) + >>> nd_array = blosc2.asarray(angles) + >>> result_ = blosc2.sin(nd_array) + >>> result = result_[:] + >>> print("Angles in radians:", angles) + Angles in radians: [0. 0.52359878 0.78539816 1.57079633 3.14159265] + >>> print("Sine of the angles:", result) + Sine of the angles: [0.00000000e+00 5.00000000e-01 7.07106781e-01 1.00000000e+00 + 1.22464680e-16] + """ + return blosc2.LazyExpr(new_op=(ndarr, "sin", None)) + + +def cos(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Trigonometric cosine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array containing angles in radians. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the cosine of the input angles. The result can be evaluated. 
+ + References + ---------- + `np.cos `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> angles = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) + >>> nd_array = blosc2.asarray(angles) + >>> result_ = blosc2.cos(nd_array) + >>> result = result_[:] + >>> print("Angles in radians:", angles) + Angles in radians: [0. 0.52359878 0.78539816 1.57079633 3.14159265] + >>> print("Cosine of the angles:", result) + Cosine of the angles: [ 1.00000000e+00 8.66025404e-01 7.07106781e-01 6.12323400e-17 + -1.00000000e+00] + """ + return blosc2.LazyExpr(new_op=(ndarr, "cos", None)) + + +def tan(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the trigonometric tangent, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array containing angles in radians. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the tangent of the input angles. + The result can be evaluated. + + References + ---------- + `np.tan `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> angles = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) + >>> nd_array = blosc2.asarray(angles) + >>> result_ = blosc2.tan(nd_array) + >>> result = result_[:] + >>> print("Angles in radians:", angles) + Angles in radians: [0. 0.52359878 0.78539816 1.57079633 3.14159265] + >>> print("Tangent of the angles:", result) + Tangent of the angles: [ 0.00000000e+00 5.77350269e-01 1.00000000e+00 1.63312394e+16 + -1.22464680e-16] + """ + return blosc2.LazyExpr(new_op=(ndarr, "tan", None)) + + +def sqrt(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the non-negative square-root of an array, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the square root of the input array. 
+ The result can be evaluated. + + References + ---------- + `np.sqrt `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> data = np.array([0, 1, 4, 9, 16, 25]) + >>> nd_array = blosc2.asarray(data) + >>> result_ = blosc2.sqrt(nd_array) + >>> result = result_[:] + >>> print("Original numbers:", data) + Original numbers: [ 0 1 4 9 16 25] + >>> print("Square roots:", result) + Square roots: [0. 1. 2. 3. 4. 5.] + """ + return blosc2.LazyExpr(new_op=(ndarr, "sqrt", None)) + + +def sinh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Hyperbolic sine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the hyperbolic sine of the input array. + The result can be evaluated. + + References + ---------- + `np.sinh `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> numbers = np.array([-2, -1, 0, 1, 2]) + >>> ndarray = blosc2.asarray(numbers) + >>> result_lazy = blosc2.sinh(ndarray) + >>> result = result_lazy[:] + >>> print("Original numbers:", numbers) + Original numbers: [-2 -1 0 1 2] + >>> print("Hyperbolic sine:", result) + Hyperbolic sine: [-3.62686041 -1.17520119 0. 1.17520119 3.62686041] + """ + return blosc2.LazyExpr(new_op=(ndarr, "sinh", None)) + + +def cosh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the hyperbolic cosine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the hyperbolic cosine of the input array. + The result can be evaluated. 
+ + References + ---------- + `np.cosh `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> numbers = np.array([-2, -1, 0, 1, 2]) + >>> ndarray = blosc2.asarray(numbers) + >>> result_lazy = blosc2.cosh(ndarray) + >>> result = result_lazy[:] + >>> print("Original numbers:", numbers) + Original numbers: [-2 -1 0 1 2] + >>> print("Hyperbolic cosine:", result) + Hyperbolic cosine: [3.76219569 1.54308063 1. 1.54308063 3.76219569] + """ + return blosc2.LazyExpr(new_op=(ndarr, "cosh", None)) + + +def tanh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the hyperbolic tangent, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the hyperbolic tangent of the input array. + The result can be evaluated. + + References + ---------- + `np.tanh `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> numbers = np.array([-2, -1, 0, 1, 2]) + >>> ndarray = blosc2.asarray(numbers) + >>> result_lazy = blosc2.tanh(ndarray) + >>> result = result_lazy[:] + >>> print("Original numbers:", numbers) + Original numbers: [-2 -1 0 1 2] + >>> print("Hyperbolic tangent:", result) + Hyperbolic tangent: [-0.96402758 -0.76159416 0. 0.76159416 0.96402758] + """ + return blosc2.LazyExpr(new_op=(ndarr, "tanh", None)) + + +def arcsin(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the inverse sine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the inverse sine of the input array. + The result can be evaluated. 
+ + References + ---------- + `np.arcsin `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> numbers = np.array([-1, -0.5, 0, 0.5, 1]) + >>> ndarray = blosc2.asarray(numbers) + >>> result_lazy = blosc2.arcsin(ndarray) + >>> result = result_lazy[:] + >>> print("Original numbers:", numbers) + Original numbers: [-1. -0.5 0. 0.5 1. ] + >>> print("Arcsin:", result) + Arcsin: [-1.57079633 -0.52359878 0. 0.52359878 1.57079633] + """ + return blosc2.LazyExpr(new_op=(ndarr, "arcsin", None)) + + +asin = arcsin # alias + + +def arccos(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the inverse cosine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the inverse cosine of the input array. + The result can be evaluated. + + References + ---------- + `np.arccos `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> numbers = np.array([-1, -0.5, 0, 0.5, 1]) + >>> ndarray = blosc2.asarray(numbers) + >>> result_lazy = blosc2.arccos(ndarray) + >>> result = result_lazy[:] + >>> print("Original numbers:", numbers) + Original numbers: [-1. -0.5 0. 0.5 1. ] + >>> print("Arccos:", result) + Arccos: [3.14159265 2.0943951 1.57079633 1.04719755 0. ] + """ + return blosc2.LazyExpr(new_op=(ndarr, "arccos", None)) + + +acos = arccos # alias + + +def arctan(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the inverse tangent, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the inverse tangent of the input array. + The result can be evaluated. 
+ + References + ---------- + `np.arctan `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> numbers = np.array([-1, -0.5, 0, 0.5, 1]) + >>> ndarray = blosc2.asarray(numbers) + >>> result_lazy = blosc2.arctan(ndarray) + >>> result = result_lazy[:] + >>> print("Original numbers:", numbers) + Original numbers: [-1. -0.5 0. 0.5 1. ] + >>> print("Arctan:", result) + Arctan: [-0.78539816 -0.46364761 0. 0.46364761 0.78539816] + """ + return blosc2.LazyExpr(new_op=(ndarr, "arctan", None)) + + +atan = arctan # alias + + +def arctan2(ndarr1: blosc2.Array, ndarr2: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the element-wise arc tangent of ``ndarr1 / ndarr2`` choosing the quadrant correctly. + + Parameters + ---------- + ndarr1: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` + The first input array. + ndarr2: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` + The second input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the element-wise arc tangent of ``ndarr1 / ndarr2``. + The result can be evaluated. + + References + ---------- + `np.arctan2 `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> y = np.array([0, 1, 0, -1, 1]) + >>> x = np.array([1, 1, -1, -1, 0]) + >>> ndarray_y = blosc2.asarray(y) + >>> ndarray_x = blosc2.asarray(x) + >>> result_lazy = blosc2.arctan2(ndarray_y, ndarray_x) + >>> result = result_lazy[:] + >>> print("y:", y) + y: [ 0 1 0 -1 1] + >>> print("x:", x) + x: [ 1 1 -1 -1 0] + >>> print("Arctan2(y, x):", result) + Arctan2(y, x): [ 0. 0.78539816 3.14159265 -2.35619449 1.57079633] + """ + return blosc2.LazyExpr(new_op=(ndarr1, "arctan2", ndarr2)) + + +atan2 = arctan2 # alias + + +def arcsinh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the inverse hyperbolic sine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. 
+ + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the inverse hyperbolic sine of the input array. + The result can be evaluated. + + References + ---------- + `np.arcsinh `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-2, -1, 0, 1, 2]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.arcsinh(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [-2 -1 0 1 2] + >>> print("Arcsinh:", result) + Arcsinh: [-1.44363548 -0.88137359 0. 0.88137359 1.44363548] + """ + return blosc2.LazyExpr(new_op=(ndarr, "arcsinh", None)) + + +asinh = arcsinh # alias + + +def arccosh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the inverse hyperbolic cosine, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the inverse hyperbolic cosine of the input array. + The result can be evaluated. + + References + ---------- + `np.arccosh `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([1, 2, 3, 4, 5]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.arccosh(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [1 2 3 4 5] + >>> print("Arccosh:", result) + Arccosh: [0. 1.3169579 1.76274717 2.06343707 2.29243167] + """ + return blosc2.LazyExpr(new_op=(ndarr, "arccosh", None)) + + +acosh = arccosh # alias + + +def arctanh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the inverse hyperbolic tangent, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the inverse hyperbolic tangent of the input array. 
+ The result can be evaluated. + + References + ---------- + `np.arctanh `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-0.9, -0.5, 0, 0.5, 0.9]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.arctanh(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [-0.9 -0.5 0. 0.5 0.9] + >>> print("Arctanh:", result) + Arctanh: [-1.47221949 -0.54930614 0. 0.54930614 1.47221949] + """ + return blosc2.LazyExpr(new_op=(ndarr, "arctanh", None)) + + +atanh = arctanh # alias + + +def exp(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Calculate the exponential of all elements in the input array. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the exponential of the input array. + The result can be evaluated. + + References + ---------- + `np.exp `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([0, 1, 2, 3, 4]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.exp(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [0 1 2 3 4] + >>> print("Exponential:", result) + Exponential: [ 1. 2.71828183 7.3890561 20.08553692 54.59815003] + """ + return blosc2.LazyExpr(new_op=(ndarr, "exp", None)) + + +def expm1(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Calculate ``exp(ndarr) - 1`` for all elements in the array. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing ``exp(ndarr) - 1`` of the input array. + The result can be evaluated. 
+ + References + ---------- + `np.expm1 `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-1, -0.5, 0, 0.5, 1]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.expm1(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [-1. -0.5 0. 0.5 1. ] + >>> print("Expm1:", result) + Expm1: [-0.63212056 -0.39346934 0. 0.64872127 1.71828183] + """ + return blosc2.LazyExpr(new_op=(ndarr, "expm1", None)) + + +def log(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Compute the natural logarithm, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the natural logarithm of the input array + + References + ---------- + `np.log `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([1, 2, 3, 4, 5]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.log(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [1 2 3 4 5] + >>> print("Logarithm (base e):", result) + Logarithm (base e): [0. 0.69314718 1.09861229 1.38629436 1.60943791] + """ + return blosc2.LazyExpr(new_op=(ndarr, "log", None)) + + +def log10(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the base 10 logarithm of the input array, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the base 10 logarithm of the input array. 
+ + References + ---------- + `np.log10 `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([1, 10, 100, 1000, 10000]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.log10(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [ 1 10 100 1000 10000] + >>> print("Logarithm (base 10):", result) + Logarithm (base 10): [0. 1. 2. 3. 4.] + """ + return blosc2.LazyExpr(new_op=(ndarr, "log10", None)) + + +def log1p(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the natural logarithm of one plus the input array, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the natural logarithm of one plus the input array. + + References + ---------- + `np.log1p `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-0.9, -0.5, 0, 0.5, 0.9]) + >>> ndarray = blosc2.asarray(values) + >>> result_lazy = blosc2.log1p(ndarray) + >>> result = result_lazy[:] + >>> print("Original values:", values) + Original values: [-0.9 -0.5 0. 0.5 0.9] + >>> print("Log1p (log(1 + x)):", result) + Log1p (log(1 + x)): [-2.30258509 -0.69314718 0. 0.40546511 0.64185389] + """ + return blosc2.LazyExpr(new_op=(ndarr, "log1p", None)) + + +def log2(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the base 2 logarithm of the input array, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the base 2 logarithm of the input array. 
+ + References + ---------- + `np.log2 `_ + + """ + return blosc2.LazyExpr(new_op=(ndarr, "log2", None)) + + +def conj(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the complex conjugate, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the complex conjugate of the input array. + + References + ---------- + `np.conj `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([1+2j, 3-4j, -5+6j, 7-8j]) + >>> ndarray = blosc2.asarray(values) + >>> result_ = blosc2.conj(ndarray) + >>> result = result_[:] + >>> print("Original values:", values) + Original values: [ 1.+2.j 3.-4.j -5.+6.j 7.-8.j] + >>> print("Complex conjugates:", result) + Complex conjugates: [ 1.-2.j 3.+4.j -5.-6.j 7.+8.j] + """ + return blosc2.LazyExpr(new_op=(ndarr, "conj", None)) + + +def real(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the real part of the complex array, element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the real part of the input array. + + References + ---------- + `np.real `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> complex_values = np.array([1+2j, 3-4j, -5+6j, 7-8j]) + >>> ndarray = blosc2.asarray(complex_values) + >>> result_ = blosc2.real(ndarray) + >>> result = result_[:] + >>> print("Original complex values:", complex_values) + Original complex values: [ 1.+2.j 3.-4.j -5.+6.j 7.-8.j] + >>> print("Real parts:", result) + Real parts: [ 1. 3. -5. 7.] + """ + return blosc2.LazyExpr(new_op=(ndarr, "real", None)) + + +def imag(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return the imaginary part of the complex array, element-wise. 
+ + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression representing the imaginary part of the input array. + + References + ---------- + `np.imag `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> complex_values = np.array([2+3j, -1+4j, 0-2j, 5+6j]) + >>> ndarray = blosc2.asarray(complex_values) + >>> result_ = blosc2.imag(ndarray) + >>> result = result_[:] + >>> print("Original complex values:", complex_values) + Original complex values: [ 2.+3.j -1.+4.j 0.-2.j 5.+6.j] + >>> print("Imaginary parts:", result) + Imaginary parts: [ 3. 4. -2. 6.] + """ + return blosc2.LazyExpr(new_op=(ndarr, "imag", None)) + + +@_incomplete_lazyfunc +def contains(ndarr: blosc2.Array, value: str | bytes | blosc2.Array, /) -> blosc2.LazyExpr: + """ + Check if the array contains a specified value. + + Parameters + ---------- + ndarr: :ref:`Array` + The input array. + value: str or bytes or :ref:`Array` + The value to be checked. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression that can be evaluated to check if the value + is contained in the array. + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([b"apple", b"xxbananaxxx", b"cherry", b"date"]) + >>> text_values = blosc2.asarray(values) + >>> value_to_check = b"banana" + >>> expr = blosc2.contains(text_values, value_to_check) + >>> result = expr.compute() + >>> print("Contains 'banana':", result[:]) + Contains 'banana': [False True False False] + """ + # def chunkwise_contains(inputs, output, offset): + # x1, x2 = inputs + # # output[...] = np.isin(x1, x2, assume_unique=assume_unique, invert=invert, kind=kind) + # output[...] 
= np.char.find(x1, x2) != -1 + + if not isinstance(value, str | bytes | NDArray): + raise TypeError("value should be a string, bytes or a NDArray!") + + return blosc2.LazyExpr(new_op=(ndarr, "contains", value)) + + +def abs(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Calculate the absolute value element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression that can be evaluated to get the absolute values. + + References + ---------- + `np.abs `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-5, -3, 0, 2, 4]) + >>> ndarray = blosc2.asarray(values) + >>> result_ = blosc2.abs(ndarray) + >>> result = result_[:] + >>> print("Original values:", values) + Original values: [-5 -3 0 2 4] + >>> print("Absolute values:", result) + Absolute values: [5. 3. 0. 2. 4.] + """ + return blosc2.LazyExpr(new_op=(ndarr, "abs", None)) + + +def isnan(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return True/False for not-a-number values element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression that can be evaluated to get the True/False array of results. + + References + ---------- + `np.isnan `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-5, -3, np.nan, 2, 4]) + >>> ndarray = blosc2.asarray(values) + >>> result_ = blosc2.isnan(ndarray) + >>> result = result_[:] + >>> print("isnan:", result) + isnan: [False False True False False] + """ + return blosc2.LazyExpr(new_op=(ndarr, "isnan", None)) + + +def isfinite(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return True/False for finite values element-wise. 
+ + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression that can be evaluated to get the True/False array of results. + + References + ---------- + `np.isfinite `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-5, -3, np.inf, 2, 4]) + >>> ndarray = blosc2.asarray(values) + >>> result_ = blosc2.isfinite(ndarray) + >>> result = result_[:] + >>> print("isfinite:", result) + isfinite: [ True True False True True] + """ + return blosc2.LazyExpr(new_op=(ndarr, "isfinite", None)) + + +def isinf(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: + """ + Return True/False for infinite values element-wise. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + Returns + ------- + out: :ref:`LazyExpr` + A lazy expression that can be evaluated to get the True/False array of results. + + References + ---------- + `np.isinf `_ + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> values = np.array([-5, -3, np.inf, 2, 4]) + >>> ndarray = blosc2.asarray(values) + >>> result_ = blosc2.isinf(ndarray) + >>> result = result_[:] + >>> print("isinf:", result) + isinf: [False False True False False] + """ + return blosc2.LazyExpr(new_op=(ndarr, "isinf", None)) + + +# def nonzero(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: +# """ +# Return indices of nonzero values. + +# Parameters +# ---------- +# ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` +# The input array. + +# Returns +# ------- +# out: :ref:`LazyExpr` +# A lazy expression that can be evaluated to get the array of results. 
+ +# References +# ---------- +# `np.nonzero `_ +# """ +# # FIXME: This is not correct +# return ndarr.__ne__(0) + + +def count_nonzero(ndarr: blosc2.Array, axis: int | Sequence[int] | None = None) -> int: + """ + Return number of nonzero values along axes. + + Parameters + ---------- + ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` + The input array. + + axis: int | Sequence[int] | None + Axes along which to count nonzero entries. If None, sum over whole array. Default: None. + + Returns + ------- + out: int + Number of nonzero elements. + + References + ---------- + `np.count_nonzero `_ + """ + # TODO: Optimise this + return sum(ndarr.__ne__(0), axis=axis) + + +def equal( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the truth value of x1_i == x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.equal `_ + """ + return x1.__eq__(x2) + + +def not_equal( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the truth value of x1_i != x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. 
+ + References + ---------- + `np.not_equal `_ + """ + return x1.__ne__(x2) + + +def less_equal( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the truth value of x1_i <= x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.less_equal `_ + """ + return x1.__le__(x2) + + +def less( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the truth value of x1_i < x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.less `_ + """ + return x1.__lt__(x2) + + +def greater_equal( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the truth value of x1_i >= x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. 
+ + References + ---------- + `np.greater_equal `_ + """ + return x1.__ge__(x2) + + +def greater( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the truth value of x1_i > x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.greater `_ + """ + return x1.__gt__(x2) + + +def multiply( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the value of x1_i * x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.multiply `_ + """ + return x1 * x2 + + +def divide( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the value of x1_i / x2_i for each element x1_i of the input array x1 + with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. May have any data type. + + x2:blosc2.Array + Second input array. Must be compatible with x1. May have any data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. 
+ + References + ---------- + `np.divide `_ + """ + return x1 / x2 + + +def nextafter( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Returns the next representable floating-point value for each element x1_i of the input + array x1 in the direction of the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. Real-valued floating point dtype. + + x2:blosc2.Array + Second input array. Must be compatible with x1 and have same data type. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.nextafter `_ + """ + return blosc2.LazyExpr(new_op=(x1, "nextafter", x2)) + + +def hypot( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Computes the square root of the sum of squares for each element x1_i of the input array + x1 with the respective element x2_i of the input array x2. + + Parameters + ---------- + x1: blosc2.Array + First input array. Real-valued floating point dtype. + + x2:blosc2.Array + Second input array. Must be compatible with x1. Real-valued floating point dtype. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. + + References + ---------- + `np.hypot `_ + """ + return blosc2.LazyExpr(new_op=(x1, "hypot", x2)) + + +def copysign( + x1: blosc2.Array, + x2: blosc2.Array, +) -> blosc2.LazyExpr: + """ + Composes a floating-point value with the magnitude of x1_i and the sign of x2_i + for each element of the input array x1. + + Parameters + ---------- + x1: blosc2.Array + First input array. Real-valued floating point dtype. + + x2:blosc2.Array + Second input array. Must be compatible with x1. Real-valued floating point dtype. + + Returns + ------- + out: LazyExpr + A LazyArray containing the element-wise results. 
def reciprocal(x: blosc2.Array) -> blosc2.LazyExpr:
    """
    Computes the value of 1/x_i for each element x_i of the input array x.

    Parameters
    ----------
    x: blosc2.Array
        Input array, floating-point data type.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.reciprocal <https://numpy.org/doc/stable/reference/generated/numpy.reciprocal.html>`_
    """
    # A float numerator is used, so the division is a true division.
    return 1.0 / x
def ceil(x: blosc2.Array) -> blosc2.LazyExpr:
    """
    Rounds each element x_i of the input array x up, i.e. to the smallest
    (closest to -infinity) integer-valued number that is not smaller than x_i.

    Parameters
    ----------
    x: blosc2.Array
        Input array. May have any real-valued data type.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.ceil <https://numpy.org/doc/stable/reference/generated/numpy.ceil.html>`_
    """
    # Unary operation: the second operand slot is left as None.
    operation = (x, "ceil", None)
    return blosc2.LazyExpr(new_op=operation)
def round(x: blosc2.Array) -> blosc2.LazyExpr:
    """
    Rounds each element x_i of the input array x to the nearest integer-valued number.

    Parameters
    ----------
    x: blosc2.Array
        Input array. May have any numeric data type.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.round <https://numpy.org/doc/stable/reference/generated/numpy.round.html>`_
    """
    # Unary operation: the second operand slot is left as None.
    return blosc2.LazyExpr(new_op=(x, "round", None))
def square(x1: blosc2.Array) -> blosc2.LazyExpr:
    """
    Computes x1_i**2 for each element x1_i of the input array x1.

    Parameters
    ----------
    x1: blosc2.Array
        Input array. May have any data type.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.square <https://numpy.org/doc/stable/reference/generated/numpy.square.html>`_
    """
    # The square is expressed as a self-multiplication.
    squared = x1 * x1
    return squared
def logical_and(
    x1: blosc2.Array | int | float | complex,
    x2: blosc2.Array | int | float | complex,
) -> blosc2.LazyExpr:
    """
    Computes x1_i & x2_i for each element x1_i of the input array x1 and the
    respective element x2_i of x2.

    Parameters
    ----------
    x1: blosc2.Array
        First input array, boolean.

    x2: blosc2.Array
        Second input array. Must be compatible with x1. Boolean.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    Raises
    ------
    TypeError
        If the result type of the two operands is not boolean.

    References
    ----------
    `np.logical_and <https://numpy.org/doc/stable/reference/generated/numpy.logical_and.html>`_
    """
    combined_dtype = blosc2.result_type(x1, x2)
    if combined_dtype != blosc2.bool_:
        raise TypeError("Both operands must be boolean types for logical ops.")
    conjunction = x1 & x2
    return conjunction
def bitwise_and(
    x1: blosc2.Array | int | float | complex,
    x2: blosc2.Array | int | float | complex,
) -> blosc2.LazyExpr:
    """
    Computes x1_i & x2_i for each element x1_i of the input array x1 and the
    respective element x2_i of x2.

    Parameters
    ----------
    x1: blosc2.Array
        First input array, integer or boolean.

    x2: blosc2.Array
        Second input array. Must be compatible with x1. Integer or boolean.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.bitwise_and <https://numpy.org/doc/stable/reference/generated/numpy.bitwise_and.html>`_
    """
    # No dtype validation here; the `&` operator of the operands decides.
    conjunction = x1 & x2
    return conjunction
def bitwise_right_shift(
    x1: blosc2.Array | int | float | complex,
    x2: blosc2.Array | int | float | complex,
) -> blosc2.LazyExpr:
    """
    Shifts the bits of each element x1_i of the input array x1 to the right according to
    the respective element x2_i of the input array x2.

    Note: This operation is an arithmetic shift (i.e., sign-propagating) and thus equivalent to
    floor division by a power of two.

    Parameters
    ----------
    x1: blosc2.Array
        First input array, integer.

    x2: blosc2.Array
        Second input array. Must be compatible with x1, integer.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.bitwise_right_shift <https://numpy.org/doc/stable/reference/generated/numpy.bitwise_right_shift.html>`_
    """
    # Use the operator instead of calling x1.__rshift__ directly: when x1 is a
    # plain Python scalar its __rshift__ returns the NotImplemented sentinel for
    # an array-like x2, and only the operator falls back to x2.__rrshift__.
    return x1 >> x2
def positive(
    x1: blosc2.Array | int | float | complex,
) -> blosc2.LazyExpr:
    """
    Computes the numerical positive of each element x1_i (i.e., out_i = +x1_i)
    of the input array x1.

    Parameters
    ----------
    x1: blosc2.Array
        Input array. May have any data type.

    Returns
    -------
    out: LazyExpr
        A LazyArray containing the element-wise results.

    References
    ----------
    `np.positive <https://numpy.org/doc/stable/reference/generated/numpy.positive.html>`_
    """
    # The unary plus is encoded as the binary expression ``0 + x1``.
    zero_plus_x = (0, "+", x1)
    return blosc2.LazyExpr(new_op=zero_plus_x)
@_incomplete_lazyfunc
def clip(
    x: blosc2.Array,
    min: int | float | blosc2.Array | None = None,
    max: int | float | blosc2.Array | None = None,
    **kwargs: Any,
) -> NDArray:
    """
    Clamps each element x_i of the input array x to the range [min, max].

    Parameters
    ----------
    x: blosc2.Array
        Input array. Should have a real-valued data type.

    min: int | float | blosc2.Array | None
        Lower bound of the clamping range. If None, no lower bound is applied.
        Default: None.

    max: int | float | blosc2.Array | None
        Upper bound of the clamping range. If None, no upper bound is applied.
        Default: None.

    kwargs: Any
        kwargs accepted by the :func:`empty` constructor

    Returns
    -------
    out: NDArray
        An array containing element-wise results.

    """

    def chunkwise_clip(inputs, output, offset):
        # `inputs` carries the three operands in the order handed to lazyudf below.
        values, lower, upper = inputs
        output[...] = np.clip(values, lower, upper)

    # The result keeps the dtype of x; a scalar x gets an explicit 0-d shape.
    result_dtype = blosc2.result_type(x)
    result_shape = () if np.isscalar(x) else None
    operands = (x, min, max)
    return blosc2.lazyudf(chunkwise_clip, operands, dtype=result_dtype, shape=result_shape, **kwargs)
= np.logaddexp(x1, x2) + + dtype = blosc2.result_type(x1, x2) + if dtype == blosc2.bool_: + raise TypeError("logaddexp doesn't accept boolean arguments.") + + if np.issubdtype(dtype, np.integer): + dtype = blosc2.float32 + shape = () if np.isscalar(x1) and np.isscalar(x2) else None + return blosc2.lazyudf(chunkwise_logaddexp, (x1, x2), dtype=dtype, shape=shape, **kwargs) + + +# implemented in python-blosc2 +local_ufunc_map = { + np.logaddexp: logaddexp, + np.logical_not: logical_not, + np.logical_and: logical_and, + np.logical_or: logical_or, + np.logical_xor: logical_xor, + np.matmul: matmul, +} + + +class Operand: + """Base class for all operands in expressions.""" + + _device = "cpu" + + def __array_namespace__(self, api_version: str | None = None) -> Any: + """Return an object with all the functions and attributes of the module.""" + return blosc2 + + # Provide minimal __array_interface__ to allow NumPy to work with this object + @property + def __array_interface__(self): + return { + "shape": self.shape, + "typestr": self.dtype.str, + "data": self[()], + "version": 3, + } + + @property + @abstractmethod + def dtype(self) -> np.dtype: + """ + Get the data type of the :ref:`Operand`. + + Returns + ------- + out: np.dtype + The data type of the :ref:`Operand`. + """ + pass + + @property + @abstractmethod + def shape(self) -> tuple[int]: + """ + Get the shape of the :ref:`Operand`. + + Returns + ------- + out: tuple + The shape of the :ref:`Operand`. + """ + pass + + @property + @abstractmethod + def ndim(self) -> int: + """ + Get the number of dimensions of the :ref:`Operand`. + + Returns + ------- + out: int + The number of dimensions of the :ref:`Operand`. + """ + pass + + @property + @abstractmethod + def info(self) -> InfoReporter: + """ + Get information about the :ref:`Operand`. + + Returns + ------- + out: InfoReporter + A printable class with information about the :ref:`Operand`. 
+ """ + pass + + @property + def device(self): + "Hardware device the array data resides on. Always equal to 'cpu'." + return self._device + + def to_device(self: NDArray, device: str): + """ + Copy the array from the device on which it currently resides to the specified device. + + Parameters + ---------- + self: NDArray + Array instance. + + device: str + Device to move array object to. Returns error except when device=='cpu'. + + Returns + ------- + out: NDArray + If device='cpu', the same array; else raises an Error. + """ + if device != "cpu": + raise ValueError(f"Unsupported device: {device}. Only 'cpu' is accepted.") + return self + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # Handle operations at the array level + if method != "__call__": + return NotImplemented + + if ufunc in local_ufunc_map: + return local_ufunc_map[ufunc](*inputs) + + if ufunc in ufunc_map: + value = inputs[0] if inputs[1] is self else inputs[1] + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(inputs[0], ufunc_map[ufunc], inputs[1])) + + if ufunc in ufunc_map_1param: + value = inputs[0] + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(value, ufunc_map_1param[ufunc], None)) + + return NotImplemented # if not implemented in numexpr will default to NumPy + + def __add__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "+", value)) + + def __radd__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return self.__add__(value) + + def __iadd__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return self.__add__(value) + + @is_documented_by(negative) + def __neg__(self) -> blosc2.LazyExpr: + return negative(self) + + @is_documented_by(positive) + def __pos__(self) -> blosc2.LazyExpr: + return positive(self) + + @is_documented_by(remainder) + def __mod__(self, other) -> blosc2.LazyExpr: + return remainder(self, other) + + 
@is_documented_by(remainder) + def __imod__(self, other) -> blosc2.LazyExpr: + return self.__mod__(other) + + @is_documented_by(remainder) + def __rmod__(self, other) -> blosc2.LazyExpr: + return remainder(other, self) + + def __sub__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "-", value)) + + def __isub__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "-", value)) + + def __rsub__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(value, "-", self)) + + @is_documented_by(multiply) + def __mul__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "*", value)) + + def __imul__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return self.__mul__(value) + + def __rmul__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return self.__mul__(value) + + def __truediv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "/", value)) + + def __itruediv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return self.__truediv__(value) + + def __rtruediv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(value, "/", self)) + + @is_documented_by(floor_divide) + def __floordiv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "//", value)) + + def __ifloordiv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return self.__floordiv__(value) + + def __rfloordiv__(self, value: int | float | blosc2.Array, /) -> 
blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(value, "//", self)) + + def __lt__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "<", value)) + + def __le__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "<=", value)) + + def __gt__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, ">", value)) + + def __ge__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, ">=", value)) + + def __eq__(self, value: int | float | blosc2.Array, /): + _check_allowed_dtypes(value) + if blosc2._disable_overloaded_equal: + return self is value + return blosc2.LazyExpr(new_op=(self, "==", value)) + + def __ne__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "!=", value)) + + def __pow__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "**", value)) + + def __ipow__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "**", value)) + + def __rpow__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(value, "**", self)) + + @is_documented_by(abs) + def __abs__(self) -> blosc2.LazyExpr: + return abs(self) + + @is_documented_by(bitwise_and) + def __and__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + _check_allowed_dtypes(value) + return blosc2.LazyExpr(new_op=(self, "&", value)) + + def __iand__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return 
self.__and__(value) + + def __rand__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: + return self.__and__(value) + + @is_documented_by(bitwise_xor) + def __xor__(self, other) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(self, "^", other)) + + def __ixor__(self, other) -> blosc2.LazyExpr: + return self.__xor__(other) + + def __rxor__(self, other) -> blosc2.LazyExpr: + return self.__xor__(other) + + @is_documented_by(bitwise_or) + def __or__(self, other) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(self, "|", other)) + + def __ior__(self, other) -> blosc2.LazyExpr: + return self.__or__(other) + + def __ror__(self, other) -> blosc2.LazyExpr: + return self.__or__(other) + + @is_documented_by(bitwise_invert) + def __invert__(self) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(self, "~", None)) + + @is_documented_by(bitwise_right_shift) + def __rshift__(self, other) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(self, ">>", other)) + + def __irshift__(self, other) -> blosc2.LazyExpr: + return self.__rshift__(other) + + def __rrshift__(self, other) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(other, ">>", self)) + + @is_documented_by(bitwise_left_shift) + def __lshift__(self, other) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(self, "<<", other)) + + def __ilshift__(self, other) -> blosc2.LazyExpr: + return self.__lshift__(other) + + def __rlshift__(self, other) -> blosc2.LazyExpr: + return blosc2.LazyExpr(new_op=(other, "<<", self)) + + def __bool__(self) -> bool: + if math.prod(self.shape) != 1: + raise ValueError(f"The truth value of an array of shape {self.shape} is ambiguous.") + return bool(self[()]) + + def __float__(self) -> float: + if math.prod(self.shape) != 1: + raise ValueError(f"Cannot convert array of shape {self.shape} to float.") + return float(self[()]) + + def __int__(self) -> bool: + if math.prod(self.shape) != 1: + raise ValueError(f"Cannot convert array of shape {self.shape} to int.") + 
return int(self[()]) + + def __index__(self) -> bool: + if not np.issubdtype(self.dtype, np.integer): + raise ValueError( + f"Cannot convert array of dtype {self.dtype} to index array (must have dtype int)." + ) + return self.__int__() + + def __complex__(self) -> complex: + if math.prod(self.shape) != 1: + raise ValueError(f"Cannot convert array of shape {self.shape} to complex float.") + return complex(self[()]) + + def item(self) -> float | bool | complex | int: + """ + Copy an element of an array to a standard Python scalar and return it. + """ + return self[()].item() + + def where(self, value1=None, value2=None): + """ + Select ``value1`` or ``value2`` values based on ``True``/``False`` for ``self``. + + Parameters + ---------- + value1: array_like, optional + The value to select when element of ``self`` is True. + value2: array_like, optional + The value to select when element of ``self`` is False. + + Returns + ------- + out: LazyExpr + A new expression with the where condition applied. 
+ """ + expr = blosc2.LazyExpr._new_expr("o0", {"o0": self}, guess=False) + return expr.where(value1, value2) + + @is_documented_by(sum) + def sum(self, axis=None, dtype=None, keepdims=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.sum(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) + + @is_documented_by(cumulative_sum) + def cumulative_sum(self, axis=None, dtype=None, include_initial=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.cumulative_sum(axis=axis, dtype=dtype, include_initial=include_initial, **kwargs) + + @is_documented_by(cumulative_prod) + def cumulative_prod(self, axis=None, dtype=None, include_initial=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.cumulative_prod(axis=axis, dtype=dtype, include_initial=include_initial, **kwargs) + + @is_documented_by(mean) + def mean(self, axis=None, dtype=None, keepdims=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.mean(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) + + @is_documented_by(std) + def std(self, axis=None, dtype=None, ddof=0, keepdims=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.std(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) + + @is_documented_by(var) + def var(self, axis=None, dtype=None, ddof=0, keepdims=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.var(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) + + @is_documented_by(prod) + def prod(self, axis=None, dtype=None, keepdims=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.prod(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) + + @is_documented_by(min) + def min(self, axis=None, keepdims=False, **kwargs): + expr = blosc2.LazyExpr(new_op=(self, None, None)) + return expr.min(axis=axis, keepdims=keepdims, **kwargs) + + @is_documented_by(max) + 
class LimitedSizeDict(OrderedDict):
    """
    An ``OrderedDict`` that holds at most ``max_entries`` items.

    Inserting a new key into a full dictionary discards the oldest entry
    (the first one in insertion order). Overwriting an existing key never
    evicts anything.

    Parameters
    ----------
    max_entries: int
        Maximum number of items kept.
    *args, **kwargs
        Forwarded to ``OrderedDict``; initial items go through the same size limit.
    """

    def __init__(self, max_entries, *args, **kwargs):
        # Must be set before the base constructor runs: initial items are
        # inserted through __setitem__, which reads max_entries.
        self.max_entries = max_entries
        super().__init__(*args, **kwargs)

    def __setitem__(self, key, value):
        # Evict only when a *new* key would grow the dict. Replacing the value
        # of an existing key does not change the size, so dropping the oldest
        # entry in that case would needlessly lose an unrelated item.
        if key not in self and len(self) >= self.max_entries:
            self.popitem(last=False)
        super().__setitem__(key, value)
class NDOuterIterator:
    """
    Iterator over the outermost dimension of an array-like object.

    For 1-D inputs, items are fetched one chunk-sized slice at a time and served
    from a small cache of such slices; for inputs with more dimensions, every
    step indexes the array directly.

    Parameters
    ----------
    ndarray: NDArray | NDField
        Object to iterate over. Must provide ``shape`` and integer/slice
        indexing; ``chunks`` is read only for 1-D inputs.
    cache_size: int
        Maximum number of cached slices (1-D inputs only). Values below 1
        behave like 1.
    """

    def __init__(self, ndarray: NDArray | NDField, cache_size=1):
        self.ndarray = ndarray
        self.outer_dim_size = ndarray.shape[0]
        self.inner_shape = ndarray.shape[1:]
        self.current_index = 0
        # Cache for 1D arrays; for higher dimensions, the implementation should be more involved
        is_1d = len(ndarray.shape) == 1
        self.chunk_size = ndarray.chunks[0] if is_1d else None
        self.cache = {} if is_1d else None
        self.cache_size = cache_size

    def __iter__(self):
        return self

    def __next__(self):
        if self.current_index >= self.outer_dim_size:
            raise StopIteration

        outer_index = self.current_index
        self.current_index += 1

        if self.cache is None:
            # No caching for multidimensional inputs.
            return self.ndarray[outer_index]

        chunk_index, local_index = divmod(outer_index, self.chunk_size)
        if chunk_index not in self.cache:
            # Drop the oldest cached slice when full. The emptiness guard matters
            # for cache_size <= 0: next(iter({})) would raise StopIteration here,
            # which the iterator protocol would take as "no more items".
            if self.cache and len(self.cache) >= self.cache_size:
                self.cache.pop(next(iter(self.cache)))
            start = chunk_index * self.chunk_size
            self.cache[chunk_index] = self.ndarray[start : start + self.chunk_size]

        return self.cache[chunk_index][local_index]
def urlpath(self) -> str: + """The URL path of the array.""" + return self.schunk.urlpath + + @property + def meta(self) -> dict: + """The metadata of the array.""" + return self.schunk.meta + + @property + def vlmeta(self) -> dict: + """The variable-length metadata of the array.""" + return self.schunk.vlmeta + + @property + def fields(self) -> dict: + """ + Dictionary with the fields of the structured array. + + Returns + ------- + fields: dict + A dictionary with the fields of the structured array. + + See Also + -------- + :ref:`NDField` + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = (10,) + >>> dtype = np.dtype([('a', np.int32), ('b', np.float64)]) + >>> # Create a structured array + >>> sa = blosc2.zeros(shape, dtype=dtype) + >>> # Check that fields are equal + >>> assert sa.fields['a'] == sa.fields['b'] + """ + return self._fields + + @property + def keep_last_read(self) -> bool: + """Indicates whether the last read data should be kept in memory.""" + return self._keep_last_read + + @keep_last_read.setter + def keep_last_read(self, value: bool) -> None: + """Set whether the last read data should be kept in memory. + + This always clears the last read data (if any). + """ + if not isinstance(value, bool): + raise TypeError("keep_last_read should be a boolean") + # Reset last read data + self._last_read.clear() + self._keep_last_read = value + + @property + def info(self) -> InfoReporter: + """ + Print information about this array. 
+ + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> my_array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + >>> array = blosc2.asarray(my_array) + >>> print(array.info) + type : NDArray + shape : (10,) + chunks : (10,) + blocks : (10,) + dtype : int64 + cratio : 0.73 + cparams : {'blocksize': 80, + 'clevel': 1, + 'codec': , + 'codec_meta': 0, + 'filters': [, + , + , + , + , + ], + 'filters_meta': [0, 0, 0, 0, 0, 0], + 'nthreads': 4, + 'splitmode': , + 'typesize': 8, + 'use_dict': 0} + dparams : {'nthreads': 4} + """ + return InfoReporter(self) + + @property + def info_items(self) -> list: + """A list of tuples with the information about this array. + Each tuple contains the name of the attribute and its value. + """ + items = [] + items += [("type", f"{self.__class__.__name__}")] + items += [("shape", self.shape)] + items += [("chunks", self.chunks)] + items += [("blocks", self.blocks)] + items += [("dtype", self.dtype)] + items += [("nbytes", self.nbytes)] + items += [("cbytes", self.cbytes)] + items += [("cratio", f"{self.cratio:.2f}")] + items += [("cparams", self.cparams)] + items += [("dparams", self.dparams)] + return items + + @property + def schunk(self) -> blosc2.SChunk: + """ + The :ref:`SChunk ` reference of the :ref:`NDArray`. + All the attributes from the :ref:`SChunk ` can be accessed through + this instance as `self.schunk`. + + See Also + -------- + :ref:`SChunk Attributes ` + """ + return self._schunk + + @property + def shape(self) -> tuple[int]: + """Returns the data shape of this container. + + If the shape is a multiple of each dimension of :attr:`chunks`, + it will be the same as :attr:`ext_shape`. + + See Also + -------- + :attr:`ext_shape` + """ + return super().shape + + @property + def ext_shape(self) -> tuple[int]: + """The padded data shape. + + The padded data is filled with zeros to make the real data fit into blocks and chunks, but it + will never be retrieved as actual data (so the user can ignore this). 
+ In case :attr:`shape` is multiple in each dimension of :attr:`chunks` it will be the same + as :attr:`shape`. + + See Also + -------- + :attr:`shape` + :attr:`chunks` + """ + return super().ext_shape + + @property + def chunks(self) -> tuple[int]: + """Returns the data chunk shape of this container. + + If the chunk shape is a multiple of each dimension of :attr:`blocks`, + it will be the same as :attr:`ext_chunks`. + + See Also + -------- + :attr:`ext_chunks` + """ + return super().chunks + + @property + def ext_chunks(self) -> tuple[int]: + """ + Returns the padded chunk shape which defines the chunksize in the associated schunk. + + This will be the chunk shape used to store each chunk, filling the extra positions + with zeros (padding). If the :attr:`chunks` is a multiple of + each dimension of :attr:`blocks` it will be the same as :attr:`chunks`. + + See Also + -------- + :attr:`chunks` + """ + return super().ext_chunks + + @property + def blocks(self) -> tuple[int]: + """The block shape of this container.""" + return super().blocks + + @property + def ndim(self) -> int: + """The number of dimensions of this container.""" + return super().ndim + + @property + def size(self) -> int: + """The size (in elements) for this container.""" + return super().size + + @property + def chunksize(self) -> int: + """Returns the data chunk size (in bytes) for this container. + + This will not be the same as + :attr:`SChunk.chunksize ` + in case :attr:`chunks` is not multiple in + each dimension of :attr:`blocks` (or equivalently, if :attr:`chunks` is + not the same as :attr:`ext_chunks`). + + See Also + -------- + :attr:`chunks` + :attr:`ext_chunks` + """ + return super().chunksize + + @property + def dtype(self) -> np.dtype: + """ + Data-type of the array's elements. + """ + return super().dtype + + @property + def blocksize(self) -> int: + """The block size (in bytes) for this container. 
+ + This is a shortcut to + :attr:`SChunk.blocksize ` and can be accessed + through the :attr:`schunk` attribute as well. + + See Also + -------- + :attr:`schunk` + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + >>> ndarray = blosc2.asarray(array) + >>> print("Block size:", ndarray.blocksize) + Block size: 80 + """ + return self._schunk.blocksize + + @property + def oindex(self) -> OIndex: + """Shortcut for orthogonal (outer) indexing, see :func:`get_oselection_numpy`""" + return OIndex(self) + + # @property + # def vindex(self) -> VIndex: + # """Shortcut for vectorised indexing. Not yet supported.""" + # return VIndex(self) + + @property + def T(self): + """Return the transpose of a 2-dimensional array.""" + if self.ndim != 2: + raise ValueError("This property only works for 2-dimensional arrays.") + return blosc2.linalg.permute_dims(self) + + @property + def mT(self): + """Transpose of a matrix (or a stack of matrices).""" + if self.ndim < 2: + raise ValueError("This property only works for N-dimensional arrays with N>=2.") + axes = np.arange(self.ndim) + axes[-1] = self.ndim - 2 + axes[-2] = self.ndim - 1 + return blosc2.linalg.permute_dims(self, axes=axes) + + def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray: + """ + Select a slice from the array using a fancy index. + Closely matches NumPy fancy indexing behaviour, except in + some edge cases which are not supported by ndindex. + Array indices separated by slice object - e.g. arr[0, :10, [0,1]] - are NOT supported. + See https://www.blosc.org/posts/blosc2-fancy-indexing for more details. 
+ + Parameters + ---------- + key: list or np.ndarray + + Returns + ------- + out: np.ndarray + + """ + # TODO: Make this faster and avoid running out of memory - avoid broadcasting keys + + ## Can't do this because ndindex doesn't support all the same indexing cases as Numpy + # if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE: + # return self[:][key] # load into memory for smallish arrays + shape = self.shape + chunks = self.chunks + + # TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast + _slice = ndindex.ndindex(key).expand(shape) # handles negative indices -> positive internally + out_shape = _slice.newshape(shape) + _slice = _slice.raw + # now all indices are slices or arrays of integers (or booleans) + # # moreover, all arrays are consecutive (otherwise an error is raised) + + if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all( + [s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)] + ): + chunks = np.array(chunks) + # |------| + # ------| arrs |------ + arridxs = [i for i, s in enumerate(_slice) if isinstance(s, np.ndarray)] + begin, end = arridxs[0], arridxs[-1] + 1 + + start, stop, step, _ = get_ndarray_start_stop(begin, _slice[:begin], self.shape[:begin]) + prior_tuple = tuple( + slice(s, st, stp) for s, st, stp in zip(start, stop, step, strict=True) + ) # convert to start and stop +ve + start, stop, step, _ = get_ndarray_start_stop( + len(self.shape[end:]), _slice[end:], self.shape[end:] + ) + post_tuple = tuple( + slice(s, st, stp) for s, st, stp in zip(start, stop, step, strict=True) + ) # convert to start and stop +ve + + flat_shape = tuple( + (i.stop - i.start - i.step // builtins.abs(i.step)) // i.step + 1 for i in prior_tuple + ) + idx_dim = np.prod(_slice[begin].shape, dtype=np.int32) + + # TODO: find a nicer way to do the copy maybe + arr = np.empty((idx_dim, end - begin), dtype=_slice[begin].dtype) + for i, s in enumerate(_slice[begin:end]): + 
arr[:, i] = s.reshape(-1) # have to do a copy + + flat_shape += (idx_dim,) + flat_shape += tuple( + (i.stop - i.start - i.step // builtins.abs(i.step)) // i.step + 1 for i in post_tuple + ) + # out_shape could have new dims if indexing arrays are not all 1D + # (we have just flattened them so need to handle accordingly) + divider = chunks[begin:end] + chunked_arr = arr // divider + if arr.shape[-1] == 1: # 1D chunks, can avoid loading whole chunks + idx_order = np.argsort(arr.squeeze(axis=1), axis=-1) # sort by real index + chunk_nitems = np.bincount(chunked_arr.reshape(-1), minlength=self.schunk.nchunks) + unique_chunks = np.nonzero(chunk_nitems)[0][:, None] # add dummy axis + chunk_nitems = chunk_nitems[unique_chunks] + else: + chunked_arr = np.ascontiguousarray( + chunked_arr + ) # ensure C-order memory to allow structured dtype view + # TODO: check that avoids sort and copy (alternative: maybe do a bincount with structured data types?) + _, row_ids, idx_inv, chunk_nitems = np.unique( + chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]), + return_counts=True, + return_index=True, + return_inverse=True, + ) + # In some versions of Numpy, output of np.unique has dummy dimension + idx_inv = idx_inv if len(idx_inv.shape) == 1 else idx_inv.squeeze(-1) + unique_chunks = chunked_arr[row_ids] + # sort by chunks (can't sort by index since larger index could belong to lower chunk) + # e.g. 
chunks of (100, 10) means (50, 15) has chunk idx (0,1) but (60,5) has (0, 0) + idx_order = np.argsort(idx_inv) + sorted_idxs = arr[idx_order] + out = np.empty(flat_shape, dtype=self.dtype) + shape = np.array(shape) + + chunk_nitems_cumsum = np.cumsum(chunk_nitems) + cprior_slices = [ + slice_to_chunktuple(s, c) for s, c in zip(prior_tuple, chunks[:begin], strict=True) + ] + cpost_slices = [slice_to_chunktuple(s, c) for s, c in zip(post_tuple, chunks[end:], strict=True)] + # TODO: rewrite to allow interleaved slices/array indexes + for chunk_i, chunk_idx in enumerate(unique_chunks): + start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1] + stop = chunk_nitems_cumsum[chunk_i] + selection = sorted_idxs[start:stop] + out_mid_selection = (idx_order[start:stop],) + if ( + arr.shape[-1] == 1 + ): # can avoid loading in whole chunk if 1D for array indexed chunks, a bit faster + chunk_begin = selection[0] + chunk_end = selection[-1] + 1 + else: + chunk_begin = chunk_idx * chunks[begin:end] + chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end]) + loc_mid_selection = tuple(a for a in (selection - chunk_begin).T) + + # loop over chunks coming from slices before and after array indices + for cprior_tuple in product(*cprior_slices): + out_prior_selection, prior_selection, loc_prior_selection = _get_selection( + cprior_tuple, prior_tuple, chunks[:begin] + ) + for cpost_tuple in product(*cpost_slices): + out_post_selection, post_selection, loc_post_selection = _get_selection( + cpost_tuple, post_tuple, chunks[end:] + ) + locbegin, locend = _get_local_slice( + prior_selection, post_selection, (chunk_begin, chunk_end) + ) + to_be_loaded = np.empty(locend - locbegin, dtype=self.dtype) + # basically load whole chunk, except for slice part at beginning and end + super().get_slice_numpy(to_be_loaded, (locbegin, locend)) + loc_idx = loc_prior_selection + loc_mid_selection + loc_post_selection + out_idx = out_prior_selection + out_mid_selection + 
out_post_selection + out[out_idx] = to_be_loaded[loc_idx] + return out.reshape(out_shape) # should have filled in correct order, just need to reshape + + # Default when there are booleans + # TODO: for boolean indexing could be optimised by avoiding + # calculating out_shape prior to loop and keeping track on-the-fly (like in LazyExpr machinery) + out = np.empty(out_shape, dtype=self.dtype) + return self._get_set_findex_default(_slice, out) + + def _get_set_findex_default(self, _slice, out=None, value=None): + _get = out is not None + out = self if out is None else out # default return for setitem with no intersecting chunks + if 0 in self.shape: + return out + chunk_size = ndindex.ChunkSize(self.chunks) # only works with nonzero chunks + # repeated indices are grouped together + intersecting_chunks = chunk_size.as_subchunks( + _slice, self.shape + ) # if _slice is (), returns all chunks + for c in intersecting_chunks: + sub_idx = _slice.as_subindex(c).raw + sel_idx = c.as_subindex(_slice) + start, stop, step, _ = get_ndarray_start_stop(self.ndim, c.raw, self.shape) + chunk = np.empty(tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype) + super().get_slice_numpy(chunk, (start, stop)) + if _get: + new_shape = sel_idx.newshape(out.shape) + out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape) + else: + chunk[sub_idx] = value if np.isscalar(value) else value[sel_idx.raw] + out = super().set_slice((start, stop), chunk) + return out + + def get_oselection_numpy(self, key: list | np.ndarray) -> np.ndarray: + """ + Select independently from self along axes specified in key. Key must be same length as self shape. + See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing. 
+ """ + shape = tuple(len(k) for k in key) + self.shape[len(key) :] + # Create the array to store the result + arr = np.empty(shape, dtype=self.dtype) + return super().get_oindex_numpy(arr, key) + + def set_oselection_numpy(self, key: list | np.ndarray, arr: NDArray) -> np.ndarray: + """ + Select independently from self along axes specified in key and set to entries in arr. + Key must be same length as self shape. + See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing. + """ + return super().set_oindex_numpy(key, arr) + + def _get_set_nonunit_steps(self, _slice, out=None, value=None): + start, stop, step, mask = _slice + _get = out is not None + out = self if out is None else out # default return for setitem with no intersecting chunks + if 0 in self.shape: + return out + + chunks = self.chunks + _slice = tuple(slice(s, st, stp) for s, st, stp in zip(start, stop, step, strict=True)) + intersecting_chunks = [ + slice_to_chunktuple(s, c) for s, c in zip(_slice, chunks, strict=True) + ] # internally handles negative steps + for c in product(*intersecting_chunks): + sel_idx, glob_selection, sub_idx = _get_selection(c, _slice, chunks) + sel_idx = tuple(s for s, m in zip(sel_idx, mask, strict=True) if not m) + sub_idx = tuple(s if not m else s.start for s, m in zip(sub_idx, mask, strict=True)) + locstart, locstop = _get_local_slice( + glob_selection, + (), + ((), ()), # switches start and stop for negative steps + ) + chunk = np.empty( + tuple(sp - st for st, sp in zip(locstart, locstop, strict=True)), dtype=self.dtype + ) + # basically load whole chunk, except for slice part at beginning and end + super().get_slice_numpy(chunk, (locstart, locstop)) # copy relevant slice of chunk + if _get: + out[sel_idx] = chunk[sub_idx] # update relevant parts of chunk + else: + chunk[sub_idx] = ( + value if np.isscalar(value) else value[sel_idx] + ) # update relevant parts of chunk + out = super().set_slice((locstart, locstop), chunk) # load 
updated partial chunk into array + return out + + def __getitem__( + self, + key: None + | int + | slice + | Sequence[slice | int | np.bool_ | np.ndarray[int | np.bool_] | None] + | NDArray[int | np.bool_] + | blosc2.LazyExpr + | str, + ) -> np.ndarray | blosc2.LazyExpr: + """ + Retrieve a (multidimensional) slice as specified by the key. + + Note that this __getitem__ closely matches NumPy fancy indexing behaviour, except in + some edge cases which are not supported by ndindex. + Array indices separated by slice object - e.g. arr[0, :10, [0,1]] - are NOT supported. + See https://www.blosc.org/posts/blosc2-fancy-indexing for more details. + + Parameters + ---------- + key: int, slice, sequence of (slices, int), array of bools, LazyExpr or str + The slice(s) to be retrieved. Note that step parameter is not yet honored + in slices. If a LazyExpr is provided, the expression is expected to be of + boolean type, and the result will be another LazyExpr returning the values + of this array where the expression is True. + When key is a (nd-)array of bools, the result will be the values of ``self`` + where the bool values are True (similar to NumPy). + If key is an N-dim array of integers, the result will be the values of + this array at the specified indices with the shape of the index. + If the key is a string, and it is a field name of self, a :ref:`NDField` + accessor will be returned; if not, it will be attempted to convert to a + :ref:`LazyExpr`, and will search for its operands in the fields of ``self``. + + Returns + ------- + out: np.ndarray | blosc2.LazyExpr + The requested data as a NumPy array or a :ref:`LazyExpr`. 
+ + Examples + -------- + >>> import blosc2 + >>> shape = [25, 10] + >>> # Create an array + >>> a = blosc2.full(shape, 3.3333) + >>> # Get slice as a NumPy array + >>> a[:5, :5] + array([[3.3333, 3.3333, 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333]]) + """ + # The more general case (this is quite slow) + # If the key is a LazyExpr, decorate with ``where`` and return it + if isinstance(key, blosc2.LazyExpr): + return key.where(self) + if isinstance(key, str): + if self.dtype.fields is None: + raise ValueError("The array is not structured (its dtype does not have fields)") + if key in self.fields: + # A shortcut to access fields + return self.fields[key] + # Assume that the key is a boolean expression + expr = blosc2.LazyExpr._new_expr(key, self.fields, guess=False) + return expr.where(self) + + key = key[()] if isinstance(key, NDArray) else key # key not iterable + key = tuple(k[()] if isinstance(k, NDArray) else k for k in key) if isinstance(key, tuple) else key + + # decompress NDArrays + key_, mask = process_key(key, self.shape) # internally handles key an integer + key = key[()] if hasattr(key, "shape") and key.shape == () else key # convert to scalar + + # fancy indexing + if isinstance(key_, (list, np.ndarray)) or builtins.any( + isinstance(k, (list, np.ndarray)) for k in key_ + ): + # check scalar booleans, which add 1 dim to beginning + if np.issubdtype(type(key), bool) and np.isscalar(key): + if key: + _slice = ndindex.ndindex(()).expand(self.shape) # just get whole array + out_shape = _slice.newshape(self.shape) + out = np.empty(out_shape, dtype=self.dtype) + return np.expand_dims(self._get_set_findex_default(_slice, out=out), 0) + else: # do nothing + return np.empty((0,) + self.shape, dtype=self.dtype) + elif ( + hasattr(key, "dtype") and np.issubdtype(key.dtype, np.bool_) and key.shape == self.shape 
+ ): # check ORIGINAL key + # This can be interpreted as a boolean expression but only for key shape same as self shape + expr = blosc2.LazyExpr._new_expr("key", {"key": key}, guess=False).where(self) + # Decorate with where and force a getitem operation to return actual values. + # This behavior is consistent with NumPy, although different from e.g. ['expr'] + # which returns a lazy expression. + # This is faster than the fancy indexing path + return expr[:] + return self.get_fselection_numpy(key) # fancy index default, can be quite slow + + start, stop, step, none_mask = get_ndarray_start_stop(self.ndim, key_, self.shape) + shape = np.array( + [(sp - st - np.sign(stp)) // stp + 1 for st, sp, stp in zip(start, stop, step, strict=True)] + ) + if mask is not None: # there are some dummy dims from ints + # only get mask for not Nones in key to have nm_ same length as shape + nm_ = [not m for m, n in zip(mask, none_mask, strict=True) if not n] + # have to make none_mask refer to sliced dims (which will be less if ints present) + none_mask = [n for m, n in zip(mask, none_mask, strict=True) if not m] + shape = tuple(shape[nm_]) + + # Create the array to store the result + nparr = np.empty(shape, dtype=self.dtype) + if step != (1,) * self.ndim: + nparr = self._get_set_nonunit_steps((start, stop, step, [not i for i in nm_]), out=nparr) + else: + nparr = super().get_slice_numpy(nparr, (start, stop)) + + if np.any(none_mask): + nparr = np.expand_dims(nparr, axis=[i for i, n in enumerate(none_mask) if n]) + + if self._keep_last_read: + self._last_read.clear() + inmutable_key = make_key_hashable(key) + self._last_read[inmutable_key] = nparr + + return nparr + + def __setitem__( + self, + key: int | slice | Sequence[slice | int | np.bool_ | np.ndarray[int | np.bool_] | None] | None, + value: object, + ): + """Set a slice of the array. + + Parameters + ---------- + key: int, slice or sequence of slices + The index or indices specifying the slice(s) to be updated. 
Note that the step parameter + is not yet supported. + value: Py_Object Supporting the Buffer Protocol + An object supporting the + `Buffer Protocol `_ + which will be used to overwrite the specified slice(s). + + Examples + -------- + >>> import blosc2 + >>> # Create an array + >>> a = blosc2.full([8, 8], 3.3333) + >>> # Set a slice to 0 + >>> a[:5, :5] = 0 + >>> a[:] + array([[0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], + [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], + [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], + [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], + [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333], + [3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333]]) + """ + blosc2_ext.check_access_mode(self.schunk.urlpath, self.schunk.mode) + + # key not iterable + key = key[()] if isinstance(key, NDArray) else key + key = tuple(k[()] if isinstance(k, NDArray) else k for k in key) if isinstance(key, tuple) else key + + key_, mask = process_key(key, self.shape) # internally handles key an integer + if hasattr(value, "shape") and value.shape == (): + value = value.item() + value = ( + value if np.isscalar(value) else blosc2.as_simpleproxy(value) + ) # convert to SimpleProxy for e.g. 
JAX, Tensorflow, PyTorch + + if builtins.any(isinstance(k, (list, np.ndarray)) for k in key_): # fancy indexing + _slice = ndindex.ndindex(key_).expand( + self.shape + ) # handles negative indices -> positive internally + # check scalar booleans, which add 1 dim to beginning but which cause problems for ndindex.as_subindex + if ( + key.shape == () and hasattr(key, "dtype") and np.issubdtype(key.dtype, np.bool_) + ): # check ORIGINAL key after decompression + if key: + _slice = ndindex.ndindex(()).expand(self.shape) # just get whole array + else: # do nothing + return self + return self._get_set_findex_default(_slice, value=value) + + start, stop, step, none_mask = get_ndarray_start_stop(self.ndim, key_, self.shape) + + if step != (1,) * self.ndim: # handle non-unit or negative steps + if np.any(none_mask): + raise ValueError("Cannot mix non-unit steps and None indexing for __setitem__.") + return self._get_set_nonunit_steps((start, stop, step, mask), value=value) + + shape = [sp - st for sp, st in zip(stop, start, strict=False)] + if isinstance(value, blosc2.Operand): # handles SimpleProxy, NDArray, LazyExpr etc. + value = value[()] # convert to numpy + if np.isscalar(value) or value.shape == (): + value = np.full(shape, value, dtype=self.dtype) + if value.dtype != self.dtype: # handles decompressed NDArray too + try: + value = value.astype(self.dtype) + except ComplexWarning: + # numexpr type inference can lead to unnecessary type promotions + # when using complex functions (e.g. conj) with real arrays + value = value.real.astype(self.dtype) + + return super().set_slice((start, stop), value) + + def __iter__(self): + """Iterate over the (outer) elements of the array. + + Returns + ------- + out: iterator + """ + return NDOuterIterator(self) + + def __len__(self) -> int: + """Returns the length of the first dimension of the array. + This is equivalent to ``self.shape[0]``. 
+ """ + if self.shape == (): + raise TypeError("len() of unsized object") + return self.shape[0] + + def get_chunk(self, nchunk: int) -> bytes: + """Shortcut to :meth:`SChunk.get_chunk `. This can be accessed + through the :attr:`schunk` attribute as well. + + Parameters + ---------- + nchunk: int + The index of the chunk to retrieve. + + Returns + ------- + chunk: bytes + The chunk data at the specified index. + + See Also + -------- + :attr:`schunk` + The attribute that provides access to the underlying `SChunk` object. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an SChunk with some data + >>> array = np.arange(10) + >>> ndarray = blosc2.asarray(array) + >>> chunk = ndarray.get_chunk(0) + >>> # Decompress the chunk to convert it into a numpy array + >>> decompressed_chunk = blosc2.decompress(chunk) + >>> np_array_chunk = np.frombuffer(decompressed_chunk, dtype=np.int64) + >>> # Verify the content of the chunk + >>> if isinstance(np_array_chunk, np.ndarray): + >>> print(np_array_chunk) + >>> print(np_array_chunk.shape) # Assuming chunk is a list or numpy array + [ 0 1 2 3 4 5 6 7 8 9] + (10,) + """ + return self.schunk.get_chunk(nchunk) + + def reshape(self, shape: tuple[int], **kwargs: Any) -> NDArray: + """Return a new array with the specified shape. + + See full documentation in :func:`reshape`. + + See Also + -------- + :func:`reshape` + """ + return reshape(self, shape, **kwargs) + + def iterchunks_info( + self, + ) -> Iterator[ + NamedTuple( + "info", + nchunk=int, + coords=tuple, + cratio=float, + special=blosc2.SpecialValue, + repeated_value=bytes | None, + lazychunk=bytes, + ) + ]: + """ + Iterate over :paramref:`self` chunks of the array, providing information on index + and special values. + + Yields + ------ + info: namedtuple + A namedtuple with the following fields: + + nchunk: int + The index of the chunk. + coords: tuple + The coordinates of the chunk, in chunk units. 
+ cratio: float + The compression ratio of the chunk. + special: :class:`SpecialValue` + The special value enum of the chunk; if 0, the chunk is not special. + repeated_value: :attr:`self.dtype` or None + The repeated value for the chunk; if not SpecialValue.VALUE, it is None. + lazychunk: bytes + A buffer containing the complete lazy chunk. + + Examples + -------- + >>> import blosc2 + >>> a = blosc2.full(shape=(1000, ) * 3, fill_value=9, chunks=(500, ) * 3, dtype="f4") + >>> for info in a.iterchunks_info(): + ... print(info.coords) + (0, 0, 0) + (0, 0, 1) + (0, 1, 0) + (0, 1, 1) + (1, 0, 0) + (1, 0, 1) + (1, 1, 0) + (1, 1, 1) + """ + ChunkInfoNDArray = namedtuple( + "ChunkInfoNDArray", ["nchunk", "coords", "cratio", "special", "repeated_value", "lazychunk"] + ) + chunks_idx = np.array(self.ext_shape) // np.array(self.chunks) + for cinfo in self.schunk.iterchunks_info(): + nchunk, cratio, special, repeated_value, lazychunk = cinfo + coords = tuple(np.unravel_index(cinfo.nchunk, chunks_idx)) + if cinfo.special == SpecialValue.VALUE: + repeated_value = np.frombuffer(cinfo.repeated_value, dtype=self.dtype)[0] + yield ChunkInfoNDArray(nchunk, coords, cratio, special, repeated_value, lazychunk) + + def tobytes(self) -> bytes: + """Returns a buffer containing the data of the entire array. + + Returns + ------- + out: bytes + The buffer with the data of the whole array. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.dtype("i4") + >>> shape = [23, 11] + >>> a = np.arange(0, int(np.prod(shape)), dtype=dtype).reshape(shape) + >>> # Create an array + >>> b = blosc2.asarray(a) + >>> b.tobytes() == bytes(a[...]) + True + """ + return super().tobytes() + + def to_cframe(self) -> bytes: + """Get a bytes object containing the serialized :ref:`NDArray` instance. + + Returns + ------- + out: bytes + The buffer containing the serialized :ref:`NDArray` instance. 
+ + See Also + -------- + :func:`~blosc2.ndarray_from_cframe` + This function can be used to reconstruct a NDArray from the serialized bytes. + + Examples + -------- + >>> import blosc2 + >>> a = blosc2.full(shape=(1000, 1000), fill_value=9, dtype='i4') + >>> # Get the bytes object containing the serialized instance + >>> cframe_bytes = a.to_cframe() + >>> blosc_array = blosc2.ndarray_from_cframe(cframe_bytes) + >>> print("Shape of the NDArray:", blosc_array.shape) + >>> print("Data type of the NDArray:", blosc_array.dtype) + Shape of the NDArray: (1000, 1000) + Data type of the NDArray: int32 + """ + return super().to_cframe() + + def copy(self, dtype: np.dtype | str = None, **kwargs: Any) -> NDArray: + """Create a copy of an array with different parameters. + + Parameters + ---------- + dtype: np.dtype or list str + The new array dtype. Default is `self.dtype`. + + Other Parameters + ---------------- + kwargs: dict, optional + Additional keyword arguments supported by the :func:`empty` constructor. + If not specified, the defaults will be taken from the original + array (except for the urlpath). + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` with a copy of the data. 
+ + See Also + -------- + :func:`copy` + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = (10, 10) + >>> blocks = (10, 10) + >>> dtype = np.bool_ + >>> # Create a NDArray with default chunks + >>> a = blosc2.zeros(shape, blocks=blocks, dtype=dtype) + >>> # Get a copy with default chunks and blocks + >>> b = a.copy(chunks=None, blocks=None) + >>> np.array_equal(b[...], a[...]) + True + """ + if dtype is None: + dtype = self.dtype + + # Add the default parameters + kwargs["cparams"] = kwargs.get("cparams", self.cparams) + kwargs["dparams"] = kwargs.get("dparams", self.dparams) + if "meta" in kwargs: + # Do not allow to pass meta to copy + raise ValueError("meta should not be passed to copy") + + kwargs = _check_ndarray_kwargs(**kwargs) + return super().copy(dtype, **kwargs) + + def save(self, urlpath: str, contiguous=True, **kwargs: Any) -> None: + """Save the array to a file. + + This is a convenience function that calls the :func:`copy` method with the + `urlpath` parameter and the additional keyword arguments provided. + + See :func:`save` for more information. + + Parameters + ---------- + urlpath: str + The path where the array will be saved. + contiguous: bool, optional + Whether to save the array contiguously. + + Other Parameters + ---------------- + kwargs: dict, optional + Additional keyword arguments supported by the :func:`save` method. 
+ + Returns + ------- + out: None + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = (10, 10) + >>> blocks = (10, 10) + >>> dtype = np.bool_ + >>> # Create a NDArray with default chunks + >>> a = blosc2.zeros(shape, blocks=blocks, dtype=dtype) + >>> # Save the array to a file + >>> a.save("array.b2frame") + """ + blosc2_ext.check_access_mode(urlpath, "w") + # Add urlpath to kwargs + kwargs["urlpath"] = urlpath + # Add the contiguous parameter + kwargs["contiguous"] = contiguous + + super().copy(self.dtype, **kwargs) + + def resize(self, newshape: tuple | list) -> None: + """Change the shape of the array by growing or shrinking one or more dimensions. + + Parameters + ---------- + newshape : tuple or list + The new shape of the array. It should have the same number of dimensions + as :paramref:`self`, the current shape. + + Returns + ------- + out: None + + Notes + ----- + The array values in the newly added positions are not initialized. + The user is responsible for initializing them. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> import math + >>> dtype = np.dtype(np.float32) + >>> shape = [23, 11] + >>> a = np.linspace(1, 3, num=math.prod(shape)).reshape(shape) + >>> # Create an array + >>> b = blosc2.asarray(a) + >>> newshape = [50, 10] + >>> # Extend first dimension, shrink second dimension + >>> b.resize(newshape) + >>> b.shape + (50, 10) + """ + if 0 in self.chunks or 0 in self.blocks: + raise ValueError( + "Cannot resize array. Perhaps you want to specify chunks/blocks on array creation. For 1D arrays, a good chunks value is (cache_size/typesize,)!" + ) + blosc2_ext.check_access_mode(self.schunk.urlpath, self.schunk.mode) + super().resize(newshape) + + def slice(self, key: int | slice | Sequence[slice], **kwargs: Any) -> NDArray: + """Get a (multidimensional) slice as a new :ref:`NDArray`. 
+ + Parameters + ---------- + key: int, slice or sequence of slices + The index for the slices to be retrieved. Note that the step parameter is + not yet supported in slices. + + Other Parameters + ---------------- + kwargs: dict, optional + Additional keyword arguments supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + An array containing the requested data. The dtype will match that of `self`. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [23, 11] + >>> a = np.arange(np.prod(shape)).reshape(shape) + >>> # Create an array + >>> b = blosc2.asarray(a) + >>> slices = (slice(3, 7), slice(1, 11)) + >>> # Get a slice as a new NDArray + >>> c = b.slice(slices) + >>> print(c.shape) + (4, 10) + >>> print(type(c)) + + + Notes + ----- + There is a fast path for slices that are aligned with underlying chunks. + Aligned means that the slices are made entirely with complete chunks. + """ + if "cparams" not in kwargs: + kwargs["cparams"] = { + "codec": self.cparams.codec, + "clevel": self.cparams.clevel, + "filters": self.cparams.filters, + } + kwargs = _check_ndarray_kwargs(**kwargs) # sets cparams to defaults + key, mask = process_key(key, self.shape) + start, stop, step, _ = get_ndarray_start_stop(self.ndim, key, self.shape) + + # Fast path for slices made with aligned chunks + if step == (1,) * self.ndim: + aligned_chunks = detect_aligned_chunks(key, self.shape, self.chunks, consecutive=False) + if aligned_chunks: + # print("Aligned chunks detected", aligned_chunks) + # Create a new ndarray for the key slice + new_shape = [ + sp - st for sp, st in zip([k.stop for k in key], [k.start for k in key], strict=False) + ] + newarr = blosc2.empty( + shape=new_shape, + dtype=self.dtype, + chunks=self.chunks, + blocks=self.blocks, + **kwargs, + ) + # Get the chunks from the original array and update the new array + # No need for chunks to decompress and compress again + for order, nchunk in 
enumerate(aligned_chunks): + chunk = self.schunk.get_chunk(nchunk) + newarr.schunk.update_chunk(order, chunk) + return newarr.squeeze(axis=np.where(mask)[0]) # remove any dummy dims introduced + + key = (start, stop) + ndslice = super().get_slice(key, mask, **kwargs) + + # This is memory intensive, but we have not a better way to do it yet + # TODO: perhaps add a step param in the get_slice method in the future? + if step != (1,) * self.ndim: + nparr = ndslice[...] + if len(step) == 1: + nparr = nparr[:: step[0]] + else: + slice_ = tuple(slice(None, None, st) for st in step) + nparr = nparr[slice_] + return asarray(nparr, **kwargs) + + return ndslice + + def squeeze(self, axis: int | Sequence[int]) -> NDArray: + """Remove single-dimensional entries from the shape of the array. + + This method modifies the array in-place. If mask is None removes any dimensions with size 1. + If axis is provided, it should be an int or tuple of ints and the corresponding + dimensions (of size 1) will be removed. + + Returns + ------- + out: NDArray + + Examples + -------- + >>> import blosc2 + >>> shape = [1, 23, 1, 11, 1] + >>> # Create an array + >>> a = blosc2.full(shape, 2**30) + >>> a.shape + (1, 23, 1, 11, 1) + >>> # Squeeze the array + >>> a.squeeze() + >>> a.shape + (23, 11) + """ + return blosc2.squeeze(self, axis=axis) + + def indices(self, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: + """ + Return the indices of a sorted array following the specified order. + + This is only valid for 1-dim structured arrays. + + See full documentation in :func:`indices`. + """ + return indices(self, order, **kwargs) + + def sort(self, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: + """ + Return a sorted array following the specified order, or the order of the fields. + + This is only valid for 1-dim structured arrays. + + See full documentation in :func:`sort`. 
+ """ + return sort(self, order, **kwargs) + + def as_ffi_ptr(self): + """Returns the pointer to the raw FFI blosc2::b2nd_array_t object. + + This function is useful for passing the array to C functions. + """ + return super().as_ffi_ptr() + + def __matmul__(self, other): + return blosc2.linalg.matmul(self, other) + + +def squeeze(x: Array, axis: int | Sequence[int]) -> NDArray: + """ + Remove single-dimensional entries from the shape of the array. + + This method modifies the array in-place. + + Parameters + ---------- + x: Array + input array. + axis: int | Sequence[int] + Axis (or axes) to squeeze. + + Returns + ------- + out: Array + An output array having the same data type and elements as x. + + Examples + -------- + >>> import blosc2 + >>> shape = [1, 23, 1, 11, 1] + >>> # Create an array + >>> b = blosc2.full(shape, 2**30) + >>> b.shape + (1, 23, 1, 11, 1) + >>> # Squeeze the array + >>> blosc2.squeeze(b) + >>> b.shape + (23, 11) + """ + axis = [axis] if isinstance(axis, int) else axis + mask = [False for i in range(x.ndim)] + for a in axis: + if a < 0: + a += x.ndim # Adjust axis to be within the array's dimensions + if mask[a]: + raise ValueError("Axis values must be unique.") + mask[a] = True + return blosc2_ext.squeeze(x, axis_mask=mask) + + +def array_from_ffi_ptr(array_ptr) -> NDArray: + """ + Create an NDArray from a raw FFI pointer. + + This function is useful for passing arrays across FFI boundaries. + This function move the ownership of the underlying `b2nd_array_t*` object to the new NDArray, and it will be freed + when the object is destroyed. + """ + return blosc2_ext.array_from_ffi_ptr(array_ptr) + + +def where( + condition: blosc2.LazyExpr | NDArray, + x: blosc2.Array | int | float | complex | bool | str | bytes | None = None, + y: blosc2.Array | int | float | complex | bool | str | bytes | None = None, +) -> blosc2.LazyExpr: + """ + Return elements chosen from `x` or `y` depending on `condition`. 
+ + Parameters + ---------- + condition: :ref:`LazyExpr` + Where True, yield `x`, otherwise yield `y`. + x: :ref:`NDArray` or :ref:`NDField` or np.ndarray or scalar or bytes + Values from which to choose when `condition` is True. + y: :ref:`NDArray` or :ref:`NDField` or np.ndarray or scalar or bytes + Values from which to choose when `condition` is False. + + References + ---------- + `np.where `_ + """ + return condition.where(x, y) + + +@_incomplete_lazyfunc +def startswith( + a: str | blosc2.Array, prefix: str | blosc2.Array +) -> NDArray: # start: int = 0, end: int | None = None, **kwargs) + """ + Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.char.startswith.html + Returns a boolean array which is True where the string element in a starts with prefix, otherwise False. + + Parameters + ---------- + a : blosc2.Array + Input array of bytes_ or str_ dtype + + prefix : blosc2.Array + Prefix array of bytes_ or str_ dtype + + start: int | blosc2.Array + With start, test beginning at that position. + + end: int | blosc2.Array + With end, stop comparing at that position. + + kwargs: Any + kwargs accepted by the :func:`empty` constructor + + Returns + ------- + out: blosc2.Array, bool + Has the same shape as element. + + """ + + # def chunkwise_startswith(inputs, output, offset): + # x1, x2 = inputs + # # output[...] = np.char.startswith(x1, x2, start=start, end=end) + # output[...] = np.char.startswith(x1, x2) + + return blosc2.LazyExpr(new_op=(a, "startswith", prefix)) + + +@_incomplete_lazyfunc +def endswith( + a: str | blosc2.Array, suffix: str | blosc2.Array +) -> NDArray: # start: int = 0, end: int | None = None, **kwargs) -> NDArray: + """ + Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.char.endswith.html + Returns a boolean array which is True where the string element in a ends with suffix, otherwise False. 
+ + Parameters + ---------- + a : blosc2.Array + Input array of bytes_ or str_ dtype + + suffix : blosc2.Array + suffix array of bytes_ or str_ dtype + + start: int | blosc2.Array + With start, test beginning at that position. + + end: int | blosc2.Array + With end, stop comparing at that position. + + kwargs: Any + kwargs accepted by the :func:`empty` constructor + + Returns + ------- + out: blosc2.Array, bool + Has the same shape as element. + + """ + # def chunkwise_endswith(inputs, output, offset): + # x1, x2 = inputs + # # output[...] = np.char.endswith(x1, x2, start=start, end=end) + # output[...] = np.char.endswith(x1, x2) + + return blosc2.LazyExpr(new_op=(a, "endswith", suffix)) + + +@_incomplete_lazyfunc +def lower(a: str | blosc2.Array) -> NDArray: + """ + Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.char.lower.html + Return an array with the elements converted to lowercase. + Call str.lower element-wise. + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : blosc2.Array + Input array of bytes_ or str_ dtype + kwargs: Any + kwargs accepted by the :func:`empty` constructor + + Returns + ------- + out: blosc2.Array, of bytes_ or str_ dtype + Has the same shape as element. + + """ + return blosc2.LazyExpr(new_op=(a, "lower", None)) + + +@_incomplete_lazyfunc +def upper(a: str | blosc2.Array) -> NDArray: + """ + Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.char.upper.html + Return an array with the elements converted to uppercase. + Call str.lower element-wise. + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : blosc2.Array + Input array of bytes_ or str_ dtype + kwargs: Any + kwargs accepted by the :func:`empty` constructor + + Returns + ------- + out: blosc2.Array, of bytes_ or str_ dtype + Has the same shape as element. 
+ + """ + return blosc2.LazyExpr(new_op=(a, "upper", None)) + + +def lazywhere(value1=None, value2=None): + """Decorator to apply a where condition to a LazyExpr.""" + + def inner_decorator(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs).where(value1, value2) + + return wrapper + + return inner_decorator + + +def _check_shape(shape): + if isinstance(shape, int | np.integer): + shape = (shape,) + elif not isinstance(shape, tuple | list): + raise TypeError("shape should be a tuple or a list!") + if len(shape) > blosc2.MAX_DIM: + raise ValueError(f"shape length {len(shape)} is too large (>{blosc2.MAX_DIM})!") + return shape + + +def _check_dtype(dtype): + dtype = np.dtype(dtype) + if dtype.itemsize > blosc2.MAX_TYPESIZE: + raise ValueError(f"dtype itemsize {dtype.itemsize} is too large (>{blosc2.MAX_TYPESIZE})!") + if dtype == np.str_: # itemsize is 0 + dtype = np.dtype(" NDArray: + """Create an empty array. + + Parameters + ---------- + shape: int, tuple or list + The shape for the final array. + dtype: np.dtype or list str + The data type of the array elements in NumPy format. Default is `np.uint8`. + This will override the `typesize` + in the compression parameters if they are provided. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments supported: + chunks: tuple or list + The chunk shape. If None (default), Blosc2 will compute + an efficient chunk shape. + blocks: tuple or list + The block shape. If None (default), Blosc2 will compute + an efficient block shape. This will override the `blocksize` + in the cparams if they are provided. + + The other keyword arguments supported are the same as for the + :obj:`SChunk.__init__ ` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [20, 20] + >>> dtype = np.int32 + >>> # Create empty array with default chunks and blocks + >>> array = blosc2.empty(shape, dtype=dtype) + >>> array.shape + (20, 20) + >>> array.dtype + dtype('int32') + """ + dtype = _check_dtype(dtype) + shape = _check_shape(shape) + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) + return blosc2_ext.empty(shape, chunks, blocks, dtype, **kwargs) + + +def uninit(shape: int | tuple | list, dtype: np.dtype | str = np.float64, **kwargs: Any) -> NDArray: + """Create an array with uninitialized values. + + The parameters and keyword arguments are the same as for the + :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. + + Examples + -------- + >>> import blosc2 + >>> shape = [8, 8] + >>> chunks = [6, 5] + >>> # Create uninitialized array + >>> array = blosc2.uninit(shape, dtype='f8', chunks=chunks) + >>> array.shape + (8, 8) + >>> array.chunks + (6, 5) + >>> array.dtype + dtype('float64') + """ + dtype = _check_dtype(dtype) + shape = _check_shape(shape) + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) + return blosc2_ext.uninit(shape, chunks, blocks, dtype, **kwargs) + + +def nans(shape: int | tuple | list, dtype: np.dtype | str = np.float64, **kwargs: Any) -> NDArray: + """Create an array with NaNs values. + + The parameters and keyword arguments are the same as for the + :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray ` + A :ref:`NDArray ` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> shape = [8, 8] + >>> chunks = [6, 5] + >>> # Create an array of NaNs + >>> array = blosc2.nans(shape, dtype='f8', chunks=chunks) + >>> array.shape + (8, 8) + >>> array.chunks + (6, 5) + >>> array.dtype + dtype('float64') + """ + dtype = _check_dtype(dtype) + shape = _check_shape(shape) + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) + return blosc2_ext.nans(shape, chunks, blocks, dtype, **kwargs) + + +def zeros(shape: int | tuple | list, dtype: np.dtype | str = np.float64, **kwargs: Any) -> NDArray: + """Create an array with zero as the default value + for uninitialized portions of the array. + + The parameters and keyword arguments are the same as for the + :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [8, 8] + >>> chunks = [6, 5] + >>> blocks = [5, 5] + >>> dtype = np.float64 + >>> # Create zeros array + >>> array = blosc2.zeros(shape, dtype=dtype, chunks=chunks, blocks=blocks) + >>> array.shape + (8, 8) + >>> array.chunks + (6, 5) + >>> array.blocks + (5, 5) + >>> array.dtype + dtype('float64') + """ + dtype = _check_dtype(dtype) + shape = _check_shape(shape) + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) + return blosc2_ext.zeros(shape, chunks, blocks, dtype, **kwargs) + + +def full( + shape: int | tuple | list, + fill_value: bytes | int | float | bool, + dtype: np.dtype | str = None, + **kwargs: Any, +) -> NDArray: + """Create an array, with :paramref:`fill_value` being used as the default value + for uninitialized portions of the array. 
+ + Parameters + ---------- + shape: int, tuple or list + The shape of the final array. + fill_value: bytes, int, float or bool + Default value to use for uninitialized portions of the array. + Its size will override the `typesize` + in the cparams if they are passed. + dtype: np.dtype or list str + The ndarray dtype in NumPy format. By default, this will + be taken from the :paramref:`fill_value`. + This will override the `typesize` + in the cparams if they are passed. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [25, 10] + >>> # Create array filled with True + >>> array = blosc2.full(shape, True) + >>> array.shape + (25, 10) + >>> array.dtype + dtype('bool') + """ + if isinstance(fill_value, bytes): + dtype = np.dtype(f"S{len(fill_value)}") + if dtype is None: + dtype = np.array(fill_value).dtype + else: + dtype = np.dtype(dtype) + dtype = _check_dtype(dtype) + shape = _check_shape(shape) + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) + return blosc2_ext.full(shape, chunks, blocks, fill_value, dtype, **kwargs) + + +def ones(shape: int | tuple | list, dtype: np.dtype | str = None, **kwargs: Any) -> NDArray: + """Create an array with one as values. + + The parameters and keyword arguments are the same as for the + :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [8, 8] + >>> chunks = [6, 5] + >>> blocks = [5, 5] + >>> dtype = np.float64 + >>> # Create ones array + >>> array = blosc2.ones(shape, dtype=dtype, chunks=chunks, blocks=blocks) + >>> array.shape + (8, 8) + >>> array.chunks + (6, 5) + >>> array.blocks + (5, 5) + >>> array.dtype + dtype('float64') + """ + if dtype is None: + dtype = blosc2.DEFAULT_FLOAT + return full(shape, 1, dtype, **kwargs) + + +def arange( + start: int | float, + stop: int | float | None = None, + step: int | float | None = 1, + dtype: np.dtype | str = None, + shape: int | tuple | list | None = None, + c_order: bool = True, + **kwargs: Any, +) -> NDArray: + """ + Return evenly spaced values within a given interval. + Due to rounding errors for chunkwise filling, may differ + from numpy.arange in edge cases. + + Parameters + ---------- + start: int, float + The starting value of the sequence. + stop: int, float + The end value of the sequence. + step: int, float or None + Spacing between values. + dtype: np.dtype or list str + The data type of the array elements in NumPy format. Default is + None. If dtype is None, inferred from start, stop and step. + Output type is integer unless one or more have type float. + This will override the `typesize` in the compression parameters if + they are provided. + shape: int, tuple or list + The shape of the final array. If None, the shape will be computed. + c_order: bool + Whether to store the array in C order (row-major) or insertion order. + Insertion order means that values will be stored in the array + following the order of chunks in the array; this is more memory + efficient, as it does not require an intermediate copy of the array. + Default is C order. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an array with values from 0 to 10 + >>> array = blosc2.arange(0, 10, 1) + >>> print(array) + [0 1 2 3 4 5 6 7 8 9] + """ + + def _arange_num_elements(start, stop, step): + return builtins.max(math.ceil((stop - start) / step), 0) + + def arange_fill(inputs, output, offset): + lout = len(output) + start, _, step = inputs + start += offset[0] * step + stop = start + lout * step + if _arange_num_elements(start, stop, step) == lout: # USE ARANGE IF POSSIBLE (2X FASTER) + output[:] = np.arange(start, stop, step, dtype=output.dtype) + else: # use linspace to have finer control over exclusion of endpoint for float types + output[:] = np.linspace(start, stop, lout, endpoint=False, dtype=output.dtype) + + @blosc2.dsl_kernel + def ramp_arange(start, step): + return start + _flat_idx * step # noqa: F821 # DSL index/shape symbols resolved by miniexpr + + if step is None: # not array-api compliant but for backwards compatibility + step = 1 + if stop is None: + stop = start + start = 0 + NUM = _arange_num_elements(start, stop, step) + if shape is None: + shape = (builtins.max(NUM, 0),) + else: + # Check that the shape is consistent with the start, stop and step values + if math.prod(shape) != NUM: + raise ValueError("The shape is not consistent with the start, stop and step values") + if dtype is None: + dtype = ( + blosc2.DEFAULT_FLOAT + if np.any([np.issubdtype(type(d), float) for d in (start, stop, step)]) + else blosc2.DEFAULT_INT + ) + dtype = _check_dtype(dtype) + + if is_inside_new_expr() or NUM < 0: + # We already have the dtype and shape, so return immediately + return blosc2.zeros(shape, dtype=dtype, **kwargs) + + # Windows and wasm32 does not support complex numbers in DSL + if blosc2.isdtype(dtype, "complex floating"): + lshape = (math.prod(shape),) + lazyarr = blosc2.lazyudf(arange_fill, (start, stop, step), dtype=dtype, shape=lshape) + + if len(shape) == 1: + # C order is guaranteed, 
and no reshape is needed + return lazyarr.compute(**kwargs) + + return reshape(lazyarr, shape, c_order=c_order, **kwargs) + else: + lazyarr = blosc2.lazyudf(ramp_arange, (start, step), dtype=dtype, shape=shape) + return lazyarr.compute(**kwargs) + + +# Define a numpy linspace-like function +def linspace( + start: int | float | complex, + stop: int | float | complex, + num: int | None = None, + dtype=None, + endpoint: bool = True, + shape=None, + c_order: bool = True, + **kwargs: Any, +) -> NDArray: + """Return evenly spaced numbers over a specified interval. + + This is similar to `numpy.linspace` but it returns a `NDArray` + instead of a numpy array. Also, it supports a `shape` parameter + to return a ndim array. + + Parameters + ---------- + start: int, float, complex + The starting value of the sequence. + stop: int, float, complex + The end value of the sequence. + num: int | None + Number of samples to generate. Default None. + dtype: np.dtype or list str + The data type of the array elements in NumPy format. If None, inferred from + start, stop, step. Default is None. + endpoint: bool + If True, `stop` is the last sample. Otherwise, it is not included. + shape: int, tuple or list + The shape of the final array. If None, the shape will be guessed from `num`. + c_order: bool + Whether to store the array in C order (row-major) or insertion order. + Insertion order means that values will be stored in the array + following the order of chunks in the array; this is more memory + efficient, as it does not require an intermediate copy of the array. + Default is True. + **kwargs: Any + Keyword arguments accepted by the :func:`empty` constructor. + + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ """ + + def linspace_fill(inputs, output, offset): + lout = len(output) + start, stop, num, endpoint = inputs + # if num = 1 do nothing + step = (stop - start) / (num - 1) if endpoint and num > 1 else (stop - start) / num + # Compute proper start and stop values for the current chunk + # except for 0th iter, have already included start_ in prev iter + start_ = start + offset[0] * step + stop_ = start_ + lout * step + if offset[0] + lout == num: # reached end, include stop if necessary + output[:] = np.linspace(start_, stop, lout, endpoint=endpoint, dtype=output.dtype) + else: + output[:] = np.linspace(start_, stop_, lout, endpoint=False, dtype=output.dtype) + + @blosc2.dsl_kernel + def ramp_linspace(start, step): + return float(start) + _flat_idx * float(step) # noqa: F821 # DSL index/shape symbols resolved by miniexpr + + if shape is None: + if num is None: + raise ValueError("Either `shape` or `num` must be specified.") + # num is not None + shape = (num,) + else: + num = math.prod(shape) if num is None else num + + # check compatibility of shape and num + if math.prod(shape) != num or num < 0: + raise ValueError( + f"Shape is not consistent with the specified num value {num}." + "num must be nonnegative." 
+ if num < 0 + else "" + ) + + if dtype is None: + dtype = ( + blosc2.DEFAULT_COMPLEX + if np.any([np.issubdtype(type(d), complex) for d in (start, stop)]) + else blosc2.DEFAULT_FLOAT + ) + + dtype = _check_dtype(dtype) + + if is_inside_new_expr() or num == 0: + # We already have the dtype and shape, so return immediately + return blosc2.zeros(shape, dtype=dtype, **kwargs) # will return empty array for num == 0 + + # Windows and wasm32 does not support complex numbers in DSL + if blosc2.isdtype(dtype, "complex floating"): + inputs = (start, stop, num, endpoint) + lazyarr = blosc2.lazyudf(linspace_fill, inputs, dtype=dtype, shape=(num,)) + if len(shape) == 1: + # C order is guaranteed, and no reshape is needed + return lazyarr.compute(**kwargs) + + return reshape(lazyarr, shape, c_order=c_order, **kwargs) + else: + nitems = num - 1 if endpoint else num + step = (float(stop) - float(start)) / float(nitems) if nitems > 0 else 0.0 + inputs = (start, step) + lazyarr = blosc2.lazyudf(ramp_linspace, inputs, dtype=dtype, shape=shape) + return lazyarr.compute(**kwargs) + + +def eye(N, M=None, k=0, dtype=np.float64, **kwargs: Any) -> NDArray: + """Return a 2-D array with ones on the diagonal and zeros elsewhere. + + Parameters + ---------- + N: int + Number of rows in the output. + M: int, optional + Number of columns in the output. If None, defaults to `N`. + k: int, optional + Index of the diagonal: 0 (the default) refers to the main diagonal, + a positive value refers to an upper diagonal, and a negative value + to a lower diagonal. + dtype: np.dtype or list str + The data type of the array elements in NumPy format. Default is `np.float64`. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> array = blosc2.eye(2, 3, dtype=np.int32) + >>> print(array[:]) + [[1 0 0] + [0 1 0]] + """ + + def fill_eye(inputs, output: np.array, offset: tuple): + out_k = offset[0] - offset[1] + inputs[0] + output[:] = np.eye(*output.shape, out_k, dtype=output.dtype) + + if M is None: + M = N + shape = (N, M) + dtype = _check_dtype(dtype) + + if is_inside_new_expr(): + # We already have the dtype and shape, so return immediately + return blosc2.zeros(shape, dtype=dtype) + + lazyarr = blosc2.lazyudf(fill_eye, (k,), dtype=dtype, shape=shape) + return lazyarr.compute(**kwargs) + + +def fromiter(iterable, shape, dtype, c_order=True, **kwargs) -> NDArray: + """Create a new array from an iterable object. + + Parameters + ---------- + iterable: iterable + An iterable object providing data for the array. + shape: int, tuple or list + The shape of the final array. + dtype: np.dtype or list str + The data type of the array elements in NumPy format. + c_order: bool + Whether to store the array in C order (row-major) or insertion order. + Insertion order means that iterable values will be stored in the array + following the order of chunks in the array; this is more memory + efficient, as it does not require an intermediate copy of the array. + Default is C order. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an array from an iterable + >>> array = blosc2.fromiter(range(10), shape=(10,), dtype=np.int64) + >>> print(array[:]) + [0 1 2 3 4 5 6 7 8 9] + """ + + def iter_fill(inputs, output, offset): + nout = math.prod(output.shape) + (iterable,) = inputs + output[:] = np.fromiter(iterable, dtype=output.dtype, count=nout).reshape(output.shape) + + dtype = _check_dtype(dtype) + + if is_inside_new_expr(): + # We already have the dtype and shape, so return immediately + return blosc2.zeros(shape, dtype=dtype) + + lshape = (math.prod(shape),) + inputs = (iterable,) + lazyarr = blosc2.lazyudf(iter_fill, inputs, dtype=dtype, shape=lshape) + + if len(shape) == 1: + # C order is guaranteed, and no reshape is needed + return lazyarr.compute(**kwargs) + + # TODO: in principle, the next should work, but tests still fail: + # return reshape(lazyarr, shape, c_order=c_order, **kwargs) + # Creating a temporary file is a workaround for the issue + with tempfile.NamedTemporaryFile(suffix=".b2nd", delete=True) as tmp_file: + larr = lazyarr.compute(urlpath=tmp_file.name, mode="w") # intermediate array + return reshape(larr, shape, c_order=c_order, **kwargs) + + +def frombuffer( + buffer: bytes, shape: int | tuple | list, dtype: np.dtype | str = np.uint8, **kwargs: Any +) -> NDArray: + """Create an array out of a buffer. + + Parameters + ---------- + buffer: bytes + The buffer of the data to populate the container. + shape: int, tuple or list + The shape for the final container. + dtype: np.dtype or list str + The ndarray dtype in NumPy format. Default is `np.uint8`. + This will override the `typesize` + in the cparams if they are passed. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A :ref:`NDArray` is returned. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> shape = [25, 10] + >>> chunks = (49, 49) + >>> dtype = np.dtype("|S8") + >>> typesize = dtype.itemsize + >>> # Create a buffer + >>> buffer = bytes(np.random.normal(0, 1, np.prod(shape)) * typesize) + >>> # Create a NDArray from a buffer with default blocks + >>> a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype) + """ + shape = _check_shape(shape) + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) + return blosc2_ext.from_buffer(buffer, shape, chunks, blocks, dtype, **kwargs) + + +def copy(array: NDArray, dtype: np.dtype | str = None, **kwargs: Any) -> NDArray: + """ + This is equivalent to :meth:`NDArray.copy` + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> # Create an instance of NDArray with some data + >>> original_array = blosc2.asarray(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]])) + >>> # Create a copy of the array without changing dtype + >>> copied_array = blosc2.copy(original_array) + >>> print("Copied array (default dtype):") + >>> print(copied_array) + Copied array (default dtype): + [[1.1 2.2 3.3] + [4.4 5.5 6.6]] + """ + return array.copy(dtype, **kwargs) + + +def concat(arrays: list[NDArray], /, axis=0, **kwargs: Any) -> NDArray: + """Concatenate a list of arrays along a specified axis. + + Parameters + ---------- + arrays: list of :ref:`NDArray` + A list containing two or more NDArray instances to be concatenated. + axis: int, optional + The axis along which the arrays will be concatenated. Default is 0. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A new NDArray containing the concatenated data. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> arr1 = blosc2.arange(0, 5, dtype=np.int32) + >>> arr2 = blosc2.arange(5, 10, dtype=np.int32) + >>> result = blosc2.concat([arr1, arr2]) + >>> print(result[:]) + [0 1 2 3 4 5 6 7 8 9] + """ + if len(arrays) < 2: + return arrays[0] + arr1 = arrays[0] + if not isinstance(arr1, blosc2.NDArray): + raise TypeError("All inputs must be instances of blosc2.NDArray") + # Do a first pass for checking array compatibility + if axis < 0: + axis += arr1.ndim + if axis >= arr1.ndim: + raise ValueError(f"Axis {axis} is out of bounds for array of dimension {arr1.ndim}.") + for arr2 in arrays[1:]: + if not isinstance(arr2, blosc2.NDArray): + raise TypeError("All inputs must be instances of blosc2.NDArray") + if arr1.ndim != arr2.ndim: + raise ValueError("Both arrays must have the same number of dimensions for concatenation.") + if arr1.dtype != arr2.dtype: + raise ValueError("Both arrays must have the same dtype for concatenation.") + # Check that the shapes match, except for the concatenation axis + if arr1.shape[:axis] != arr2.shape[:axis] or arr1.shape[axis + 1 :] != arr2.shape[axis + 1 :]: + raise ValueError( + f"Shapes of the arrays do not match along the concatenation axis {axis}: " + f"{arr1.shape} vs {arr2.shape}" + ) + + kwargs = _check_ndarray_kwargs(**kwargs) + # Proceed with the actual concatenation + copy = True + # When provided urlpath coincides with an array + mode = kwargs.pop("mode", "a") # default mode for blosc2 is "a" + for arr2 in arrays[1:]: + arr1 = blosc2_ext.concat(arr1, arr2, axis, copy=copy, mode=mode, **kwargs) + # Have now overwritten existing file (if mode ='w'), need to change mode + # for concatenating to the same file + mode = "r" if mode == "r" else "a" + # arr1 is now the result of the concatenation, so we can now just enlarge it + copy = False + + return arr1 + + +def expand_dims(array: NDArray, axis=0) -> NDArray: + """ + Expand the shape of an array by adding new axes at 
the specified positions. + + Parameters + ---------- + array: :ref:`NDArray` + The array to be expanded. + axis: int or list of int, optional + Position in the expanded axes where the new axis (or axes) is placed. Default is 0. + + Returns + ------- + out: :ref:`NDArray` + A new NDArray with the expanded shape. + """ + array = blosc2.asarray(array) + if not isinstance(array, blosc2.NDArray): + raise TypeError("Argument array must be instance of blosc2.NDArray") + axis = [axis] if isinstance(axis, int) else axis + final_dims = array.ndim + len(axis) + mask = [False for i in range(final_dims)] + for a in axis: + if a < 0: + a += final_dims # Adjust axis to be within the new stacked array's dimensions + if mask[a]: + raise ValueError("Axis values must be unique.") + mask[a] = True + return blosc2_ext.expand_dims(array, axis_mask=mask, final_dims=final_dims) + + +def stack(arrays: list[NDArray], axis=0, **kwargs: Any) -> NDArray: + """Stack multiple arrays, creating a new axis. + + Parameters + ---------- + arrays: list of :ref:`NDArray` + A list containing two or more NDArray instances to be stacked. + axis: int, optional + The new axis along which the arrays will be stacked. Default is 0. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + A new NDArray containing the stacked data. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> arr1 = blosc2.arange(0, 6, dtype=np.int32, shape=(2,3)) + >>> arr2 = blosc2.arange(6, 12, dtype=np.int32, shape=(2,3)) + >>> result = blosc2.stack([arr1, arr2]) + >>> print(result.shape) + (2, 2, 3) + """ + if axis < 0: + axis += arrays[0].ndim + 1 # Adjust axis to be within the new stacked array's dimensions + newarrays = [] + for arr in arrays: + newarrays += [blosc2.expand_dims(arr, axis=axis)] + return blosc2.concat(newarrays, axis, **kwargs) + + +def save(array: NDArray, urlpath: str, contiguous=True, **kwargs: Any) -> None: + """Save an array to a file. + + Parameters + ---------- + array: :ref:`NDArray` + The array to be saved. + urlpath: str + The path to the file where the array will be saved. + contiguous: bool, optional + Whether to store the array contiguously. + + Other Parameters + ---------------- + kwargs: dict, optional + Keyword arguments that are supported by the :func:`save` method. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an array + >>> array = blosc2.arange(0, 100, dtype=np.int64, shape=(10, 10)) + >>> # Save the array to a file + >>> blosc2.save(array, "array.b2", mode="w") + """ + array.save(urlpath, contiguous, **kwargs) + + +def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs: Any) -> NDArray: + """Convert the `array` to an `NDArray`. + + Parameters + ---------- + array: array_like + An array supporting numpy array interface. + + copy: bool | None, optional + Whether to copy the input. If True, the function copies. + If False, raise a ValueError if copy is necessary. If None and + input is NDArray, avoid copy by returning lazyexpr. + Default: None. + + kwargs: dict, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` or :ref:`LazyExpr` + An new NDArray or LazyExpr made of :paramref:`array`. 
+ + Notes + ----- + This will create the NDArray chunk-by-chunk directly from the input array, + without the need to create a contiguous NumPy array internally. This can + be used for ingesting e.g. disk or network based arrays very effectively + and without consuming lots of memory. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create some data + >>> shape = [25, 10] + >>> a = np.arange(0, np.prod(shape), dtype=np.int64).reshape(shape) + >>> # Create a NDArray from a NumPy array + >>> nda = blosc2.asarray(a) + """ + # Convert scalars to numpy array + casting = kwargs.pop("casting", "unsafe") + if casting != "unsafe": + raise ValueError("Only unsafe casting is supported at the moment.") + if not hasattr(array, "shape"): + array = np.asarray(array) # defaults if dtype=None + dtype_ = blosc2.proxy._convert_dtype(array.dtype) + dtype = kwargs.pop("dtype", dtype_) # check if dtype provided + kwargs = _check_ndarray_kwargs(**kwargs) + chunks = kwargs.pop("chunks", None) + blocks = kwargs.pop("blocks", None) + # Use the chunks and blocks from the array if they are not passed + if chunks is None and hasattr(array, "chunks"): + chunks = array.chunks + # Zarr adds a .blocks property that maps to a zarr.indexing.BlockIndex object + # Let's avoid this + if blocks is None and hasattr(array, "blocks") and isinstance(array.blocks, (tuple, list)): + blocks = array.blocks + + copy = True if copy is None and not isinstance(array, NDArray) else copy + if copy: + chunks, blocks = compute_chunks_blocks(array.shape, chunks, blocks, dtype_, **kwargs) + # Fast path for small arrays. This is not too expensive in terms of memory consumption. 
+ shape = array.shape + small_size = 2**24 # 16 MB + array_nbytes = math.prod(shape) * dtype_.itemsize + if array_nbytes < small_size: + if not isinstance(array, np.ndarray) and hasattr(array, "chunks"): + # A getitem operation should be enough to get a numpy array + array = array[()] + + array = np.require(array, dtype=dtype, requirements="C") # require contiguous array + + return blosc2_ext.asarray(array, chunks, blocks, **kwargs) + + # Create the empty array + ndarr = empty(shape, dtype_, chunks=chunks, blocks=blocks, **kwargs) + behaved = are_partitions_behaved(shape, chunks, blocks) + + # Get the coordinates of the chunks + chunks_idx, nchunks = get_chunks_idx(shape, chunks) + + # Iterate over the chunks and update the empty array + for nchunk in range(nchunks): + # Compute current slice coordinates + coords = tuple(np.unravel_index(nchunk, chunks_idx)) + slice_ = tuple( + slice(c * s, builtins.min((c + 1) * s, shape[i])) + for i, (c, s) in enumerate(zip(coords, chunks, strict=True)) + ) + # Ensure the array slice is contiguous and of correct dtype + array_slice = np.require(array[slice_], dtype=dtype, requirements="C") + if behaved: + # The whole chunk is to be updated, so this fastpath is safe + ndarr.schunk.update_data(nchunk, array_slice, copy=False) + else: + ndarr[slice_] = array_slice + else: + if not isinstance(array, NDArray): + raise ValueError("Must always do a copy for asarray unless NDArray provided.") + # TODO: make a direct view possible + return array + + return ndarr + + +def astype( + array: Sequence | blosc2.Array, + dtype, + casting: str = "unsafe", + copy: bool = True, + **kwargs: Any, +) -> NDArray: + """ + Copy of the array, cast to a specified type. Does not support copy = False. + + Parameters + ---------- + array: Sequence | blosc2.Array + The array to be cast to a different type. + dtype: DType-like + The desired data type to cast to. + casting: str = 'unsafe' + Controls what kind of data casting may occur. 
Defaults to 'unsafe' for backwards compatibility. + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + copy: bool = True + Must always be True as copy is made by default. Will be changed in a future version + + Returns + ------- + out: NDArray + New array with specified data type. + """ + return asarray(array, dtype=dtype, casting=casting, copy=copy, **kwargs) + + +def _check_ndarray_kwargs(**kwargs): # noqa: C901 + storage = kwargs.get("storage") + if storage is not None: + for key in kwargs: + if key in list(blosc2.Storage.__annotations__): + raise AttributeError( + "Cannot pass both `storage` and other kwargs already included in Storage" + ) + if isinstance(storage, blosc2.Storage): + kwargs = {**kwargs, **asdict(storage)} + else: + kwargs = {**kwargs, **storage} + else: + # Add the default storage values as long as they are not already passed + storage_dflts = asdict(blosc2.Storage(urlpath=kwargs.get("urlpath"))) # urlpath can affect defaults + # If a key appears in both operands, the one from the right-hand operand wins + kwargs = storage_dflts | kwargs + + supported_keys = [ + "chunks", + "blocks", + "cparams", + "dparams", + "meta", + "urlpath", + "contiguous", + "mode", + "mmap_mode", + "initial_mapping_size", + "storage", + "out", + "_chunksize_reduc_factor", + ] + _ = kwargs.pop("device", None) # pop device (not used, but needs to be discarded) + for key in kwargs: + if key not in supported_keys: + raise KeyError( + f"Only {supported_keys} are supported as keyword arguments, and you passed '{key}'" + ) + + if "cparams" in kwargs: + cparams = kwargs["cparams"] + if cparams is None: + kwargs["cparams"] = blosc2.CParams() + elif isinstance(cparams, blosc2.CParams): + 
kwargs["cparams"] = asdict(kwargs["cparams"]) + else: + if "chunks" in kwargs["cparams"]: + raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead") + if "blocks" in kwargs["cparams"]: + raise ValueError("You cannot pass chunks in cparams, use `blocks` argument instead") + kwargs["cparams"] = cparams.copy() + if "dparams" in kwargs and isinstance(kwargs["dparams"], blosc2.DParams): + kwargs["dparams"] = asdict(kwargs["dparams"]) + if blosc2.IS_WASM: + cparams = kwargs.get("cparams") + if isinstance(cparams, dict) and cparams.get("nthreads", 1) != 1: + cparams = cparams.copy() + cparams["nthreads"] = 1 + kwargs["cparams"] = cparams + dparams = kwargs.get("dparams") + if isinstance(dparams, dict) and dparams.get("nthreads", 1) != 1: + dparams = dparams.copy() + dparams["nthreads"] = 1 + kwargs["dparams"] = dparams + + return kwargs + + +def get_slice_nchunks( + schunk: blosc2.SChunk, key: tuple[(int, int)] | int | slice | Sequence[slice] +) -> np.ndarray: + """ + Get the unidimensional chunk indexes needed to obtain a + slice of a :ref:`SChunk ` or a :ref:`NDArray`. + + Parameters + ---------- + schunk: :ref:`SChunk ` or :ref:`NDArray` + The super-chunk or ndarray container. + key: tuple(int, int), int, slice or sequence of slices + For a SChunk: a tuple with the start and stop of the slice, an integer, + or a single slice. For a ndarray, sequences of slices (one per dimension) are accepted. + + Returns + ------- + out: np.ndarray + An array with the unidimensional chunk indexes. 
+ """ + if isinstance(schunk, NDArray): + array = schunk + key, _ = process_key(key, array.shape) + start, stop, step, _ = get_ndarray_start_stop(array.ndim, key, array.shape) + if step != (1,) * array.ndim: + raise IndexError("Step parameter is not supported yet") + key = (start, stop) + return blosc2_ext.array_get_slice_nchunks(array, key) + else: + if isinstance(key, int): + key = (key, key + 1) + elif isinstance(key, slice): + if key.step not in (1, None): + raise IndexError("Only step=1 is supported") + key = (key.start, key.stop) + return blosc2_ext.schunk_get_slice_nchunks(schunk, key) + + +def indices(array: blosc2.Array, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: + """ + Return the indices of a sorted array following the specified order. + + This is only valid for 1-dim structured arrays. + + Parameters + ---------- + array: :ref:`blosc2.Array` + The (structured) array to be sorted. + order: str, list of str, optional + Specifies which fields to compare first, second, etc. A single + field can be specified as a string. Not all fields need to be + specified, only the ones by which the array is to be sorted. + If None, the array is not sorted. + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + The sorted array. + """ + if not order: + # Shortcut for this relatively rare case + return arange(array.shape[0], dtype=np.int64) + + # Create a lazy array to access the sort machinery there + # This is a bit of a hack, but it is the simplest way to do it + # (the sorting mechanism in LazyExpr should be improved to avoid this) + lbool = blosc2.lazyexpr(blosc2.ones(array.shape, dtype=np.bool_)) + larr = array[lbool] + return larr.indices(order).compute(**kwargs) + + +def sort(array: blosc2.Array, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: + """ + Return a sorted array following the specified order. 
+ + This is only valid for 1-dim structured arrays. + + Parameters + ---------- + array: :ref:`blosc2.Array` + The (structured) array to be sorted. + order: str, list of str, optional + Specifies which fields to compare first, second, etc. A single + field can be specified as a string. Not all fields need to be + specified, only the ones by which the array is to be sorted. + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + + Returns + ------- + out: :ref:`NDArray` + The sorted array. + """ + if not order: + return array + + # Create a lazy array to access the sort machinery there + # This is a bit of a hack, but it is the simplest way to do it + # (the sorting mechanism in LazyExpr should be improved to avoid this) + lbool = blosc2.lazyexpr(blosc2.ones(array.shape, dtype=np.bool_)) + larr = array[lbool] + return larr.sort(order).compute(**kwargs) + + +# Class for dealing with fields in an NDArray +# This will allow to access fields by name in the dtype of the NDArray +class NDField(Operand): + def __init__(self, ndarr: NDArray, field: str): + """ + Create a new NDField. + + Parameters + ---------- + ndarr: :ref:`NDArray` + The NDArray to which assign the field. + field: str + The field's name. + + Returns + ------- + out: :ref:`NDField` + The corresponding :ref:`NDField`. 
+ """ + if not isinstance(ndarr, NDArray): + raise TypeError("ndarr should be a NDArray!") + if not isinstance(field, str): + raise TypeError("field should be a string!") + if ndarr.dtype.fields is None: + raise TypeError("NDArray does not have a structured dtype!") + if field not in ndarr.dtype.fields: + raise TypeError(f"Field {field} not found in the dtype of the NDArray") + # Store immutable properties + self.ndarr = ndarr + self.chunks = ndarr.chunks + self.blocks = ndarr.blocks + self.field = field + self._dtype = ndarr.dtype.fields[field][0] + self.offset = ndarr.dtype.fields[field][1] + + def __repr__(self): + """ + Get a string as a representation. + + Returns + ------- + out: str + """ + return f"NDField({self.ndarr}, {self.field})" + + @property + def shape(self) -> tuple[int]: + """The shape of the associated :ref:`NDArray`.""" + return self.ndarr.shape + + @property + def dtype(self) -> np.dtype: + """The dtype of the field of associated :ref:`NDArray`.""" + return self._dtype + + @property + def schunk(self) -> blosc2.SChunk: + """The associated :ref:`SChunk `.""" + return self.ndarr.schunk + + def __getitem__(self, key: int | slice | Sequence[slice]) -> np.ndarray: + """ + Get a slice of :paramref:`self`. + + Parameters + ---------- + key: int or slice or Sequence[slice] + The slice to be retrieved. + + Returns + ------- + out: NumPy.ndarray + A NumPy array with the data slice. 
+ + """ + # If key is a LazyExpr, decorate it with ``where`` and return it + if isinstance(key, blosc2.LazyExpr): + return key.where(self) + + if isinstance(key, str): + # Try to compute the key as a boolean expression + # Operands will be a dict with all the fields in the NDArray + operands = {field: NDField(self.ndarr, field) for field in self.ndarr.dtype.names} + expr = blosc2.lazyexpr(key, operands) + if expr.dtype != np.bool_: + raise TypeError("The expression should return a boolean array") + return expr.where(self) + # raise TypeError("This array is a NDField; use a structured NDArray for bool expressions") + + # Check if the key is in the last read cache + inmutable_key = make_key_hashable(key) + if inmutable_key in self.ndarr._last_read: + return self.ndarr._last_read[inmutable_key][self.field] + + # Do the actual read in the parent NDArray + nparr = self.ndarr[key] + # And return the field + return nparr[self.field] + + def __setitem__(self, key: int | slice | Sequence[slice], value: blosc2.Array) -> None: + """ + Set a slice of :paramref:`self` to a value. + + Parameters + ---------- + key: int or slice or Sequence[slice] + The slice to be set. + value: blosc2.Array + The value to be set. + """ + if isinstance(key, str): + raise TypeError("This array is a NDField; use a structured NDArray for bool expressions") + if not isinstance(value, np.ndarray): + value = value[:] + # Get the values in the parent NDArray + nparr = self.ndarr[key] + # Set the field + nparr[self.field] = value + # Save the values in the parent NDArray + self.ndarr[key] = nparr + + def __iter__(self): + """ + Iterate over the elements in the field. + + Returns + ------- + out: iterator + """ + return NDOuterIterator(self) + + def __len__(self) -> int: + """ + Returns the length of the first dimension of the field. 
+ """ + return self.shape[0] + + +class OIndex: + def __init__(self, array: NDArray): + self.array = array + + def __getitem__(self, selection) -> np.ndarray: + return self.array.get_oselection_numpy(selection) + + def __setitem__(self, selection, input) -> np.ndarray: + return self.array.set_oselection_numpy(selection, input) + + +# class VIndex: +# def __init__(self, array: NDArray): +# self.array = array + +# # TODO: all this +# def __getitem__(self, selection) -> np.ndarray: +# return NotImplementedError + +# def __setitem__(self, selection, input) -> np.ndarray: +# return NotImplementedError + + +def empty_like(x: blosc2.Array, dtype=None, **kwargs) -> NDArray: + """ + Returns an uninitialized array with the same shape as an input array x. + + Parameters + ---------- + x : blosc2.Array + Input array from which to derive the output array shape. + + dtype (Optional): + Output array data type. If dtype is None, the output array data type + is inferred from x. Default: None. + + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + These arguments will be set in the resulting :ref:`NDArray`. + + Returns + ------ + out : NDArray + An array having the same shape as x and containing uninitialized data. + """ + if dtype is None: + dtype = x.dtype + return blosc2.empty(shape=x.shape, dtype=dtype, **kwargs) + + +def ones_like(x: blosc2.Array, dtype=None, **kwargs) -> NDArray: + """ + Returns an array of ones with the same shape as an input array x. + + Parameters + ---------- + x : blosc2.Array + Input array from which to derive the output array shape. + + dtype (Optional): + Output array data type. If dtype is None, the output array data type + is inferred from x. Default: None. + + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + These arguments will be set in the resulting :ref:`NDArray`. 
+ + Returns + ------ + out : NDArray + An array having the same shape as x and containing ones. + """ + if dtype is None: + dtype = x.dtype + return blosc2.ones(shape=x.shape, dtype=dtype, **kwargs) + + +def zeros_like(x: blosc2.Array, dtype=None, **kwargs) -> NDArray: + """ + Returns an array of zeros with the same shape as an input array x. + + Parameters + ---------- + x : blosc2.Array + Input array from which to derive the output array shape. + + dtype (Optional): + Output array data type. If dtype is None, the output array data type + is inferred from x. Default: None. + + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + These arguments will be set in the resulting :ref:`NDArray`. + + Returns + ------ + out : NDArray + An array having the same shape as x and containing zeros. + """ + if dtype is None: + dtype = x.dtype + return blosc2.zeros(shape=x.shape, dtype=dtype, **kwargs) + + +def full_like(x: blosc2.Array, fill_value: bool | int | float | complex, dtype=None, **kwargs) -> NDArray: + """ + Returns an array filled with a value with the same shape as an input array x. + + Parameters + ---------- + x : blosc2.Array + Input array from which to derive the output array shape. + + fill_value: bool | int | float | complex + The fill value. + + dtype (Optional): + Output array data type. If dtype is None, the output array data type + is inferred from x. Default: None. + + kwargs: Any, optional + Keyword arguments that are supported by the :func:`empty` constructor. + These arguments will be set in the resulting :ref:`NDArray`. + + Returns + ------ + out : NDArray + An array having the same shape as x and containing the fill value. + """ + if dtype is None: + dtype = x.dtype + return blosc2.full(shape=x.shape, fill_value=fill_value, dtype=dtype, **kwargs) + + +def take(x: blosc2.Array, indices: blosc2.Array, axis: int | None = None) -> NDArray: + """ + Returns elements of an array along an axis. 
+ + Parameters + ---------- + x: blosc2.Array + Input array. Should have one or more dimensions (axes). + + indices: array-like + Array indices. The array must be one-dimensional and have an integer data type. + + axis: int | None + Axis over which to select values. + If x is a one-dimensional array, providing an axis is optional; however, if x + has more than one dimension, providing an axis is required. Default: None. + + Returns + ------- + out: NDArray + Selected indices of x. + """ + if axis is None: + axis = 0 + if x.ndim != 1: + raise ValueError("Must specify axis parameter if x is not 1D.") + if axis < 0: + axis += x.ndim + if not isinstance(axis, (int, np.integer)): + raise ValueError("Axis must be integer.") + if isinstance(indices, list): + indices = np.asarray(indices) + if indices.ndim != 1: + raise ValueError("Indices must be 1D array.") + key = tuple(indices if i == axis else slice(None, None, 1) for i in range(x.ndim)) + # TODO: Implement fancy indexing in .slice so that this is more efficient + return blosc2.asarray(x[key]) + + +def take_along_axis(x: blosc2.Array, indices: blosc2.Array, axis: int = -1) -> NDArray: + """ + Returns elements of an array along an axis. + + Parameters + ---------- + x: blosc2.Array + Input array. Should have one or more dimensions (axes). + + indices: array-like + Array indices. The array must have same number of dimensions as x and + have an integer data type. + + axis: int + Axis over which to select values. Default: -1. + + Returns + ------- + out: NDArray + Selected indices of x. 
+ """ + if not isinstance(axis, (int, np.integer)): + raise ValueError("Axis must be integer.") + if indices.ndim != x.ndim: + raise ValueError("Indices must have same dimensions as x.") + if axis < 0: + axis += x.ndim + if indices.shape[axis] == 0: + return blosc2.empty(x.shape[:axis] + (0,) + x.shape[axis + 1 :], dtype=x.dtype) + ones = (1,) * x.ndim + # TODO: Implement fancy indexing in .slice so that this is more efficient and possibly use oindex(?) + key = tuple( + indices if i == axis else np.arange(x.shape[i]).reshape(ones[:i] + (-1,) + ones[i + 1 :]) + for i in range(x.ndim) + ) + return blosc2.asarray(x[key]) + + +def broadcast_to(arr: blosc2.Array, shape: tuple[int, ...]) -> NDArray: + """ + Broadcast an array to a new shape. + Warning: Computes a lazyexpr, so probably a bit suboptimal + + Parameters + ---------- + arr: blosc2.Array + The array to broadcast. + + shape: tuple + The shape of the desired array. + + Returns + ------- + broadcast: NDArray + A new array with the given shape. + """ + return (arr + blosc2.zeros(shape, dtype=arr.dtype)).compute() + + +def meshgrid(*arrays: blosc2.Array, indexing: str = "xy") -> Sequence[NDArray]: + """ + Returns coordinate matrices from coordinate vectors. + + Parameters + ---------- + *arrays: blosc2.Array + An arbitrary number of one-dimensional arrays representing grid coordinates. Each array should have the same numeric data type. + + indexing: str + Cartesian 'xy' or matrix 'ij' indexing of output. If provided zero or one one-dimensional vector(s) the indexing keyword is ignored. + Default: 'xy'. + + Returns + ------- + out: (List[NDArray]) + List of N arrays, where N is the number of provided one-dimensional input arrays, with same dtype. + For N one-dimensional arrays having lengths Ni = len(xi), + + * if matrix indexing ij, then each returned array has shape (N1, N2, N3, ..., Nn). + * if Cartesian indexing xy, then each returned array has shape (N2, N1, N3, ..., Nn). 
+ """ + out = () + shape = np.ones(len(arrays)) + first_arr = arrays[0] + myarrs = () + if indexing == "xy" and len(shape) > 1: + # switch 0th and 1st shapes around + def mygen(i): + if i not in (0, 1): + return (j for j in range(len(arrays)) if j != i) + else: + return (j for j in range(len(arrays)) if j != builtins.abs(i - 1)) + else: + mygen = lambda i: (j for j in range(len(arrays)) if j != i) # noqa : E731 + + for i, a in enumerate(arrays): + if len(a.shape) != 1 or a.dtype != first_arr.dtype: + raise ValueError("All arrays must be 1D and of same dtype.") + shape[i] = a.shape[0] + myarrs += (blosc2.expand_dims(a, tuple(mygen(i))),) # cheap, creates a view + + # handle Cartesian indexing + shape = tuple(shape) + if indexing == "xy" and len(shape) > 1: + shape = (shape[1], shape[0]) + shape[2:] + + # do broadcast + for a in myarrs: + out += (broadcast_to(a, shape),) + return out diff --git a/venv/Lib/site-packages/blosc2/proxy.py b/venv/Lib/site-packages/blosc2/proxy.py new file mode 100644 index 0000000..923cd9f --- /dev/null +++ b/venv/Lib/site-packages/blosc2/proxy.py @@ -0,0 +1,856 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from abc import ABC, abstractmethod +from collections.abc import Sequence + +try: + from numpy.typing import DTypeLike +except (ImportError, AttributeError): + # fallback to internal module (use with caution) + from numpy._typing import DTypeLike + +import numpy as np + +import blosc2 + + +class ProxyNDSource(ABC): + """ + Base interface for NDim sources in :ref:`Proxy`. + """ + + @property + @abstractmethod + def shape(self) -> tuple: + """ + The shape of the source. + """ + pass + + @property + @abstractmethod + def chunks(self) -> tuple: + """ + The chunk shape of the source. 
+ """ + pass + + @property + @abstractmethod + def blocks(self) -> tuple: + """ + The block shape of the source. + """ + pass + + @property + @abstractmethod + def dtype(self) -> np.dtype: + """ + The dtype of the source. + """ + pass + + @property + def cparams(self) -> blosc2.CParams: + """ + The compression parameters of the source. + + This property is optional and can be overridden if the source has a + different compression configuration. + """ + return blosc2.CParams(typesize=self.dtype.itemsize) + + @abstractmethod + def get_chunk(self, nchunk: int) -> bytes: + """ + Return the compressed chunk in :paramref:`self`. + + Parameters + ---------- + nchunk: int + The unidimensional index of the chunk to retrieve. + + Returns + ------- + out: bytes object + The compressed chunk. + """ + pass + + async def aget_chunk(self, nchunk: int) -> bytes: + """ + Return the compressed chunk in :paramref:`self` asynchronously. + + Parameters + ---------- + nchunk: int + The index of the chunk to retrieve. + + Returns + ------- + out: bytes object + The compressed chunk. + + Notes + ----- + This method is optional, and only available if the source has an async + `aget_chunk` method. + """ + raise NotImplementedError( + "aget_chunk is only available if the source has an async aget_chunk method" + ) + + +class ProxySource(ABC): + """ + Base interface for sources of :ref:`Proxy` that are not NDim objects. + """ + + @property + @abstractmethod + def nbytes(self) -> int: + """ + The total number of bytes in the source. + """ + pass + + @property + @abstractmethod + def chunksize(self) -> tuple: + """ + The chunksize of the source. + """ + pass + + @property + @abstractmethod + def typesize(self) -> int: + """ + The typesize of the source. + """ + pass + + @property + def cparams(self) -> blosc2.CParams: + """ + The compression parameters of the source. + + This property is optional and can be overridden if the source has a + different compression configuration. 
+ """ + return blosc2.CParams(typesize=self.typesize) + + @abstractmethod + def get_chunk(self, nchunk: int) -> bytes: + """ + Return the compressed chunk in :paramref:`self`. + + Parameters + ---------- + nchunk: int + The index of the chunk to retrieve. + + Returns + ------- + out: bytes object + The compressed chunk. + """ + pass + + async def aget_chunk(self, nchunk: int) -> bytes: + """ + Return the compressed chunk in :paramref:`self` asynchronously. + + Parameters + ---------- + nchunk: int + The index of the chunk to retrieve. + + Returns + ------- + out: bytes object + The compressed chunk. + + Notes + ----- + This method is optional and only available if the source has an async + `aget_chunk` method. + """ + raise NotImplementedError( + "aget_chunk is only available if the source has an async aget_chunk method" + ) + + +class Proxy(blosc2.Operand): + """Proxy (with cache support) for an object following the :ref:`ProxySource` interface. + + This can be used to cache chunks of a regular data container which follows the + :ref:`ProxySource` or :ref:`ProxyNDSource` interfaces. + """ + + def __init__( + self, src: ProxySource or ProxyNDSource, urlpath: str | None = None, mode="a", **kwargs: dict + ): + """ + Create a new :ref:`Proxy` to serve as a cache to save accessed chunks locally. + + Parameters + ---------- + src: :ref:`ProxySource` or :ref:`ProxyNDSource` + The original container. + urlpath: str, optional + The urlpath where to save the container that will work as a cache. + mode: str, optional + "a" means read/write (create if it doesn't exist); "w" means create + (overwrite if it exists). Default is "a". + kwargs: dict, optional + Keyword arguments supported: + + vlmeta: dict or None + A dictionary with different variable length metalayers. One entry per metalayer: + key: bytes or str + The name of the metalayer. + value: object + The metalayer object that will be serialized using msgpack. 
+ + """ + self.src = src + self.urlpath = urlpath + if kwargs is None: + kwargs = {} + self._cache = kwargs.pop("_cache", None) + + if self._cache is None: + meta_val = { + "local_abspath": None, + "urlpath": None, + "caterva2_env": kwargs.pop("caterva2_env", False), + } + container = getattr(self.src, "schunk", self.src) + if hasattr(container, "urlpath"): + meta_val["local_abspath"] = container.urlpath + elif isinstance(self.src, blosc2.C2Array): + meta_val["urlpath"] = (self.src.path, self.src.urlbase, self.src.auth_token) + meta = {"proxy-source": meta_val} + if hasattr(self.src, "shape"): + self._cache = blosc2.empty( + self.src.shape, + self.src.dtype, + chunks=self.src.chunks, + blocks=self.src.blocks, + cparams=self.src.cparams, + urlpath=urlpath, + mode=mode, + meta=meta, + ) + else: + self._cache = blosc2.SChunk( + chunksize=self.src.chunksize, + cparams=self.src.cparams, + urlpath=urlpath, + mode=mode, + meta=meta, + ) + self._cache.fill_special(self.src.nbytes // self.src.typesize, blosc2.SpecialValue.UNINIT) + self._schunk_cache = getattr(self._cache, "schunk", self._cache) + vlmeta = kwargs.get("vlmeta") + if vlmeta: + for key in vlmeta: + self._schunk_cache.vlmeta[key] = vlmeta[key] + + def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk: + """ + Get the container used as cache with the requested data updated. + + Parameters + ---------- + item: slice or list of slices, optional + If not None, only the chunks that intersect with the slices + in items will be retrieved if they have not been already. + + Returns + ------- + out: :ref:`NDArray` or :ref:`SChunk` + The local container used to cache the already requested data. 
+ + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> data = np.arange(20).reshape(10, 2) + >>> ndarray = blosc2.asarray(data) + >>> proxy = blosc2.Proxy(ndarray) + >>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2))) + >>> slice_data[:3, :2] + [[0 1] + [2 3] + [4 5]] + """ + if item == (): + # Full realization + for info in self._schunk_cache.iterchunks_info(): + if info.special != blosc2.SpecialValue.NOT_SPECIAL: + chunk = self.src.get_chunk(info.nchunk) + self._schunk_cache.update_chunk(info.nchunk, chunk) + else: + # Get only a slice + nchunks = blosc2.get_slice_nchunks(self._cache, item) + for info in self._schunk_cache.iterchunks_info(): + if info.nchunk in nchunks and info.special != blosc2.SpecialValue.NOT_SPECIAL: + chunk = self.src.get_chunk(info.nchunk) + self._schunk_cache.update_chunk(info.nchunk, chunk) + + return self._cache + + async def afetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk: + """ + Retrieve the cache container with the requested data updated asynchronously. + + Parameters + ---------- + item: slice or list of slices, optional + If provided, only the chunks intersecting with the specified slices + will be retrieved if they have not been already. + + Returns + ------- + out: :ref:`NDArray` or :ref:`SChunk` + The local container used to cache the already requested data. + + Notes + ----- + This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource` + have an async `aget_chunk` method. 
+ + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> import asyncio + >>> from blosc2 import ProxyNDSource + >>> class MyProxySource(ProxyNDSource): + >>> def __init__(self, data): + >>> # If the next source is multidimensional, it must have the attributes: + >>> self.data = data + >>> f"Data shape: {self.shape}, Chunks: {self.chunks}" + >>> f"Blocks: {self.blocks}, Dtype: {self.dtype}" + >>> @property + >>> def shape(self): + >>> return self.data.shape + >>> @property + >>> def chunks(self): + >>> return self.data.chunks + >>> @property + >>> def blocks(self): + >>> return self.data.blocks + >>> @property + >>> def dtype(self): + >>> return self.data.dtype + >>> # This method must be present + >>> def get_chunk(self, nchunk): + >>> return self.data.get_chunk(nchunk) + >>> # This method is optional + >>> async def aget_chunk(self, nchunk): + >>> await asyncio.sleep(0.1) # Simulate an asynchronous operation + >>> return self.data.get_chunk(nchunk) + >>> data = np.arange(20).reshape(4, 5) + >>> chunks = [2, 5] + >>> blocks = [1, 5] + >>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks) + >>> source = MyProxySource(data) + >>> proxy = blosc2.Proxy(source) + >>> async def fetch_data(): + >>> # Fetch a slice of the data from the proxy asynchronously + >>> slice_data = await proxy.afetch(slice(0, 2)) + >>> # Note that only data fetched is shown, the rest is uninitialized + >>> slice_data[:] + >>> asyncio.run(fetch_data()) + >>> # Using getitem to get a slice of the data + >>> result = proxy[1:2, 1:3] + >>> f"Proxy getitem: {result}" + Data shape: (4, 5), Chunks: (2, 5) + Blocks: (1, 5), Dtype: int64 + [[0 1 2 3 4] + [5 6 7 8 9] + [0 0 0 0 0] + [0 0 0 0 0]] + Proxy getitem: [[6 7]] + """ + if not callable(getattr(self.src, "aget_chunk", None)): + raise NotImplementedError("afetch is only available if the source has an aget_chunk method") + if item == (): + # Full realization + for info in self._schunk_cache.iterchunks_info(): + if 
def __getitem__(self, item: slice | list[slice]) -> np.ndarray:
    """
    Get a slice as a numpy.ndarray using the :ref:`Proxy`.

    The cache is first populated for ``item`` through :meth:`fetch`, and the
    result is then read from the cache container.

    Parameters
    ----------
    item: slice or list of slices
        The slice of the desired data.

    Returns
    -------
    out: numpy.ndarray
        An array with the data slice.

    Examples
    --------
    >>> import numpy as np
    >>> import blosc2
    >>> data = np.arange(25).reshape(5, 5)
    >>> ndarray = blosc2.asarray(data)
    >>> proxy = blosc2.Proxy(ndarray)
    >>> proxy[0:3, 0:3]
    [[ 0  1  2]
     [ 5  6  7]
     [10 11 12]]
    >>> proxy[2:5, 2:5]
    [[12 13 14]
     [17 18 19]
     [22 23 24]]
    """
    # Populate the cache
    self.fetch(item)
    return self._cache[item]
@property
def info(self) -> str:
    """
    The info of the cache.

    Returns
    -------
    out
        The ``info`` attribute of the cache container.

    Raises
    ------
    NotImplementedError
        If the cache container is not a :ref:`NDArray`.
    """
    # The check is made on the cache container, not on the source itself.
    if isinstance(self._cache, blosc2.NDArray):
        return self._cache.info
    raise NotImplementedError("info is only available if the source is a NDArray")
1.]] + """ + _fields = getattr(self._cache, "fields", None) + if _fields is None: + return None + return {key: ProxyNDField(self, key) for key in _fields} + + +class ProxyNDField(blosc2.Operand): + def __init__(self, proxy: Proxy, field: str): + self.proxy = proxy + self.field = field + self._dtype = proxy.dtype[field] + self._shape = proxy.shape + + @property + def dtype(self) -> np.dtype: + """ + Get the data type of the :ref:`ProxyNDField`. + + Returns + ------- + out: np.dtype + The data type of the :ref:`ProxyNDField`. + """ + return self._dtype + + @property + def shape(self) -> tuple[int]: + """ + Get the shape of the :ref:`ProxyNDField`. + + Returns + ------- + out: tuple + The shape of the :ref:`ProxyNDField`. + """ + return self._shape + + def __getitem__(self, item: slice | list[slice]) -> np.ndarray: + """ + Get a slice as a numpy.ndarray using the `field` in `proxy`. + + Parameters + ---------- + item: slice or list of slices + The slice of the desired data. + + Returns + ------- + out: numpy.ndarray + An array with the data slice. + """ + # Get the data and return the corresponding field + nparr = self.proxy[item] + return nparr[self.field] + + +def _convert_dtype(dt: str | DTypeLike): + """ + Attempts to convert to blosc2.dtype (i.e. numpy dtype) + """ + if hasattr(dt, "as_numpy_dtype"): + dt = dt.as_numpy_dtype + try: + return np.dtype(dt) + except TypeError: # likely passed e.g. a torch.float64 + return np.dtype(str(dt).split(".")[1]) + except Exception as e: + raise TypeError(f"Could not parse dtype arg {dt}.") from e + + +class SimpleProxy(blosc2.Operand): + """ + Simple proxy for any data container to be used with the compute engine. + + The source must have a `shape` and `dtype` attributes; if not, + it will be converted to a NumPy array via the `np.asarray` function. + It should also have a `__getitem__` method. + + This only supports the __getitem__ method. No caching is performed. 
def __init__(self, src, chunks: tuple | None = None, blocks: tuple | None = None):
    """
    Wrap `src` so that it can be used as an operand.

    Parameters
    ----------
    src: object
        The data container. If it lacks a `shape` or a `dtype` attribute it
        is converted with `np.asarray`.
    chunks: tuple, optional
        The chunk shape to use. If None, `src.chunks` is used when it is a
        sequence of ints; otherwise it is left to
        `blosc2.compute_chunks_blocks`.
    blocks: tuple, optional
        The block shape to use. Same defaulting rules as `chunks`, based on
        `src.blocks`.

    Raises
    ------
    TypeError
        If the (possibly converted) source has no `__getitem__` method.
    """
    if not hasattr(src, "shape") or not hasattr(src, "dtype"):
        # If the source is not an array, convert it to NumPy
        src = np.asarray(src)
    if not hasattr(src, "__getitem__"):
        raise TypeError("The source must have a __getitem__ method")
    self._src = src
    self._dtype = _convert_dtype(src.dtype)
    # Store the shape as a plain tuple whatever type the source uses for it
    self._shape = src.shape if isinstance(src.shape, tuple) else tuple(src.shape)
    # Compute reasonable values for chunks and blocks
    cparams = blosc2.CParams(clevel=0)

    def is_ints_sequence(src, attr):
        # True only when `src.<attr>` exists and is a non-string sequence of ints
        seq = getattr(src, attr, None)
        if not isinstance(seq, Sequence) or isinstance(seq, (str, bytes)):
            return False
        return all(isinstance(x, int) for x in seq)

    # Explicit arguments win; otherwise reuse the source's own partitioning
    chunks = src.chunks if chunks is None and is_ints_sequence(src, "chunks") else chunks
    blocks = src.blocks if blocks is None and is_ints_sequence(src, "blocks") else blocks
    self.chunks, self.blocks = blosc2.compute_chunks_blocks(
        self.shape, chunks, blocks, self.dtype, cparams=cparams
    )
def as_simpleproxy(
    *arrs: blosc2.Array,
) -> SimpleProxy | blosc2.Operand | tuple[SimpleProxy | blosc2.Operand, ...]:
    """
    Convert objects which fulfill the Array protocol into SimpleProxy. Any object
    that is already a blosc2.Operand is passed through unchanged.

    Parameters
    ----------
    arrs: blosc2.Array
        Objects fulfilling Array protocol, passed as positional arguments.

    Returns
    -------
    out: blosc2.SimpleProxy | blosc2.Operand | tuple
        Objects with minimal interface for blosc2 LazyExpr computations.
        When exactly one argument is given, the single converted object is
        returned (not wrapped in a tuple); otherwise a tuple with one entry
        per argument is returned (empty if no arguments are given).
    """
    out = ()
    for x in arrs:
        if isinstance(x, blosc2.Operand):
            out += (x,)
        else:
            out += (SimpleProxy(x),)
    # A single input yields the bare object rather than a 1-tuple
    return out[0] if len(out) == 1 else out
+ + Returns + ------- + wrapper + + Notes + ----- + * Although many NumPy functions are supported, some may not be implemented yet. + If you find a function that is not supported, please open an issue. + * `out` and `kwargs` parameters are not supported for all expressions + (e.g. when using a reduction as the last function). In this case, you can + still use the `out` parameter of the reduction function for some custom + control over the output. + + Examples + -------- + >>> import numpy as np + >>> import blosc2 + >>> @blosc2.jit + >>> def compute_expression(a, b, c): + >>> return np.sum(((a ** 3 + np.sin(a * 2)) > 2 * c) & (b > 0), axis=1) + >>> a = np.arange(20, dtype=np.float32).reshape(4, 5) + >>> b = np.arange(20).reshape(4, 5) + >>> c = np.arange(5) + >>> compute_expression(a, b, c) + [5 5 5 5] + """ + + def decorator(func): + if disable: + return func + + def wrapper(*args, **func_kwargs): + # Get some kwargs in decorator for SimpleProxy constructor + proxy_kwargs = {"chunks": kwargs.get("chunks"), "blocks": kwargs.get("blocks")} + + # Wrap the arguments in SimpleProxy objects if they are not NDArrays + new_args = [] + for arg in args: + if issubclass(type(arg), blosc2.Operand): + new_args.append(arg) + else: + new_args.append(SimpleProxy(arg, **proxy_kwargs)) + # The same for the keyword arguments + for key, value in func_kwargs.items(): + if issubclass(type(value), blosc2.Operand): + continue + func_kwargs[key] = SimpleProxy(value, **proxy_kwargs) + + # Call function with the new arguments + retval = func(*new_args, **func_kwargs) + + # Treat return value + # If it is a numpy array, return it as is + if isinstance(retval, np.ndarray): + if kwargs and any(kwargs[key] is not None for key in kwargs): + # But if kwargs are provided, return a NDArray instead + return blosc2.asarray(retval, **kwargs) + return retval + + # In some instances, the return value is not a LazyExpr + # (e.g. 
class PandasUdfEngine:
    """
    Engine object exposing ``map`` and ``apply`` entry points, attached to
    ``jit`` through its ``__pandas_udf__`` attribute.
    """

    @staticmethod
    def _ensure_numpy_data(data):
        """
        Return `data` as is if it is a NumPy array, else its ``.values`` attribute.

        Raises
        ------
        ValueError
            If `data` is not a NumPy array and has no ``values`` attribute.
        """
        if isinstance(data, np.ndarray):
            return data
        try:
            return data.values
        except AttributeError as err:
            # Report the type of the offending object (instances have no __name__).
            raise ValueError(
                f"blosc2.jit received an object of type {type(data).__name__}, which is not supported. "
                "Try casting your Series or DataFrame to a NumPy dtype."
            ) from err

    @classmethod
    def map(cls, data, func, args, kwargs, decorator, skip_na):
        """
        JIT a NumPy array element-wise. In the case of Blosc2, functions are
        expected to be vectorized NumPy operations, so the function is called
        with the NumPy array as the function parameter, instead of calling the
        function once for each element.

        Raises
        ------
        NotImplementedError
            Always; element-wise mapping is not supported by this engine.
        """
        raise NotImplementedError("The Blosc2 engine does not support map. Use apply instead.")

    @classmethod
    def apply(cls, data, func, args, kwargs, decorator, axis):
        """
        JIT a NumPy array by column or row. In the case of Blosc2, functions are
        expected to be vectorized NumPy operations, so the function is called
        with the NumPy array as the function parameter, instead of calling the
        function once for each column or row.

        Parameters
        ----------
        data: np.ndarray or object with a ``values`` attribute
            The data to operate on.
        func: callable
            The user function; it is wrapped with `decorator` before use.
        args: tuple
            Extra positional arguments passed to `func`.
        kwargs: dict
            Extra keyword arguments passed to `func`.
        decorator: callable
            Callable applied to `func` once, before any call.
        axis: {0, "index", 1, "columns", None}
            None (or 1-dim data) calls `func` on the whole array; 0/"index"
            calls it once per column; 1/"columns" calls it once per row.

        Raises
        ------
        NotImplementedError
            If `axis` is not one of the supported values.
        """
        data = cls._ensure_numpy_data(data)
        func = decorator(func)
        if data.ndim == 1 or axis is None:
            # pandas Series.apply or pipe
            return func(data, *args, **kwargs)
        elif axis in (0, "index"):
            # pandas apply(axis=0) column-wise
            result = [func(data[:, col_idx], *args, **kwargs) for col_idx in range(data.shape[1])]
            return np.vstack(result).transpose()
        elif axis in (1, "columns"):
            # pandas apply(axis=1) row-wise
            result = [func(data[row_idx, :], *args, **kwargs) for row_idx in range(data.shape[0])]
            return np.vstack(result)
        else:
            raise NotImplementedError(f"Unknown axis '{axis}'. Use one of 0, 1 or None.")
+ """ + + def __init__(self, schunk, urlpath, mode, mmap_mode, initial_mapping_size): + self.urlpath = urlpath + self.mode = mode + self.mmap_mode = mmap_mode + self.initial_mapping_size = initial_mapping_size + super().__init__(schunk) + + def __setitem__(self, name, content): + blosc2_ext.check_access_mode(self.urlpath, self.mode) + # If name is a slice, assume that content is a dictionary and copy all the items + if isinstance(name, slice): + if name.start is None and name.stop is None: + for k, v in content.items(): + self.set_vlmeta(k, v) + return + raise NotImplementedError("Slicing is not supported, unless [:]") + cparams = {"typesize": 1} + content = packb( + content, + default=blosc2_ext.encode_tuple, + strict_types=True, + use_bin_type=True, + ) + super().set_vlmeta(name, content, **cparams) + + def __getitem__(self, name): + if isinstance(name, slice): + if name.start is None and name.stop is None: + # Return all the vlmetalayers + return self.getall() + raise NotImplementedError("Slicing is not supported, unless [:]") + return unpackb(super().get_vlmeta(name), list_hook=blosc2_ext.decode_tuple) + + def __delitem__(self, name): + blosc2_ext.check_access_mode(self.urlpath, self.mode) + super().del_vlmeta(name) + + def __len__(self): + return super().nvlmetalayers() + + def __iter__(self): + yield from super().get_names() + + def getall(self): + """ + Return all the variable length metalayers as a dictionary + + """ + return super().to_dict() + + def __repr__(self): + return repr(self.getall()) + + def __str__(self): + return str(self.getall()) + + +class Meta(Mapping): + """ + Class providing access to fixed-length metadata on an :ref:`SChunk`. + It is available via the `.meta` property of an :ref:`SChunk`. + """ + + def get(self, key: str, default: Any = None) -> Any: + """Return the value for `key` if `key` is in the dictionary, else return `default`. 
+ If `default` is not given, it defaults to ``None``.""" + return self.get(key, default) + + def __init__(self, schunk): + self.schunk = schunk + + def __contains__(self, key: str) -> bool: + """Check if the `key` metalayer exists or not.""" + return blosc2_ext.meta__contains__(self.schunk, key) + + def __delitem__(self, key: str) -> None: + raise NotImplementedError("Cannot remove a metalayer") + + def __setitem__(self, key: str, value: bytes) -> None: + """Update the `key` metalayer with `value`. + + Parameters + ---------- + key: str + The name of the metalayer to update. + value: bytes + The buffer containing the new content for the metalayer. + + ..warning: Note that the *length* of the metalayer cannot change, + otherwise an exception will be raised. + """ + value = packb(value, default=blosc2_ext.encode_tuple, strict_types=True, use_bin_type=True) + blosc2_ext.meta__setitem__(self.schunk, key, value) + + def __getitem__(self, item: str | slice) -> bytes | dict[str, bytes]: + """Return the specified metalayer. + + Parameters + ---------- + item: str or slice + The name of the metalayer to return. If a slice is passed, + and start and stop are None ([:]), all the metalayers are returned; + else, a NotImplementedError is raised. + + Returns + ------- + bytes or dict + The buffer containing the metalayer information. If a slice is passed, + a dictionary with all the metalayers is returned. 
+ """ + if isinstance(item, slice): + if item.start is None and item.stop is None: + return self.getall() + raise NotImplementedError("Slicing is not supported, unless [:]") + if self.__contains__(item): + return unpackb( + blosc2_ext.meta__getitem__(self.schunk, item), + list_hook=blosc2_ext.decode_tuple, + ) + else: + raise KeyError(f"{item} not found") + + def keys(self) -> list[str]: + """Return the metalayers keys.""" + return blosc2_ext.meta_keys(self.schunk) + + def values(self): + raise NotImplementedError("Values can not be accessed") + + def items(self): + raise NotImplementedError("Items can not be accessed") + + def __iter__(self) -> Iterator[str]: + """Iter over the keys of the metalayers.""" + return iter(self.keys()) + + def __len__(self) -> int: + """Return the number of metalayers.""" + return blosc2_ext.meta__len__(self.schunk) + + def getall(self): + """ + Return all the variable length metalayers as a dictionary + + """ + return {key: self[key] for key in self.keys()} + + def __repr__(self): + return repr(self.getall()) + + def __str__(self): + return str(self.getall()) + + +class SChunk(blosc2_ext.SChunk): + def __init__( # noqa: C901 + self, + chunksize: int | None = None, + data: object = None, + **kwargs: dict | blosc2.CParams | blosc2.Storage | blosc2.DParams, + ) -> None: + """Create a new super-chunk, or open an existing one. + + Parameters + ---------- + chunksize: int, optional + The size, in bytes, of the chunks in the super-chunk. If not provided, + it is set automatically to a reasonable value. + + data: bytes-like object, optional + The data to be split into different chunks of size :paramref:`chunksize`. + If None, the Schunk instance will be empty initially. + + kwargs: dict, optional + Storage parameters. The default values are in :class:`blosc2.Storage`. + Supported keyword arguments: + storage: :class:`blosc2.Storage` or dict + All the storage parameters that you want to use as + a :class:`blosc2.Storage` or dict instance. 
+ cparams: :class:`blosc2.CParams` or dict + All the compression parameters that you want to use as + a :class:`blosc2.CParams` or dict instance. + dparams: :class:`blosc2.DParams` or dict + All the decompression parameters that you want to use as + a :class:`blosc2.DParams` or dict instance. + others: Any + If `storage` is not passed, all the parameters of a :class:`blosc2.Storage` + can be passed as keyword arguments. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> import os.path + >>> import shutil + >>> import tempfile + >>> cparams = blosc2.CParams() + >>> dparams = blosc2.DParams() + >>> storage = blosc2.Storage(contiguous=True) + >>> schunk = blosc2.SChunk(cparams=cparams, dparams=dparams, storage=storage) + + In the following, we will write and read a super-chunk to and from disk + via memory-mapped files. + + >>> a = np.arange(3, dtype=np.int64) + >>> chunksize = a.size * a.itemsize + >>> n_chunks = 2 + >>> tmpdirname = tempfile.mkdtemp() + >>> urlpath = os.path.join(tmpdirname, 'schunk.b2frame') + + Optional: we intend to write 2 chunks of 24 bytes each, and we expect + the compressed size to be smaller than the original size. Therefore, we + generously set the initial size of the mapping to 48 bytes + effectively avoiding remappings. + + >>> initial_mapping_size = chunksize * n_chunks + >>> schunk_mmap = blosc2.SChunk( + ... chunksize=chunksize, + ... mmap_mode="w+", + ... initial_mapping_size=initial_mapping_size, + ... urlpath=urlpath, + ... ) + >>> schunk_mmap.append_data(a) + 1 + >>> schunk_mmap.append_data(a * 2) + 2 + + Optional: explicitly close the file and free the mapping. 
+ + >>> del schunk_mmap + + Reading the data back again via memory-mapped files: + + >>> schunk_mmap = blosc2.open(urlpath, mmap_mode="r") + >>> np.frombuffer(schunk_mmap.decompress_chunk(0), dtype=np.int64).tolist() + [0, 1, 2] + >>> np.frombuffer(schunk_mmap.decompress_chunk(1), dtype=np.int64).tolist() + [0, 2, 4] + >>> shutil.rmtree(tmpdirname) + """ + # Check only allowed kwarg are passed + allowed_kwargs = [ + "urlpath", + "contiguous", + "cparams", + "dparams", + "_schunk", + "meta", + "mode", + "mmap_mode", + "initial_mapping_size", + "_is_view", + "storage", + ] + for kwarg in kwargs: + if kwarg not in allowed_kwargs: + raise ValueError(f"{kwarg} is not supported as keyword argument") + if kwargs.get("storage") is not None: + if any(key in list(blosc2.Storage.__annotations__) for key in kwargs): + raise AttributeError( + "Cannot pass both `storage` and other kwargs already included in Storage" + ) + storage = kwargs.get("storage") + if isinstance(storage, blosc2.Storage): + kwargs = {**kwargs, **asdict(storage)} + else: + kwargs = {**kwargs, **storage} + + if isinstance(kwargs.get("cparams"), blosc2.CParams): + kwargs["cparams"] = asdict(kwargs.get("cparams")) + + if isinstance(kwargs.get("dparams"), blosc2.DParams): + kwargs["dparams"] = asdict(kwargs.get("dparams")) + + urlpath = kwargs.get("urlpath") + if "contiguous" not in kwargs: + # Make contiguous true for disk, else sparse (for in-memory performance) + kwargs["contiguous"] = urlpath is not None + + # This a private param to get an SChunk from a blosc2_schunk* + sc = kwargs.pop("_schunk", None) + + # If not passed, set a sensible typesize + itemsize = data.itemsize if data is not None and hasattr(data, "itemsize") else 1 + if "cparams" in kwargs: + if "typesize" not in kwargs["cparams"]: + cparams = kwargs.pop("cparams").copy() + cparams["typesize"] = itemsize + kwargs["cparams"] = cparams + else: + kwargs["cparams"] = {"typesize": itemsize} + if blosc2.IS_WASM: + # wasm32 runtime is effectively 
single-threaded for Blosc operations. + cparams = kwargs.get("cparams") + if isinstance(cparams, dict) and cparams.get("nthreads", 1) != 1: + cparams = cparams.copy() + cparams["nthreads"] = 1 + kwargs["cparams"] = cparams + dparams = kwargs.get("dparams") + if isinstance(dparams, dict) and dparams.get("nthreads", 1) != 1: + dparams = dparams.copy() + dparams["nthreads"] = 1 + kwargs["dparams"] = dparams + + # chunksize handling + if chunksize is None: + chunksize = 2**24 + if data is not None: + if hasattr(data, "itemsize"): + chunksize = data.size * data.itemsize + # Make that a multiple of typesize + chunksize = chunksize // data.itemsize * data.itemsize + else: + chunksize = len(data) + # Use a cap of 256 MB (modern boxes should all have this RAM available) + if chunksize > 2**28: + chunksize = 2**28 + + super().__init__(_schunk=sc, chunksize=chunksize, data=data, **kwargs) + self._vlmeta = vlmeta( + super().c_schunk, self.urlpath, self.mode, self.mmap_mode, self.initial_mapping_size + ) + self._cparams = super().get_cparams() + self._dparams = super().get_dparams() + + @property + def cparams(self) -> blosc2.CParams: + """ + :class:`blosc2.CParams` instance with the compression parameters. + """ + return self._cparams + + @cparams.setter + def cparams(self, value: blosc2.CParams) -> None: + if blosc2.IS_WASM and value.nthreads != 1: + value = replace(value, nthreads=1) + super().update_cparams(value) + self._cparams = super().get_cparams() + + @property + def dparams(self) -> blosc2.DParams: + """ + :class:`blosc2.DParams` instance with the decompression parameters. + """ + return self._dparams + + @dparams.setter + def dparams(self, value: blosc2.DParams) -> None: + if blosc2.IS_WASM and value.nthreads != 1: + value = replace(value, nthreads=1) + super().update_dparams(value) + self._dparams = super().get_dparams() + + @property + def meta(self) -> Meta: + """ + Access to the fixed-length metadata of the `SChunk`. 
@property
def cratio(self) -> float:
    """
    Compression ratio: uncompressed bytes divided by compressed bytes.

    Returns 0.0 when there are no compressed bytes, so that an empty
    container does not trigger a division by zero.
    """
    compressed = self.cbytes
    return 0.0 if compressed == 0 else self.nbytes / compressed
@property
def info_items(self) -> list:
    """Name/value pairs describing this schunk.

    Returns a list of 2-tuples, each holding the name of an attribute and
    its value, in a fixed order. The class name and the compression ratio
    are given as strings (the latter with two decimals).
    """
    return [
        ("type", f"{self.__class__.__name__}"),
        ("chunksize", self.chunksize),
        ("blocksize", self.blocksize),
        ("typesize", self.typesize),
        ("nbytes", self.nbytes),
        ("cbytes", self.cbytes),
        ("cratio", f"{self.cratio:.2f}"),
        ("cparams", self.cparams),
        ("dparams", self.dparams),
    ]
The SChunk must be empty. + + Parameters + ---------- + nitems: int + The number of items to fill with the special value. + special_value: SpecialValue + The special value to be used for filling the SChunk. + value: bytes, int, float, bool (optional) + The value to fill the SChunk. This parameter is only supported if + :paramref:`special_value` is ``blosc2.SpecialValue.VALUE``. + + Returns + ------- + out: int + The number of chunks in the SChunk. + + Raises + ------ + RunTimeError + If the SChunk could not be filled with the special value. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> import time + >>> nitems = 100_000_000 + >>> dtype = np.dtype(np.float64) + >>> # Measure the time to create SChunk from a NumPy array + >>> t0 = time.time() + >>> data = np.full(nitems, np.pi, dtype) + >>> cparams = blosc2.CParams(typesize=dtype.itemsize) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> t = (time.time() - t0) * 1000. + >>> f"Time creating a schunk with a numpy array: {t:10.3f} ms" + Time creating a schunk with a numpy array: 710.273 ms + >>> # Measure the time to create SChunk using fill_special + >>> t0 = time.time() + >>> cparams = blosc2.CParams(typesize=dtype.itemsize) + >>> schunk = blosc2.SChunk(cparams=cparams) + >>> schunk.fill_special(nitems, blosc2.SpecialValue.VALUE, np.pi) + >>> t = (time.time() - t0) * 1000. 
+ >>> f"Time passing directly the value to `fill_special`: {t:10.3f} ms" + Time passing directly the value to `fill_special`: 2.109 ms + """ + if not isinstance(special_value, SpecialValue) or special_value == SpecialValue.NOT_SPECIAL: + raise TypeError("special_value must be a SpecialValue instance other than NOT_SPECIAL") + if special_value == SpecialValue.VALUE and value is None: + raise ValueError("value cannot be None when special_value is VALUE") + + nchunks = super().fill_special(nitems, special_value.value, value) + if nchunks < 0: + raise RuntimeError("Unable to fill with special values") + return nchunks + + def decompress_chunk(self, nchunk: int, dst: object = None) -> str | bytes: + """Decompress the chunk given by its index :paramref:`nchunk`. + + Parameters + ---------- + nchunk: int + The index of the chunk that will be decompressed. + dst: NumPy object or bytearray + The destination NumPy object or bytearray to fill, the length + of which must be greater than 0. The user must ensure + that it has enough capacity to host the decompressed + chunk. Default is None, meaning that a new bytes object + is created, filled and returned. + + Returns + ------- + out: str or bytes + The decompressed chunk as a Python str or bytes object if + :paramref:`dst` is `None`. Otherwise, it returns `None` because the + result will already be in :paramref:`dst`. + + Raises + ------ + RunTimeError + If a problem is detected. 
+ + Examples + -------- + >>> import blosc2 + >>> cparams = blosc2.CParams(typesize=1) + >>> schunk = blosc2.SChunk(cparams=cparams) + >>> buffer = b"wermqeoir23" + >>> schunk.append_data(buffer) + 1 + >>> schunk.decompress_chunk(0) + b'wermqeoir23' + >>> # Construct a mutable bytearray object + >>> bytes_obj = bytearray(len(buffer)) + >>> schunk.decompress_chunk(0, dst=bytes_obj) + >>> bytes_obj == buffer + True + """ + return super().decompress_chunk(nchunk, dst) + + def get_chunk(self, nchunk: int) -> bytes: + """Return the compressed chunk that is in the SChunk. + + Parameters + ---------- + nchunk: int + The index of the chunk that will be returned. + + Returns + ------- + out: bytes object + The compressed chunk. + + Raises + ------ + RunTimeError + If a problem is detected. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an SChunk with 3 chunks + >>> nchunks = 3 + >>> data = np.arange(200 * 1000 * nchunks, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> # Retrieve the first chunk (index 0) + >>> chunk = schunk.get_chunk(0) + >>> # Check the type and length of the compressed chunk + >>> type(chunk) + + >>> len(chunk) + 10552 + """ + return super().get_chunk(nchunk) + + def delete_chunk(self, nchunk: int) -> int: + """Delete the specified chunk from the SChunk. + + Parameters + ---------- + nchunk: int + The index of the chunk that will be removed. + + Returns + ------- + out: int + The number of chunks in the SChunk. + + Raises + ------ + RunTimeError + If a problem is detected. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an SChunk with 3 chunks + >>> nchunks = 3 + >>> data = np.arange(200 * 1000 * nchunks, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, cparams=cparams) + >>> # Check the number of chunks before deletion + >>> schunk.nchunks + 3 + >>> # Delete the second chunk (index 1) + >>> schunk.delete_chunk(1) + >>> # Check the number of chunks after deletion + >>> schunk.nchunks + 2 + """ + blosc2_ext.check_access_mode(self.urlpath, self.mode) + return super().delete_chunk(nchunk) + + def insert_chunk(self, nchunk: int, chunk: bytes) -> int: + """Insert an already compressed chunk into the SChunk. + + Parameters + ---------- + nchunk: int + The index at which the chunk will be inserted. + chunk: bytes object + The compressed chunk. + + Returns + ------- + out: int + The number of chunks in the SChunk. + + Raises + ------ + RunTimeError + If a problem is detected. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an SChunk with 2 chunks + >>> data = np.arange(400 * 1000, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams=cparams) + >>> # Get a compressed chunk from the SChunk + >>> chunk = schunk.get_chunk(0) + >>> # Insert a chunk in the second position (index 1)" + >>> schunk.insert_chunk(1, chunk) + >>> # Verify the total number of chunks after insertion + >>> schunk.nchunks + 3 + """ + blosc2_ext.check_access_mode(self.urlpath, self.mode) + return super().insert_chunk(nchunk, chunk) + + def insert_data(self, nchunk: int, data: object, copy: bool) -> int: + """Insert the data in the specified position in the SChunk. + + Parameters + ---------- + nchunk: int + The index at which the chunk will be inserted. + data: bytes object + The data that will be compressed and inserted as a chunk. 
+ copy: bool + Whether to make an internal copy of the chunk to insert it or not. + + Returns + ------- + out: int + The number of chunks in the SChunk. + + Raises + ------ + RunTimeError + If a problem is detected. + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create an SChunk with 2 chunks + >>> data = np.arange(400 * 1000, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams=cparams) + >>> # Create a new array to insert into the second chunk of the SChunk + >>> new_data = np.arange(200 * 1000, dtype=np.int32) + >>> # Insert the new data at position 1, compressing it + >>> schunk.insert_data(1, new_data, copy=True) + >>> # Verify the total number of chunks after insertion + >>> schunk.nchunks + 3 + """ + blosc2_ext.check_access_mode(self.urlpath, self.mode) + return super().insert_data(nchunk, data, copy) + + def update_chunk(self, nchunk: int, chunk: bytes) -> int: + """Update an existing chunk in the SChunk. + + Parameters + ---------- + nchunk: int + The index of the chunk to be updated. + chunk: bytes object + The new compressed chunk that will replace the old chunk's content. + + Returns + ------- + out: int + The number of chunks in the SChunk. + + Raises + ------ + RunTimeError + If a problem is detected. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> nchunks = 5 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) + >>> f"Initial number of chunks: {schunk.nchunks}" + Initial number of chunks: 5 + >>> c_index = 1 + >>> new_data = np.full(chunk_size // 4, fill_value=c_index, dtype=np.int32).tobytes() + >>> compressed_data = blosc2.compress2(new_data, typesize=4) + >>> # Update the 2nd chunk (index 1) with new data + >>> nchunks = schunk.update_chunk(c_index, compressed_data) + >>> f"Number of chunks after update: {nchunks}" + Number of chunks after update: 5 + """ + blosc2_ext.check_access_mode(self.urlpath, self.mode) + return super().update_chunk(nchunk, chunk) + + def update_data(self, nchunk: int, data: object, copy: bool) -> int: + """Update the chunk in the specified position with the given data. + + Parameters + ---------- + nchunk: int + The index of the chunk to be updated. + data: bytes object + The data to be compressed and will replace the old chunk. + copy: bool + Whether to make an internal copy of the chunk before updating it. + + Returns + ------- + out: int + The number of chunks in the SChunk. + + Raises + ------ + RunTimeError + If a problem is detected. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> nchunks = 4 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) + >>> f"Initial number of chunks: {schunk.nchunks}" + Initial number of chunks: 4 + >>> c_index = 1 # Update the 2nd chunk (index 1) + >>> new_data = np.full(chunk_size // 4, fill_value=c_index, dtype=np.int32).tobytes() + >>> nchunks = schunk.update_data(c_index, new_data, copy=True) + >>> f"Number of chunks after update: {schunk.nchunks}" + Number of chunks after update: 4 + """ + blosc2_ext.check_access_mode(self.urlpath, self.mode) + nchunks = super().nchunks + return super().update_data(nchunk, data, copy) if nchunks > 0 else nchunks + + def get_slice(self, start: int = 0, stop: int | None = None, out: object = None) -> str | bytes | None: + """Get a slice from :paramref:`start` to :paramref:`stop`. + + Parameters + ---------- + start: int + The starting index of the slice. Default is 0. + stop: int + The ending index of the slice (exclusive). + Default is until the SChunk ends. + out: bytes-like object or bytearray + The target object (supporting the + `Buffer Protocol `_) to fill. + Verify that the buffer has enough space for the decompressed data. + If `None` is provided, a new bytes object will be created, filled, + and returned. + + Returns + ------- + out: str or bytes or None + The decompressed slice a Python str or bytes object if + :paramref:`out` is `None`. Otherwise, it returns `None` since the result + will already be in :paramref:`out`. + + Raises + ------ + ValueError + If the size to get is negative. + If there is not enough space in :paramref:`out`. + If :paramref:`start` is greater or equal to the number of items in the SChunk. + RunTimeError + If a problem is detected. 
+ + See Also + -------- + :func:`__getitem__` + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> nchunks = 4 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> # Define the slice parameters + >>> start_index = 200 * 1000 + >>> stop_index = 2 * 200 * 1000 + >>> # Prepare an output buffer + >>> slice_size = stop_index - start_index + >>> out_buffer = bytearray(slice_size * 4) # Ensure the buffer is large enough + >>> result = schunk.get_slice(start=start_index, stop=stop_index, out=out_buffer) + >>> # Convert bytearray to NumPy array for easier inspection + >>> slice_array = np.frombuffer(out_buffer, dtype=np.int32) + >>> f"Slice data: {slice_array[:10]} ..." # Print the first 10 elements + Slice data: [200000 200001 200002 200003 200004 200005 200006 200007 200008 200009] ... + """ + return super().get_slice(start, stop, out) + + def __len__(self) -> int: + """ + Return the number of items in the SChunk. + """ + return self.nbytes // self.typesize + + def __getitem__(self, item: int | slice) -> str | bytes: + """Get a slice from the SChunk. + + Parameters + ---------- + item: int or slice + The index or slice for the data. Note that the step parameter is not honored. + + Returns + ------- + out: str or bytes + The decompressed slice as a Python str or bytes object. + + Raises + ------ + ValueError + If the size to get is negative. + If :paramref:`item`.start is greater than or equal to the number of + items in the SChunk. + RunTimeError + If a problem is detected. + IndexError + If `step` is not 1. 
+ + See Also + -------- + :func:`get_slice` + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> nchunks = 4 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) + >>> # Use __getitem__ to retrieve the same slice of data from the SChunk + >>> res = schunk[150:155] + >>> f"Slice data: {np.frombuffer(res, dtype=np.int32)}" + Slice data: [150 151 152 153 154] + """ + if isinstance(item, int): + if item == -1: + return self.get_slice(item) + return self.get_slice(item, item + 1) + if item.step is not None and item.step != 1: + raise IndexError("`step` must be 1") + return self.get_slice(item.start, item.stop) + + def __setitem__(self, key: int | slice, value: object) -> None: + """Set slice to :paramref:`value`. + + Parameters + ---------- + key: int or slice + The index of the slice to update. Note that step parameter is not honored. + value: bytes-like object + An object supporting the + `Buffer Protocol `_ used to + fill the slice. + + Returns + ------- + out: None + + Raises + ------ + ValueError + If the object cannot be modified. + If the size to get is negative. + If there is not enough space in :paramref:`value` to update the slice. + If :paramref:`start` is greater than the number of items in the SChunk. + RunTimeError + If a problem is detected. + IndexError + If `step` is not 1. + + Notes + ----- + This method can also be used to append new data if :paramref:`key`.stop + is greater than the number of items in the SChunk. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> nchunks = 4 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> # Create a new array of values to update the slice (values from 1000 to 1999 multiplied by 2) + >>> start_ = 1000 + >>> stop = 2000 + >>> new_values = np.arange(start_, stop, dtype=np.int32) * 2 + >>> schunk[start_:stop] = new_values + >>> # Retrieve the updated slice using the slicing syntax + >>> retrieved_slice = np.frombuffer(schunk[start_:stop], dtype=np.int32) + >>> f"First 10 values of the updated slice: {retrieved_slice[:10]}" + >>> f"Last 10 values of the updated slice: {retrieved_slice[-10:]}" + First 10 values of the updated slice: [2000 2002 2004 2006 2008 2010 2012 2014 2016 2018] + Last 10 values of the updated slice: [3980 3982 3984 3986 3988 3990 3992 3994 3996 3998] + """ + if key.step is not None and key.step != 1: + raise IndexError("`step` must be 1") + blosc2_ext.check_access_mode(self.urlpath, self.mode) + return super().set_slice(start=key.start, stop=key.stop, value=value) + + def to_cframe(self) -> bytes: + """Get a bytes object containing the serialized :ref:`SChunk` instance. + + Returns + ------- + out: bytes + The buffer containing the serialized :ref:`SChunk` instance. 
+ + See Also + -------- + :func:`~blosc2.schunk_from_cframe` + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> nchunks = 4 + >>> chunk_size = 200 * 1000 * 4 + >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> # Serialize the SChunk instance to a bytes object + >>> serialized_schunk = schunk.to_cframe() + >>> f"Serialized SChunk length: {len(serialized_schunk)} bytes" + Serialized SChunk length: 14129 bytes + >>> # Create a new SChunk from the serialized data + >>> deserialized_schunk = blosc2.schunk_from_cframe(serialized_schunk) + >>> start = 500 + >>> stop = 505 + >>> sl_bytes = deserialized_schunk[start:stop] + >>> sl = np.frombuffer(sl_bytes, dtype=np.int32) + >>> res = data[start:stop] + >>> f"Original slice: {res}" + Original slice: [500 501 502 503 504] + >>> f"Deserialized slice: {sl}" + Deserialized slice: [500 501 502 503 504] + """ + return super().to_cframe() + + def iterchunks(self, dtype: np.dtype) -> Iterator[np.ndarray]: + """ + Iterate over the :paramref:`self` chunks of the SChunk. + + Parameters + ---------- + dtype: np.dtype + The data type to use for the decompressed chunks. + + Yields + ------ + chunk: NumPy ndarray + The decompressed chunk. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create sample data and an SChunk + >>> data = np.arange(400 * 1000, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> # Iterate over chunks using the iterchunks method + >>> for chunk in schunk.iterchunks(dtype=np.int32): + >>> f"Chunk shape: {chunk.shape} " + >>> f"First 5 elements of chunk: {chunk[:5]}" + Chunk shape: (400000,) + First 5 elements of chunk: [0 1 2 3 4] + """ + out = np.empty(self.chunkshape, dtype) + for i in range(0, len(self), self.chunkshape): + self.get_slice(i, i + self.chunkshape, out) + yield out + + def iterchunks_info( + self, + ) -> Iterator[ + NamedTuple( + "info", + nchunk=int, + cratio=float, + special=blosc2.SpecialValue, + repeated_value=bytes | None, + lazychunk=bytes, + ) + ]: + """ + Iterate over the chunks of the SChunk, providing info on index and special values. + + Yields + ------ + info: namedtuple + A namedtuple with the following fields: + + nchunk: int + The index of the chunk. + cratio: float + The compression ratio of the chunk. + special: :class:`~blosc2.SpecialValue` + The special value enum of the chunk; if 0, the chunk is not special. + repeated_value: bytes or None + The repeated value for the chunk; if not SpecialValue.VALUE, it is None. + lazychunk: bytes + A buffer with the complete lazy chunk. 
+ + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> # Create sample data and an SChunk + >>> data = np.arange(400 * 1000, dtype=np.int32) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) + >>> # Iterate over chunks and print detailed information + >>> for chunk_info in schunk.iterchunks_info(): + >>> f"Chunk index: {chunk_info.nchunk}" + >>> f"Compression ratio: {chunk_info.cratio:.2f}" + >>> f"Special value: {chunk_info.special.name}" + >>> f"Repeated value: {chunk_info.repeated_value[:10] if chunk_info.repeated_value else None}" + Chunk index: 0 + Compression ratio: 223.56 + Special value: NOT_SPECIAL + Repeated value: None + """ + ChunkInfo = namedtuple("ChunkInfo", ["nchunk", "cratio", "special", "repeated_value", "lazychunk"]) + for nchunk in range(self.nchunks): + lazychunk = self.get_lazychunk(nchunk) + # Blosc2 flags are encoded at the end of the header + # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst) + is_special = (lazychunk[31] & 0x70) >> 4 + special = SpecialValue(is_special) + # The special value is encoded at the end of the header + repeated_value = lazychunk[32:] if special == SpecialValue.VALUE else None + # Compression ratio (nbytes and cbytes are little-endian) + cratio = ( + np.frombuffer(lazychunk[4:8], dtype=" None: + """Decorator to set a function as a postfilter. + + The postfilter function will be executed each time after decompressing + blocks of data. It will receive three parameters: + + * the input `ndarray` to be read from + * the output `ndarray` to be filled out + * the offset inside the `SChunk` instance where the corresponding block begins (see example below). + + Parameters + ---------- + input_dtype: np.dtype + Data type of the input that will receive the postfilter function. + output_dtype: np.dtype + Data type of the output that will receive and fill the postfilter function. 
+ If None (default) it will be set to :paramref:`input_dtype`. + + Returns + ------- + out: None + + Notes + ----- + * `nthreads` must be 1 when decompressing. + + * The :paramref:`input_dtype` itemsize must be the same as the + :paramref:`output_dtype` itemsize. + + See Also + -------- + :meth:`remove_postfilter` + :meth:`prefilter` + + Examples + -------- + .. code-block:: python + + # Create SChunk + input_dtype = np.dtype(np.int64) + cparams = blosc2.CParams(typesize=input_dtype.itemsize) + dparams = blosc2.DParams(nthreads=1) + schunk = blosc2.SChunk( + chunksize=20_000 * input_dtype.itemsize, cparams=cparams, dparams=dparams + ) + + + # Create postfilter and associate it to the schunk + @schunk.postfilter(input_dtype) + def postfilter(input, output, offset): + output[:] = offset + np.arange(input.size) + """ + + def initialize(func): + super(SChunk, self)._set_postfilter(func, input_dtype, output_dtype) + + def exec_func(*args): + func(*args) + + return exec_func + + return initialize + + def remove_postfilter(self, func_name: str, _new_ctx: bool = True) -> None: + """Remove the postfilter from the `SChunk` instance. + + Parameters + ---------- + func_name: str + The name of the postfilter function to remove. 
+ + Returns + ------- + out: None + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.dtype(np.int32) + >>> cparams = blosc2.CParams(typesize=dtype.itemsize) + >>> dparams = blosc2.DParams(nthreads=1) + >>> data = np.arange(500, dtype=np.int32) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams, dparams=dparams) + >>> # Define the postfilter function + >>> @schunk.postfilter(dtype) + >>> def postfilter(input, output, offset): + >>> output[:] = input + offset + np.arange(input.size) + >>> out = np.empty(data.size, dtype=dtype) + >>> schunk.get_slice(out=out) + >>> f"Data slice with postfilter applied (first 8 elements): {out[:8]}" + Data slice with postfilter applied (first 8 elements): [ 0 2 4 6 8 10 12 14] + >>> schunk.remove_postfilter('postfilter') + >>> retrieved_data = np.empty(data.size, dtype=dtype) + >>> schunk.get_slice(out=retrieved_data) + >>> f"Original data (first 8 elements): {data[:8]}" + Original data (first 8 elements): [0 1 2 3 4 5 6 7] + """ + return super().remove_postfilter(func_name) + + def filler(self, inputs_tuple: tuple[tuple], schunk_dtype: np.dtype, nelem: int | None = None) -> None: + """Decorator to set a filler function. + + This function will fill :paramref:`self` according to :paramref:`nelem`. + It will receive three parameters: a tuple with the inputs as `ndarrays` + from which to read, the `ndarray` to fill :paramref:`self` and the + offset inside the `SChunk` instance where the corresponding block + begins (see example below). + + Parameters + ---------- + inputs_tuple: tuple of tuples + Tuple containing a tuple for each argument that the function will receive, along with their + corresponding np.dtype. + Supported operand types are :ref:`SChunk`, `ndarray` and + Python scalars. + schunk_dtype: np.dtype + The data type to use to fill :paramref:`self`. + nelem: int + Number of elements to append to :paramref:`self`. If None (default) it + will be the number of elements from the operands. 
+ + Returns + ------- + out: None + + Notes + ----- + * Compression `nthreads` must be 1 when using this. + * This does not need to be removed from the created `SChunk` instance. + + See Also + -------- + :meth:`prefilter` + + Examples + -------- + .. code-block:: python + + # Set the compression and decompression parameters + schunk_dtype = np.dtype(np.float64) + cparams = blosc2.CParams(typesize=schunk_dtype.itemsize, nthreads=1) + # Create empty SChunk + schunk = blosc2.SChunk(chunksize=20_000 * schunk_dtype.itemsize, cparams=cparams) + + # Create operands + op_dtype = np.dtype(np.int32) + data = np.full(20_000 * 3, 12, dtype=op_dtype) + schunk_op = blosc2.SChunk(chunksize=20_000 * op_dtype.itemsize, data=data) + + + # Create filler + @schunk.filler(((schunk_op, op_dtype), (np.e, np.float32)), schunk_dtype) + def filler(inputs_tuple, output, offset): + output[:] = inputs_tuple[0] - inputs_tuple[1] + + """ + + def initialize(func): + if self.nbytes != 0: + raise ValueError("Cannot apply a filler to a non empty SChunk") + nelem_ = blosc2_ext.nelem_from_inputs(inputs_tuple, nelem) + super(SChunk, self)._set_filler(func, id(inputs_tuple), schunk_dtype) + chunksize = self.chunksize + written_nbytes = 0 + nbytes = nelem_ * self.typesize + while written_nbytes < nbytes: + chunk = np.zeros(chunksize // self.typesize, dtype=schunk_dtype) + self.append_data(chunk) + written_nbytes += chunksize + if (nbytes - written_nbytes) < self.chunksize: + chunksize = nbytes - written_nbytes + self.remove_prefilter(func.__name__) + + def exec_func(*args): + func(*args) + + return exec_func + + return initialize + + def prefilter(self, input_dtype: np.dtype, output_dtype: np.dtype = None) -> None: + """Decorator to set a function as a prefilter. + + This function will be executed each time before compressing the data. 
+ It will receive three parameters: + + * The actual data as a `ndarray` from which to read, + * The `ndarray` to be filled, + * The offset inside the `SChunk` instance where the corresponding block begins (see example below). + + Parameters + ---------- + input_dtype: np.dtype + Data type of the input that will be processed the prefilter function. + output_dtype: np.dtype, optional + Data type of the output that will be filled by the prefilter function. + If None (default), it will be the same as :paramref:`input_dtype`. + + Returns + ------- + out: None + + Notes + ----- + * `nthreads` must be 1 when compressing. + + * The :paramref:`input_dtype` itemsize must be the same as the + :paramref:`output_dtype` itemsize. + + See Also + -------- + :meth:`remove_prefilter` + :meth:`postfilter` + :meth:`filler` + + Examples + -------- + .. code-block:: python + + # Set the compression and decompression parameters + input_dtype = np.dtype(np.int32) + output_dtype = np.dtype(np.float32) + cparams = blosc2.CParams(typesize=output_dtype.itemsize, nthreads=1) + # Create schunk + schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, cparams=cparams) + + + # Set prefilter with decorator + @schunk.prefilter(input_dtype, output_dtype) + def prefilter(input, output, offset): + output[:] = input - np.pi + """ + + def initialize(func): + super(SChunk, self)._set_prefilter(func, input_dtype, output_dtype) + + def exec_func(*args): + func(*args) + + return exec_func + + return initialize + + def remove_prefilter(self, func_name: str, _new_ctx: bool = True) -> None: + """Remove the prefilter from the `SChunk` instance. + + Parameters + ---------- + func_name: str + Name of the prefilter function. 
+ + Returns + ------- + out: None + + Examples + -------- + >>> import blosc2 + >>> import numpy as np + >>> dtype = np.dtype(np.int32) + >>> cparams = blosc2.CParams(typesize=dtype.itemsize, nthreads=1) + >>> data = np.arange(1000, dtype=np.int32) + >>> output_dtype = np.float32 + >>> schunk = blosc2.SChunk(cparams=cparams) + >>> # Define the prefilter function + >>> @schunk.prefilter(dtype, output_dtype) + >>> def prefilter(input, output, offset): + >>> output[:] = input - np.pi + >>> schunk[:1000] = data + >>> # Retrieve and convert compressed data with the prefilter to a NumPy array. + >>> compressed_array_with_filter = np.frombuffer(schunk.get_slice(), dtype=output_dtype) + >>> f"Compressed data with prefilter applied (first 8 elements): {compressed_array_with_filter[:8]}" + Compressed data with prefilter applied (first 8 elements): [-3.1415927 -2.1415927 -1.1415926 -0.14159265 0.8584073 1.8584074 + 2.8584073 3.8584073 ] + >>> schunk.remove_prefilter('prefilter') + >>> schunk[:1000] = data + >>> compressed_array_without_filter = np.frombuffer(schunk.get_slice(), dtype=dtype) + >>> f"Compressed data without prefilter (first 8 elements): {compressed_array_without_filter[:8]}" + Compressed data without prefilter (first 8 elements): [0. 1. 2. 3. 4. 5. 6. 7.] 
+ """ + return super().remove_prefilter(func_name) + + def __dealloc__(self): + super().__dealloc__() + + +def _meta_from_store(urlpath, offset): + """Try to read the SChunk meta from a store path (b2e, b2d, or b2z).""" + + def _open_meta(path, off=0): + try: + return blosc2.blosc2_ext.open(path, mode="r", offset=off).meta + except Exception: + return None + + if urlpath.endswith(".b2e") and offset == 0: + return _open_meta(urlpath) + if urlpath.endswith(".b2d") and os.path.isdir(urlpath): + embed_path = os.path.join(urlpath, "embed.b2e") + if os.path.exists(embed_path): + return _open_meta(embed_path) + if urlpath.endswith(".b2z") and os.path.isfile(urlpath): + try: + with open(urlpath, "rb") as f, zipfile.ZipFile(f) as zf: + for info in zf.infolist(): + if info.filename == "embed.b2e": + f.seek(info.header_offset) + local_header = f.read(30) + filename_len = int.from_bytes(local_header[26:28], "little") + extra_len = int.from_bytes(local_header[28:30], "little") + data_offset = info.header_offset + 30 + filename_len + extra_len + return _open_meta(urlpath, data_offset) + except Exception: + pass + return None + + +def _store_from_extension(urlpath, mode, offset, **kwargs): + """Dispatch to the right store constructor based on file extension.""" + if urlpath.endswith(".b2d"): + if offset != 0: + raise ValueError("Offset must be 0 for DictStore") + from blosc2.dict_store import DictStore + + return DictStore(urlpath, mode=mode, **kwargs) + if urlpath.endswith(".b2z"): + if offset != 0: + raise ValueError("Offset must be 0 for TreeStore") + from blosc2.tree_store import TreeStore + + return TreeStore(urlpath, mode=mode, **kwargs) + if urlpath.endswith(".b2e"): + if offset != 0: + raise ValueError("Offset must be 0 for EmbedStore") + from blosc2.embed_store import EmbedStore + + return EmbedStore(urlpath, mode=mode, **kwargs) + return None + + +def _open_special_store(urlpath, mode, offset, **kwargs): + # Meta-based detection has priority over extension + schunk_meta 
def _set_default_dparams(kwargs):
    """Install or normalise the ``dparams`` entry of *kwargs* (in place).

    * If no ``dparams`` were given, a default :class:`blosc2.DParams` is stored:
      ``blosc2.nthreads`` threads in general, a single thread under WASM.
    * Under WASM, explicitly passed ``dparams`` (a :class:`blosc2.DParams` or
      a dict) that request more than one thread are replaced by a dict copy
      whose ``nthreads`` is forced to 1.
    """
    on_wasm = blosc2.IS_WASM

    if kwargs.get("dparams") is None:
        # Decompress with several threads by default, except in WASM, which
        # does not support threads. The only drawback of multithreading is a
        # slower access time because of the thread-spawning overhead (this
        # could be fixed in the future with more intelligent thread pools).
        nthreads = 1 if on_wasm else blosc2.nthreads
        kwargs["dparams"] = blosc2.DParams(nthreads=nthreads)

    if not on_wasm:
        return

    current = kwargs.get("dparams")
    if isinstance(current, blosc2.DParams):
        if current.nthreads != 1:
            forced = asdict(current)
            forced["nthreads"] = 1
            kwargs["dparams"] = forced
    elif isinstance(current, dict) and current.get("nthreads", 1) != 1:
        forced = current.copy()
        forced["nthreads"] = 1
        kwargs["dparams"] = forced


def _process_opened_object(res):
    """Turn the raw object returned by the extension into its public form.

    * If the ``"proxy-source"`` metalayer is present and its source can be
      located (local path first, remote ``urlpath`` triple second), a
      :class:`blosc2.Proxy` wrapping *res* as cache is returned.
    * If the source cannot be located and the ``caterva2_env`` entry is falsy,
      a ``RuntimeError`` is raised.
    * An :class:`blosc2.NDArray` carrying a ``"LazyArray"`` metalayer is
      reopened through ``blosc2._open_lazyarray``.
    * Anything else is returned untouched.
    """
    # NDArray-like objects expose the metalayers through `.schunk`; a bare
    # SChunk exposes them directly.
    meta = getattr(res, "schunk", res).meta

    if "proxy-source" in meta:
        proxy_src = meta["proxy-source"]
        if proxy_src["local_abspath"] is not None:
            local_source = blosc2.open(proxy_src["local_abspath"])
            return blosc2.Proxy(local_source, _cache=res)
        if proxy_src["urlpath"] is not None:
            remote_source = blosc2.C2Array(
                proxy_src["urlpath"][0], proxy_src["urlpath"][1], proxy_src["urlpath"][2]
            )
            return blosc2.Proxy(remote_source, _cache=res)
        if not proxy_src["caterva2_env"]:
            raise RuntimeError("Could not find the source when opening a Proxy")

    is_lazy = isinstance(res, blosc2.NDArray) and "LazyArray" in res.schunk.meta
    return blosc2._open_lazyarray(res) if is_lazy else res


def open(
    urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict
) -> (
    blosc2.SChunk
    | blosc2.NDArray
    | blosc2.C2Array
    | blosc2.LazyArray
    | blosc2.Proxy
    | blosc2.DictStore
    | blosc2.TreeStore
    | blosc2.EmbedStore
):
    """Open a persistent :ref:`SChunk`, :ref:`NDArray`, a remote :ref:`C2Array`,
    a :ref:`Proxy`, a :ref:`DictStore`, :ref:`EmbedStore`, or :ref:`TreeStore`.

    See the `Notes` section for details on how `Proxy` objects are opened.

    Parameters
    ----------
    urlpath: str | pathlib.Path | :ref:`URLPath`
        Location of the stored :ref:`SChunk` (or :ref:`NDArray`). For a remote
        array, a :ref:`URLPath` must be passed.
    mode: str, optional
        Persistence mode: 'r' means read only (must exist);
        'a' means read/write (create if it doesn't exist);
        'w' means create (overwrite if it exists). Default is 'a'.
    offset: int, optional
        Offset inside the file at which the super-chunk or array data starts
        (e.g. for a file holding several such objects).
    kwargs: dict, optional
        mmap_mode: str, optional
            If set, the file is memory-mapped instead of using the default
            I/O functions, and the `mode` argument is ignored. For more info,
            see :class:`blosc2.Storage`. The `w+` mode (used to create new
            files) is not supported here because only existing files can be
            opened; create new files through the :ref:`SChunk` constructor.
        initial_mapping_size: int, optional
            The initial size of the memory mapping. For more info, see
            :class:`blosc2.Storage`.
        cparams: dict
            Compression parameters, the same ones accepted by
            :func:`~blosc2.compress2`. Typesize and blocksize cannot be changed.
        dparams: dict
            Decompression parameters, the same ones accepted by
            :func:`~blosc2.decompress2`.

    Returns
    -------
    out: :ref:`SChunk`, :ref:`NDArray`, :ref:`C2Array`, :ref:`DictStore`, :ref:`EmbedStore`, or :ref:`TreeStore`
        The object found in the path.

    Notes
    -----
    * This is only a 'logical' open: there is no `close()` counterpart because
      none is currently needed.

    * If :paramref:`urlpath` is a :ref:`URLPath` instance, :paramref:`mode`
      must be 'r', :paramref:`offset` must be 0, and kwargs cannot be passed.

    * If the object saved in :paramref:`urlpath` is a :ref:`Proxy`, a
      :ref:`Proxy` is only returned when its source is a local :ref:`SChunk`,
      :ref:`NDArray` or a remote :ref:`C2Array`. Otherwise the Python-Blosc2
      container used to cache the data is returned; it can be a :ref:`SChunk`
      or a :ref:`NDArray` and may not have all of its data initialized
      (e.g. if the user has not accessed it yet).

    * When opening a :ref:`LazyExpr`, keep in mind the note above regarding operands.

    Examples
    --------
    >>> import blosc2
    >>> import numpy as np
    >>> import os
    >>> import tempfile
    >>> tmpdirname = tempfile.mkdtemp()
    >>> urlpath = os.path.join(tmpdirname, 'b2frame')
    >>> storage = blosc2.Storage(contiguous=True, urlpath=urlpath, mode="w")
    >>> nelem = 20 * 1000
    >>> nchunks = 5
    >>> chunksize = nelem * 4 // nchunks
    >>> data = np.arange(nelem, dtype="int32")
    >>> # Create SChunk and append data
    >>> schunk = blosc2.SChunk(chunksize=chunksize, data=data.tobytes(), storage=storage)
    >>> # Open SChunk
    >>> sc_open = blosc2.open(urlpath=urlpath)
    >>> for i in range(nchunks):
    ...     dest = np.empty(nelem // nchunks, dtype=data.dtype)
    ...     schunk.decompress_chunk(i, dest)
    ...     dest1 = np.empty(nelem // nchunks, dtype=data.dtype)
    ...     sc_open.decompress_chunk(i, dest1)
    ...     np.array_equal(dest, dest1)
    True
    True
    True
    True
    True

    To open the same schunk memory-mapped, we simply need to pass the `mmap_mode` parameter:

    >>> sc_open_mmap = blosc2.open(urlpath=urlpath, mmap_mode="r")
    >>> sc_open.nchunks == sc_open_mmap.nchunks
    True
    >>> all(sc_open.decompress_chunk(i, dest1) == sc_open_mmap.decompress_chunk(i, dest1) for i in range(nchunks))
    True
    """
    # Remote arrays: only a plain read-only open is supported.
    if isinstance(urlpath, blosc2.URLPath):
        plain_read = mode == "r" and offset == 0 and not kwargs
        if not plain_read:
            raise NotImplementedError(
                "Cannot open a C2Array with mode != 'r', or offset != 0 or some kwargs"
            )
        return blosc2.C2Array(urlpath.path, urlbase=urlpath.urlbase, auth_token=urlpath.auth_token)

    if isinstance(urlpath, pathlib.PurePath):
        urlpath = str(urlpath)

    # Container stores (EmbedStore / DictStore / TreeStore) are tried first.
    store = _open_special_store(urlpath, mode, offset, **kwargs)
    if store is not None:
        return store

    if not os.path.exists(urlpath):
        raise FileNotFoundError(f"No such file or directory: {urlpath}")

    _set_default_dparams(kwargs)
    opened = blosc2_ext.open(urlpath, mode, offset, **kwargs)
    return _process_opened_object(opened)
def default_nthreads():
    """Return the current value of ``blosc2.nthreads`` (read at call time)."""
    return blosc2.nthreads


def default_filters():
    """Return a fresh default filter pipeline: five ``NOFILTER`` slots, then ``SHUFFLE``."""
    return [
        blosc2.Filter.NOFILTER,
        blosc2.Filter.NOFILTER,
        blosc2.Filter.NOFILTER,
        blosc2.Filter.NOFILTER,
        blosc2.Filter.NOFILTER,
        blosc2.Filter.SHUFFLE,
    ]


def default_filters_meta():
    """Return a fresh list with the default (zero) metadata for the six filter slots."""
    return [0] * 6


@dataclass
class CParams:
    """Dataclass for hosting the different compression parameters.

    Parameters
    ----------
    codec: :class:`Codec` or int
        The compressor code. Default is :py:obj:`blosc2.Codec.ZSTD`.
    codec_meta: int
        The metadata for the compressor code. Default is 0.
    clevel: int
        The compression level from 0 (no compression) to 9
        (maximum compression). Default is 5.
    use_dict: bool
        Whether to use dictionaries when compressing
        (only for :py:obj:`blosc2.Codec.ZSTD`). Default is `False`.
    typesize: int
        The data type size, ranging from 1 to 255. Default is 8.
    nthreads: int
        The number of threads to use internally. By default, the
        value of :py:obj:`blosc2.nthreads` is used. If not set with
        :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it.
    blocksize: int
        The requested size of the compressed blocks. If set to 0 (the default)
        blosc2 will choose the size automatically.
    splitmode: :class:`SplitMode`
        The split mode for the blocks.
        The default value is :py:obj:`blosc2.SplitMode.AUTO_SPLIT`.
    filters: :class:`Filter` or int list
        The sequence of filters (at most 6). Default: five
        :py:obj:`blosc2.Filter.NOFILTER` followed by :py:obj:`blosc2.Filter.SHUFFLE`.
    filters_meta: list
        The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`. It must be at
        least as long as `filters`; extra trailing entries are dropped.
    tuner: :class:`Tuner`
        The tuner to use. Default: :py:obj:`blosc2.Tuner.STUNE`.

    Raises
    ------
    ValueError
        If more than 6 filters are given, or if there are more filters than
        filter metadata entries.
    """

    codec: blosc2.Codec | int = blosc2.Codec.ZSTD
    codec_meta: int = 0
    clevel: int = 5
    use_dict: bool = False
    typesize: int = 8
    nthreads: int = field(default_factory=default_nthreads)
    blocksize: int = 0
    splitmode: blosc2.SplitMode = blosc2.SplitMode.AUTO_SPLIT
    filters: list[blosc2.Filter | int] = field(default_factory=default_filters)
    filters_meta: list[int] = field(default_factory=default_filters_meta)
    tuner: blosc2.Tuner = blosc2.Tuner.STUNE

    def __post_init__(self):
        """Normalise int-coded enums and validate/complete the filter pipeline."""
        # C2Array sends metadata (like codec, filters, splitmode and tuner) as ints.
        # User-defined codecs/tuners may have no enum entry: keep the raw value then.
        for attr, enum_cls in (
            ("codec", blosc2.Codec),
            ("splitmode", blosc2.SplitMode),
            ("tuner", blosc2.Tuner),
        ):
            value = getattr(self, attr)
            if not isinstance(value, enum_cls):
                with contextlib.suppress(ValueError):
                    setattr(self, attr, enum_cls(value))

        # Work on private copies: the normalisation below rewrites entries, and
        # that must not leak into the lists the caller passed in.
        self.filters = list(self.filters)
        self.filters_meta = list(self.filters_meta)

        if len(self.filters) > 6:
            raise ValueError("Number of filters exceeds 6")
        if len(self.filters) < len(self.filters_meta):
            # Silently trim the metadata to the number of filters (no warning needed).
            self.filters_meta = self.filters_meta[: len(self.filters)]
        if len(self.filters) > len(self.filters_meta):
            raise ValueError("Number of filters cannot exceed number of filters meta")

        for i, filter_i in enumerate(self.filters):
            if not isinstance(filter_i, blosc2.Filter):
                with contextlib.suppress(ValueError):
                    # User-defined filters may have no entries in Filter
                    self.filters[i] = blosc2.Filter(filter_i)
            # BYTEDELTA with no explicit metadata defaults to the typesize.
            if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA:
                self.filters_meta[i] = self.typesize


@dataclass
class DParams:
    """Dataclass for hosting the different decompression parameters.

    Parameters
    ----------
    nthreads: int
        The number of threads to use internally. By default, the
        value of :py:obj:`blosc2.nthreads` is used. If not set with
        :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it.
    """

    nthreads: int = field(default_factory=default_nthreads)


@dataclass
class Storage:
    """Dataclass for hosting the different storage parameters.

    Parameters
    ----------
    contiguous: bool
        Indicates whether the chunks are stored contiguously.
        Default is True when :paramref:`urlpath` is not None;
        False otherwise.
    urlpath: str or pathlib.Path, optional
        If the storage is persistent, the name of the file (when
        `contiguous = True`) or the directory (if `contiguous = False`).
        If the storage is in-memory, then this field is `None`.
    mode: str, optional
        Persistence mode: 'r' means read only (must exist);
        'a' means read/write (create if it doesn't exist);
        'w' means create (overwrite if it exists). Default is 'a'.
        Passing `None` resets it to the default and emits a warning.
    mmap_mode: str, optional
        If set, the file will be memory-mapped instead of using the default
        I/O functions and the `mode` argument will be ignored. The memory-mapping
        modes are similar to those used by the ``numpy.memmap`` function, but it
        is possible to extend the file:

        .. list-table::
            :widths: 10 90
            :header-rows: 1

            * - mode
              - description
            * - 'r'
              - Open an existing file for reading only.
            * - 'r+'
              - Open an existing file for reading and writing. Use this mode if you want
                to append data to an existing schunk file.
            * - 'w+'
              - Create or overwrite an existing file for reading and writing. Use this
                mode if you want to create a new schunk.
            * - 'c'
              - Open an existing file in copy-on-write mode: all changes affect the data
                in memory but changes are not saved to disk. The file on disk is
                read-only. On Windows, the size of the mapping cannot change.

        Only contiguous storage can be memory-mapped. Hence, `urlpath` must point to a
        file (and not a directory).

        .. note::
            Memory-mapped files are opened once, and their contents remain in (virtual)
            memory for the lifetime of the schunk. Using memory-mapped I/O can be faster
            than the default I/O functions, depending on the use case. While
            reading performance is generally better, writing performance may be
            slower in some cases on certain systems. Memory-mapped files
            can be especially beneficial when operating with network file systems
            (like NFS).

            This is currently a beta feature (especially for write operations) and we
            recommend trying it out and reporting any issues you may encounter.

    initial_mapping_size: int, optional
        The initial size of the mapping for the memory-mapped file when writes are
        allowed (r+, w+, or c mode). Once a file is memory-mapped and extended beyond the
        initial mapping size, the file must be remapped, which may be expensive. This
        parameter allows decoupling the mapping size from the actual file size to
        reserve memory early for future writes and avoid remappings. The memory is only
        reserved virtually and does not occupy physical memory unless actual writes
        occur. Since the virtual address space is large enough, it is ok to be generous
        with this parameter (with special consideration on Windows, see note below).
        For best performance, set this to the maximum expected size of the compressed
        data. The size is in bytes.

        Default: 1 GiB.

        .. note::
            On Windows, the size of the mapping is directly coupled to the file size.
            When the schunk is destroyed, the file size will be truncated to the
            actual size of the schunk.

    meta: dict or None
        A dictionary with different metalayers. Each entry represents a metalayer:

            key: bytes or str
                The name of the metalayer.
            value: object
                The metalayer object that will be serialized using msgpack.
    """

    contiguous: bool | None = None
    urlpath: str | None = None
    mode: str | None = "a"
    mmap_mode: str | None = None
    initial_mapping_size: int | None = None
    meta: dict | None = None

    def __post_init__(self):
        """Derive `contiguous` when unset and replace stray `None` values by defaults."""
        if self.contiguous is None:
            self.contiguous = self.urlpath is not None

        # Fields for which `None` is a legitimate value.
        nullable = ("urlpath", "mmap_mode", "initial_mapping_size", "meta")
        for f in fields(self):
            if f.name in nullable or getattr(self, f.name) is not None:
                continue
            # Read the declared default from the field itself rather than
            # building a throwaway Storage() instance just to look it up.
            setattr(self, f.name, f.default)
            warnings.warn(
                f"`{f.name}` field value changed from `None` to `{getattr(self, f.name)}`",
                # Skip __post_init__ and the generated __init__ so the warning
                # points at the code that built the Storage.
                stacklevel=3,
            )


# Defaults for compression params
cparams_dflts = asdict(CParams())
"""
Compression params defaults.
"""

# Defaults for decompression params
dparams_dflts = asdict(DParams())
"""
Decompression params defaults.
"""
# Default for storage
storage_dflts = asdict(Storage())
"""
Storage params defaults. This is meant only for :ref:`SChunk` or :ref:`NDArray`.
"""
class vlmetaProxy(MutableMapping):
    """Proxy for SChunk.vlmeta to control access and slicing.

    - Ensures `vlmeta[:]` returns a dict of {name: value} using decoded values.
    - Enforces TreeStore read-only mode for set/del operations.
    - Delegates iteration and length to the underlying vlmeta object.
    """

    def __init__(self, tstore: "TreeStore", inner_vlmeta):
        # Owning store: consulted for its mode and used to persist changes.
        self._tstore = tstore
        # The wrapped vlmeta object of the store's internal SChunk.
        self._inner = inner_vlmeta

    def __setitem__(self, key, value):
        """Set one entry, or merge a mapping into the metadata via ``[:]``.

        Raises
        ------
        ValueError
            If the owning TreeStore is in read-only mode.
        NotImplementedError
            If *key* is a slice other than ``[:]``.
        """
        tstore = self._tstore
        if tstore.mode == "r":
            raise ValueError("TreeStore is in read-only mode")

        # Reject unsupported slices up front, before touching the store, so a
        # failed call leaves no half-created vlmeta entry behind.
        bulk = isinstance(key, slice)
        if bulk and not (key.start is None and key.stop is None):
            raise NotImplementedError("Slicing is not supported, unless [:]")

        # Ensure the vlmeta SChunk is persisted before any write operation
        # (it is created lazily). DictStore's methods are called directly to
        # bypass TreeStore's vlmeta filtering.
        if not DictStore.__contains__(tstore, tstore._vlmeta_key):
            DictStore.__setitem__(tstore, tstore._vlmeta_key, tstore._vlmeta)

        if bulk:
            # Merge/update existing values instead of replacing them.
            for name, item in value.items():
                self._inner[name] = item
        else:
            self._inner[key] = value

        # Persist once, after all updates, in the embed store snapshot.
        tstore._persist_vlmeta()

    def __getitem__(self, key):
        """Return one entry, or a plain dict with all of them for ``[:]``."""
        if isinstance(key, slice):
            if key.start is None and key.stop is None:
                # Build a Python dict from the individual (decoded) lookups.
                return {name: self._inner[name] for name in self._inner}
            raise NotImplementedError("Slicing is not supported, unless [:]")
        return self._inner[key]

    def __delitem__(self, key):
        """Delete one entry and persist the change.

        Raises
        ------
        ValueError
            If the owning TreeStore is in read-only mode.
        """
        if self._tstore.mode == "r":
            raise ValueError("TreeStore is in read-only mode")
        self._inner.__delitem__(key)
        # Persist changes in the embed store snapshot
        self._tstore._persist_vlmeta()

    def __iter__(self):
        return iter(self._inner)

    def __len__(self):
        return len(self._inner)


class TreeStore(DictStore):
    """
    A hierarchical tree-based storage container for Blosc2 data.

    Extends :class:`blosc2.DictStore` with strict hierarchical key validation
    and tree traversal capabilities. Keys must follow a hierarchical structure
    using '/' as separator and always start with '/'. If the user passes a key
    that doesn't start with '/', it is added automatically.

    It supports the same arguments as :class:`blosc2.DictStore`.

    Parameters
    ----------
    localpath : str
        Local path for the directory (`.b2d`) or file (`.b2z`); other extensions
        are not supported. If a directory is specified, it will be treated as
        a Blosc2 directory format (B2DIR). If a file is specified, it
        will be treated as a Blosc2 zip format (B2ZIP).
    mode : str, optional
        File mode ('r', 'w', 'a'). Default is 'a'.
    tmpdir : str or None, optional
        Temporary directory to use when working with `.b2z` files. If None,
        a system temporary directory will be managed. Default is None.
    cparams : dict or None, optional
        Compression parameters for the internal embed store.
        If None, the default Blosc2 parameters are used.
    dparams : dict or None, optional
        Decompression parameters for the internal embed store.
        If None, the default Blosc2 parameters are used.
    storage : blosc2.Storage or None, optional
        Storage properties for the internal embed store.
        If None, the default Blosc2 storage properties are used.
    threshold : int, optional
        Threshold for the array size (bytes) to be kept in the embed store.
        If the *compressed* array size is below this threshold, it will be
        stored in the embed store instead of as a separate file. If None,
        in-memory arrays are stored in the embed store and on-disk arrays
        are stored as separate files.
        C2Array objects will always be stored in the embed store,
        regardless of their size.

    Examples
    --------
    >>> tstore = TreeStore(localpath="my_tstore.b2z", mode="w")
    >>> # Create a hierarchy. Data is stored in leaf nodes.
    >>> # Structural nodes like /child0 and /child0/child1 are created automatically.
    >>> tstore["/child0/leaf1"] = np.array([1, 2, 3])
    >>> tstore["/child0/child1/leaf2"] = np.array([4, 5, 6])
    >>> tstore["/child0/child2"] = np.array([7, 8, 9])
    >>>
    >>> # Walk the tree structure (names are relative to the visited path)
    >>> for path, children, nodes in tstore.walk("/child0"):
    ...     print(f"Path: {path}, Children: {sorted(children)}, Nodes: {sorted(nodes)}")
    Path: /child0, Children: ['child1'], Nodes: ['child2', 'leaf1']
    Path: /child0/child1, Children: [], Nodes: ['leaf2']
    >>>
    >>> # Get a subtree view
    >>> subtree = tstore.get_subtree("/child0")
    >>> sorted(list(subtree.keys()))
    ['/child1/leaf2', '/child2', '/leaf1']

    """

    # For some reason, we had to revert the explicit parametrisation of the
    # constructor to make benchmarks working again.
    def __init__(self, *args, _from_parent_store=None, **kwargs):
        """Initialize TreeStore with subtree support.

        It supports the same arguments as :class:`blosc2.DictStore`.
        `_from_parent_store` is internal: when given, the new instance becomes
        a view sharing the parent's state instead of opening storage itself.
        """
        if _from_parent_store is not None:
            # This is a subtree view, copy state from parent
            self.__dict__.update(_from_parent_store.__dict__)
        else:
            # Call initialization and mark this storage as a b2tree object
            super().__init__(*args, **kwargs, _storage_meta={"b2tree": {"version": 1}})

        self.subtree_path = ""  # Empty string means full tree

    def _is_vlmeta_key(self, key: str) -> bool:
        """Check if a key is a vlmeta key that should be hidden from regular access."""
        return key.endswith("/__vlmeta__")

    def _translate_key_to_full(self, key: str) -> str:
        """Translate subtree-relative key to full tree key."""
        root = self.subtree_path
        if not root:
            return key
        # The relative root maps onto the subtree path itself.
        return root if key == "/" else root + key

    def _translate_key_from_full(self, full_key: str) -> str | None:
        """Translate full tree key to subtree-relative key.

        Returns None when *full_key* lies outside the current subtree.
        """
        root = self.subtree_path
        if not root:
            return full_key
        if full_key == root:
            return "/"
        if full_key.startswith(root + "/"):
            return full_key[len(root) :]
        # Key is not within this subtree
        return None

    def _validate_key(self, key: str) -> str:
        """Validate and normalize hierarchical key structure.

        Parameters
        ----------
        key : str
            The key to validate and normalize.

        Returns
        -------
        normalized_key : str
            The normalized key with leading '/' added if missing.

        Raises
        ------
        ValueError
            If key is not a string, ends with '/' (other than the root), has
            empty segments ('//'), or contains NUL, newline, carriage return
            or tab characters.
        """
        if not isinstance(key, str):
            raise ValueError(f"Key must be a string, got {type(key)}")

        # Auto-add leading '/' if missing
        if not key.startswith("/"):
            key = "/" + key

        if key != "/" and key.endswith("/"):
            raise ValueError(f"Key cannot end with '/' (except for root), got: {key}")

        if "//" in key:
            raise ValueError(f"Key cannot contain empty path segments '//', got: {key}")

        # Additional validation for special characters that might cause issues
        for char in ("\0", "\n", "\r", "\t"):
            if char in key:
                raise ValueError(f"Key cannot contain invalid character {char!r}, got: {key}")

        return key

    def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None:
        """Add a node with hierarchical key validation.

        Parameters
        ----------
        key : str
            Hierarchical node key.
        value : np.ndarray or blosc2.NDArray or blosc2.C2Array or blosc2.SChunk
            Object to store.

        Raises
        ------
        ValueError
            If key doesn't follow hierarchical structure rules, if trying to
            assign to a structural path that already has children, or if any
            ancestor of the key (within this view) already contains data.
        """
        key = self._validate_key(key)

        # Check if this key already has children (is a structural subtree)
        children = self.get_children(key)
        if children:
            raise ValueError(
                f"Cannot assign array to structural path '{key}' that already has children: {children}"
            )

        # Data lives only in leaves: refuse to hang a node below a path that
        # already holds data. Every ancestor up to the view's root is checked,
        # not just the immediate parent; otherwise e.g. '/a/b/c' could be
        # created under a data node '/a', making that data unreachable.
        if key != "/":
            ancestor = key
            while ancestor != "/":
                ancestor = ancestor.rpartition("/")[0] or "/"
                if super().__contains__(self._translate_key_to_full(ancestor)):
                    raise ValueError(
                        f"Cannot add child '{key}' to path '{ancestor}' that already contains data"
                    )

        full_key = self._translate_key_to_full(key)
        super().__setitem__(full_key, value)

    def __getitem__(self, key: str) -> "NDArray | C2Array | SChunk | TreeStore":
        """Retrieve a node or subtree view.

        If the key points to a subtree (intermediate path with children),
        returns a TreeStore view of that subtree. If the key points to
        a final node (leaf), returns the stored array or schunk.

        Parameters
        ----------
        key : str
            Hierarchical node key.

        Returns
        -------
        out : blosc2.NDArray or blosc2.C2Array or blosc2.SChunk or TreeStore
            The stored array/chunk if key is a leaf node, or a TreeStore subtree view
            if key is an intermediate path with children.

        Raises
        ------
        KeyError
            If key is not found, or if it is a (hidden) vlmeta key.
        ValueError
            If key doesn't follow hierarchical structure rules.
        """
        key = self._validate_key(key)
        if self._is_vlmeta_key(key):
            raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.")

        full_key = self._translate_key_to_full(key)
        children = self.get_children(key)
        key_exists_as_data = super().__contains__(full_key)

        if children:
            # Structural node: hand out a view rooted at it.
            return self.get_subtree(key)
        if key_exists_as_data:
            # Leaf node: fetch the actual data.
            return super().__getitem__(full_key)
        raise KeyError(f"Key '{key}' not found")

    def __delitem__(self, key: str) -> None:
        """Remove a node or subtree.

        If the key points to a subtree (intermediate path with children),
        removes all nodes in that subtree recursively. If the key points to a final
        node (leaf), removes only that node.

        Parameters
        ----------
        key : str
            Hierarchical node key.

        Raises
        ------
        KeyError
            If key is not found, or if it is a (hidden) vlmeta key.
        ValueError
            If key doesn't follow hierarchical structure rules.
        """
        key = self._validate_key(key)

        if self._is_vlmeta_key(key):
            raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.")

        # The key may exist as data and/or as a structural node with descendants.
        full_key = self._translate_key_to_full(key)
        key_exists_as_data = super().__contains__(full_key)
        descendants = self.get_descendants(key)

        if not key_exists_as_data and not descendants:
            raise KeyError(f"Key '{key}' not found")

        # Collect the full keys of everything that is actually stored; purely
        # structural descendants have no entry of their own to delete.
        full_keys_to_delete = []
        if key_exists_as_data:
            full_keys_to_delete.append(full_key)
        for descendant in descendants:
            full_descendant_key = self._translate_key_to_full(descendant)
            if super().__contains__(full_descendant_key):
                full_keys_to_delete.append(full_descendant_key)

        for doomed in full_keys_to_delete:
            super().__delitem__(doomed)

    def __contains__(self, key: str) -> bool:
        """Check if a key exists as a data node.

        Parameters
        ----------
        key : str
            Hierarchical node key.

        Returns
        -------
        exists : bool
            True if the key holds data in the store; False otherwise, including
            for invalid keys and for (hidden) vlmeta keys.
        """
        try:
            key = self._validate_key(key)
        except ValueError:
            return False
        if self._is_vlmeta_key(key):
            return False
        try:
            return super().__contains__(self._translate_key_to_full(key))
        except ValueError:
            return False

    def keys(self):
        """Return all keys in the current subtree view.

        The result is a set holding the data keys (vlmeta keys excluded) plus
        the structural paths, i.e. the intermediate parents of those keys.
        """
        stored_keys = super().keys()
        if not self.subtree_path:
            all_keys = set(stored_keys)
        else:
            all_keys = set()
            for full_key in stored_keys:
                relative_key = self._translate_key_from_full(full_key)
                if relative_key is not None:
                    all_keys.add(relative_key)

        # Filter out vlmeta keys
        all_keys = {key for key in all_keys if not self._is_vlmeta_key(key)}

        # Also include structural paths (intermediate nodes that have children but no data)
        structural_keys = set()
        for key in all_keys:
            parts = key.split("/")[1:]  # Drop the empty element before the leading '/'
            current_path = ""
            for part in parts[:-1]:  # Exclude the leaf itself
                current_path = current_path + "/" + part
                if current_path != "/" and current_path not in all_keys:
                    structural_keys.add(current_path)

        return all_keys | structural_keys

    def __iter__(self) -> Iterator[str]:
        """Iterate over keys, excluding vlmeta keys."""
        return iter(self.keys())

    def items(self) -> Iterator[tuple[str, "NDArray | C2Array | SChunk | TreeStore"]]:
        """Return key-value pairs in the current subtree view."""
        for key in self.keys():
            yield key, self[key]

    def get_children(self, path: str) -> list[str]:
        """Get direct children of a given path.

        Parameters
        ----------
        path : str
            The parent path to get children for.

        Returns
        -------
        children : list[str]
            Sorted list of direct child paths.
        """
        path = self._validate_key(path)
        prefix = "/" if path == "/" else path + "/"
        prefix_len = len(prefix)

        children_names = set()
        for key in self.keys():
            if self._is_vlmeta_key(key):
                continue  # Should be already filtered by self.keys(), but for safety
            if key.startswith(prefix):
                # e.g. key = /hierarchy/level1/data, prefix = /hierarchy/
                # -> rest = level1/data -> child name = level1
                rest = key[prefix_len:]
                children_names.add(rest.split("/")[0])

        return sorted([prefix + name for name in children_names])

    def get_descendants(self, path: str) -> list[str]:
        """Get all descendants of a given path.

        Parameters
        ----------
        path : str
            The parent path to get descendants for.

        Returns
        -------
        descendants : list[str]
            Sorted list of all descendant paths.
        """
        path = self._validate_key(path)
        prefix = "/" if path == "/" else path + "/"

        descendants = set()
        for key in self.keys():
            if self._is_vlmeta_key(key):
                continue  # Should be already filtered by self.keys(), but for safety
            if key.startswith(prefix) and key != path:
                descendants.add(key)

        return sorted(descendants)

    def walk(self, path: str = "/", topdown: bool = True) -> Iterator[tuple[str, list[str], list[str]]]:
        """Walk the tree structure.

        Similar to os.walk(), this visits all structural nodes in the hierarchy,
        yielding information about each level. Returns relative names, not full paths.

        Parameters
        ----------
        path : str, optional
            The root path to start walking from. Default is "/".
        topdown : bool, optional
            If True (default), traverse top-down (yield parent before children).
            If False, traverse bottom-up (yield children before parent), mimicking os.walk(topdown=False).

        Yields
        ------
        path : str
            Current path being walked.
        children : list[str]
            List of child directory names (structural nodes that have descendants).
            These are just the names, not full paths.
        nodes : list[str]
            List of leaf node names (nodes that contain data).
            These are just the names, not full paths.

        Examples
        --------
        >>> for path, children, nodes in tstore.walk("/child0", topdown=True):
        ...     print(f"Path: {path}, Children: {children}, Nodes: {nodes}")
        """
        path = self._validate_key(path)
        direct_children = self.get_children(path)

        # Split direct children into directories (have descendants) and leaf
        # candidates, keeping only the last path component as the name.
        children_dirs = []
        leaf_candidates = []
        for child in direct_children:
            child_name = child.split("/")[-1]
            if self.get_descendants(child):
                children_dirs.append(child_name)
            else:
                leaf_candidates.append(child_name)

        # Robustness: only simple, non-empty names are reported.
        children_dirs = [
            name for name in children_dirs if isinstance(name, str) and "/" not in name and name != ""
        ]
        leaf_candidates = [
            name for name in leaf_candidates if isinstance(name, str) and "/" not in name and name != ""
        ]

        # Keep only leaves that correspond to actual data nodes in the backing store.
        leaf_nodes: list[str] = []
        for name in leaf_candidates:
            child_rel_path = path + "/" + name if path != "/" else "/" + name
            full_key = self._translate_key_to_full(child_rel_path)
            if super().__contains__(full_key):
                leaf_nodes.append(name)

        if topdown:
            # Yield current level first (pre-order)
            yield path, children_dirs, leaf_nodes

        # Recursively walk child directories (structural nodes)
        for child in direct_children:
            if self.get_descendants(child):
                yield from self.walk(child, topdown=topdown)

        if not topdown:
            # Yield current level after children (post-order)
            yield path, children_dirs, leaf_nodes

    def get_subtree(self, path: str) -> "TreeStore":
        """Create a subtree view with the specified path as root.

        Parameters
        ----------
        path : str
            The path that will become the root of the subtree view (relative to current subtree,
            will be normalized to start with '/' if missing).

        Returns
        -------
        subtree : TreeStore
            A new TreeStore instance that presents the subtree as if `path` were the root.
            It shares the underlying storage with this instance.

        Examples
        --------
        >>> tstore["/child0/child1/data"] = np.array([1, 2, 3])
        >>> tstore["/child0/child1/grandchild"] = np.array([4, 5, 6])
        >>> subtree = tstore.get_subtree("/child0/child1")
        >>> sorted(subtree.keys())
        ['/data', '/grandchild']
        >>> subtree["/grandchild"][:]
        array([4, 5, 6])

        Notes
        -----
        This is equivalent to `tstore[path]` when path is a structural path.
        """
        path = self._validate_key(path)
        full_path = self._translate_key_to_full(path)

        # Same underlying storage, different subtree_path.
        subtree = TreeStore(_from_parent_store=self)
        subtree.subtree_path = full_path
        return subtree

    @property
    def vlmeta(self) -> MutableMapping:
        """Access variable-length metadata for the TreeStore or current subtree.

        Returns a proxy to the vlmeta attribute of an internal SChunk stored at
        '/__vlmeta__' for the root tree, or '<subtree_path>/__vlmeta__' for subtrees.
        If that SChunk does not exist yet, an in-memory one is created; it is
        written to the store on the first metadata write.

        Notes
        -----
        The metadata is stored as vlmeta of an internal SChunk. The returned
        proxy adds the following on top of it:
        - Bulk get via `[:]` returns a plain dict of the entries.
        - Read-only protection is enforced at the TreeStore level.
        - Each subtree has its own independent vlmeta storage.
        """
        if not self.subtree_path:
            # Root tree uses global vlmeta
            vlmeta_key = "/__vlmeta__"
        else:
            # Subtree uses path-specific vlmeta: <subtree_path>/__vlmeta__
            vlmeta_key = f"{self.subtree_path}/__vlmeta__"

        # Use super().__contains__ to bypass our own filtering logic
        if super().__contains__(vlmeta_key):
            # Load the current snapshot from the store to ensure freshness
            self._vlmeta = super().__getitem__(vlmeta_key)
        else:
            # Create a new, empty SChunk in memory. It will be persisted on first write.
            self._vlmeta = blosc2.SChunk()

        # Store the key for _persist_vlmeta method
        self._vlmeta_key = vlmeta_key

        # Return a fresh proxy that wraps the latest inner vlmeta
        return vlmetaProxy(self, self._vlmeta.vlmeta)

    def _persist_vlmeta(self) -> None:
        """Persist current vlmeta SChunk into the store.

        This is needed because the EmbedStore keeps a serialized snapshot of
        stored objects; mutating the in-memory SChunk does not automatically
        update the snapshot. We emulate an update by deleting and re-adding
        the object in the embed store.
        """
        if not hasattr(self, "_vlmeta_key"):
            return
        vlmeta_key = self._vlmeta_key
        # Only embedded case is expected; handle it safely.
        if hasattr(self, "_estore") and vlmeta_key in self._estore:
            # Replace the stored snapshot
            with contextlib.suppress(KeyError):
                del self._estore[vlmeta_key]
            self._estore[vlmeta_key] = self._vlmeta


if __name__ == "__main__":
    # Example usage
    localpath = "example_tstore.b2z"

    try:
        with TreeStore(localpath, mode="w") as tstore:
            # Create a hierarchical structure.
            # Note: data is stored in leaf nodes, not structural nodes.
            tstore["/child0/data_node"] = np.array([1, 2, 3])
            tstore["/child0/child1/data_node"] = np.array([4, 5, 6])
            tstore["/child0/child2"] = np.array([7, 8, 9])
            tstore["/child0/child1/grandchild"] = np.array([10, 11, 12])
            tstore["/other"] = np.array([13, 14, 15])

            print("TreeStore keys:", sorted(tstore.keys()))

            # Test subtree view
            root_subtree = tstore["/child0"]
            root_subtree.vlmeta["foo"] = "bar"
            print("Subtree keys:", sorted(root_subtree.keys()))
            # `[:]` yields a plain dict; the proxy itself has no readable repr.
            print("Subtree vlmeta:", root_subtree.vlmeta[:])

            # Walk the tree
            for path, children, nodes in root_subtree.walk("/"):
                print(f"Path: {path}, Children: {children}, Nodes: {nodes}")
    finally:
        # Clean up even if the demo above fails midway.
        if os.path.exists(localpath):
            os.remove(localpath)
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +import ast +import builtins +import inspect +import math +import warnings +from itertools import product + +import ndindex +import numpy as np +from ndindex.subindex_helpers import ceiling +from numpy import broadcast_shapes + +import blosc2 + +# NumPy version and a convenient boolean flag +NUMPY_GE_2_0 = np.__version__ >= "2.0" +# handle different numpy versions +if NUMPY_GE_2_0: # array-api compliant + nplshift = np.bitwise_left_shift + nprshift = np.bitwise_right_shift + npbinvert = np.bitwise_invert + npvecdot = np.vecdot + nptranspose = np.permute_dims + if hasattr(np, "cumulative_sum"): + npcumsum = np.cumulative_sum + npcumprod = np.cumulative_prod + else: + npcumsum = np.cumsum + npcumprod = np.cumprod +else: # not array-api compliant + nplshift = np.left_shift + nprshift = np.right_shift + npbinvert = np.bitwise_not + nptranspose = np.transpose + npcumsum = np.cumsum + npcumprod = np.cumprod + + def npvecdot(a, b, axis=-1): + return np.einsum("...i,...i->...", np.moveaxis(np.conj(a), axis, -1), np.moveaxis(b, axis, -1)) + + +def _string_contains(a, b): + return np.char.find(a, b) >= 0 + + +def _string_startswith(a, b): + return np.char.startswith(a, b) + + +def _string_lower(a): + return np.char.lower(a) + + +def _string_upper(a): + return np.char.upper(a) + + +def _string_endswith(a, b): + return np.char.endswith(a, b) + + +def _format_expr_scalar(value): + if isinstance(value, np.generic): + value = value.item() + if isinstance(value, str | bytes): + return repr(value) + return value + + +global safe_numpy_globals +# Use numpy eval when running in WebAssembly +safe_numpy_globals = {"np": np} +# Add all first-level numpy functions +safe_numpy_globals.update( + {name: getattr(np, name) for name in dir(np) if callable(getattr(np, name)) and not name.startswith("_")} +) + +if not NUMPY_GE_2_0: # handle non-array-api compliance + 
safe_numpy_globals["acos"] = np.arccos + safe_numpy_globals["acosh"] = np.arccosh + safe_numpy_globals["asin"] = np.arcsin + safe_numpy_globals["asinh"] = np.arcsinh + safe_numpy_globals["atan"] = np.arctan + safe_numpy_globals["atanh"] = np.arctanh + safe_numpy_globals["atan2"] = np.arctan2 + safe_numpy_globals["permute_dims"] = np.transpose + safe_numpy_globals["pow"] = np.power + safe_numpy_globals["bitwise_left_shift"] = np.left_shift + safe_numpy_globals["bitwise_right_shift"] = np.right_shift + safe_numpy_globals["bitwise_invert"] = np.bitwise_not + safe_numpy_globals["concat"] = np.concatenate + safe_numpy_globals["matrix_transpose"] = np.transpose + safe_numpy_globals["vecdot"] = npvecdot + safe_numpy_globals["cumulative_sum"] = npcumsum + safe_numpy_globals["cumulative_prod"] = npcumprod + +# handle different naming conventions between numpy and blosc2 +safe_numpy_globals["contains"] = _string_contains +safe_numpy_globals["startswith"] = _string_startswith +safe_numpy_globals["endswith"] = _string_endswith +safe_numpy_globals["upper"] = _string_upper +safe_numpy_globals["lower"] = _string_lower + + +elementwise_funcs = [ + "abs", + "acos", + "acosh", + "add", + "arccos", + "arccosh", + "arcsin", + "arcsinh", + "arctan", + "arctan2", + "arctanh", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "bitwise_and", + "bitwise_invert", + "bitwise_left_shift", + "bitwise_or", + "bitwise_right_shift", + "bitwise_xor", + "broadcast_to", + "ceil", + "clip", + "conj", + "contains", + "copysign", + "cos", + "cosh", + "divide", + "endswith", + "equal", + "exp", + "expm1", + "floor", + "floor_divide", + "greater", + "greater_equal", + "hypot", + "imag", + "isfinite", + "isinf", + "isnan", + "less_equal", + "less", + "log", + "log1p", + "log2", + "log10", + "logaddexp", + "logical_and", + "logical_not", + "logical_or", + "logical_xor", + "lower", + "maximum", + "minimum", + "multiply", + "negative", + "nextafter", + "not_equal", + "positive", + "pow", + "real", + 
"reciprocal", + "remainder", + "round", + "sign", + "signbit", + "sin", + "sinh", + "sqrt", + "square", + "startswith", + "subtract", + "tan", + "tanh", + "trunc", + "upper", + "where", +] + +linalg_funcs = [ + "concat", + "diagonal", + "expand_dims", + "matmul", + "matrix_transpose", + "outer", + "permute_dims", + "squeeze", + "stack", + "tensordot", + "transpose", + "vecdot", +] + +linalg_attrs = ["T", "mT"] +reducers = [ + "sum", + "prod", + "min", + "max", + "std", + "mean", + "var", + "any", + "all", + "count_nonzero", + "argmax", + "argmin", + "cumulative_sum", + "cumulative_prod", +] + +# All the available constructors and reducers necessary for the (string) expression evaluator +constructors = [ + "asarray", + "arange", + "copy", + "linspace", + "fromiter", + "zeros", + "ones", + "empty", + "full", + "frombuffer", + "full_like", + "zeros_like", + "ones_like", + "empty_like", + "eye", + "nans", + "ndarray_from_cframe", + "uninit", + "meshgrid", +] + +# Note that, as reshape is accepted as a method too, it should always come last in the list +constructors += ["reshape"] + + +# --- Shape utilities --- +def linalg_shape(func_name, args, kwargs): # noqa: C901 + # --- Linear algebra and tensor manipulation --- + a = args[0] if args else None + if a is None or any(s is None for s in a): + return None + b = args[1] if len(args) > 1 else None + axis = kwargs.get("axis", None) + axes = kwargs.get("axes", None) + offset = kwargs.get("offset", 0) + + # --- concat --- + if func_name == "concat": + shapes = args[0] + if axis is None and len(args) > 1: + axis = args[1] + + # Coerce axis to int if tuple single-element + axis = 0 if axis is None else axis + # normalize negative axis + axis = axis + len(shapes[0]) if axis < 0 else axis + concat_dim = builtins.sum(s[axis] for s in shapes) + return tuple(s if i != axis else concat_dim for i, s in enumerate(shapes[0])) + + # --- diagonal --- + elif func_name == "diagonal": + axis1 = len(a) - 2 + axis2 = len(a) - 1 + new_shape = 
[d for i, d in enumerate(a) if i not in (axis1, axis2)] + d1, d2 = a[axis1], a[axis2] + diag_len = builtins.max(0, min(d1, d2) - abs(offset)) + new_shape.append(diag_len) + return tuple(new_shape) + + # --- expand_dims --- + elif func_name == "expand_dims": + # positional axis may be second positional argument + if axis is None and len(args) > 1: + axis = args[1] + if axis is None: + axis = 0 + axis = [axis] if isinstance(axis, int) else axis + new_shape = list(a) + for ax in sorted(axis): + ax = ax if ax >= 0 else len(new_shape) + ax + 1 + new_shape.insert(ax, 1) + return tuple(new_shape) + + # --- matmul --- + elif func_name == "matmul": + if b is None: + return None + x1_is_vector = False + x2_is_vector = False + if len(a) == 1: + a = (1,) + a # (N,) -> (1, N) + x1_is_vector = True + if len(b) == 1: + b += (1,) # (M,) -> (M, 1) + x2_is_vector = True + batch = broadcast_shapes(a[:-2], b[:-2]) + shape = batch + if not x1_is_vector: + shape += (a[-2],) + if not x2_is_vector: + shape += (b[-1],) + return shape + + # --- matrix_transpose --- + elif func_name == "matrix_transpose": + if len(a) < 2: + return a + return a[:-2] + (a[-1], a[-2]) + + # --- outer --- + elif func_name == "outer": + if b is None: + return None + return a + b + + # --- permute_dims --- + elif func_name == "permute_dims": + if axes is None and len(args) > 1: + axes = args[1] + if axes is None: + axes = tuple(reversed(range(len(a)))) + return tuple(a[i] for i in axes) + + # --- squeeze --- + elif func_name == "squeeze": + if axis is None and len(args) > 1: + axis = args[1] + if axis is None: + return tuple(d for d in a if d != 1) + if isinstance(axis, int): + axis = (axis,) + axis = tuple(ax if ax >= 0 else len(a) + ax for ax in axis) + return tuple(d for i, d in enumerate(a) if i not in axis or d != 1) + + # --- stack --- + elif func_name == "stack": + # detect axis as last positional if candidate + elems = args[0] + if axis is None and len(args) > 1: + axis = args[1] + if axis is None: + axis 
= 0 + return elems[0][:axis] + (len(elems),) + elems[0][axis:] + + # --- tensordot --- + elif func_name == "tensordot": + if axes is None and len(args) > 2: + axes = args[2] + if axes is None: + axes = 2 + if b is None: + return None + if isinstance(axes, int): + a_rest = a[:-axes] + b_rest = b[axes:] + else: + a_axes, b_axes = axes + a_rest = tuple(d for i, d in enumerate(a) if i not in a_axes) + b_rest = tuple(d for i, d in enumerate(b) if i not in b_axes) + return a_rest + b_rest + + # --- transpose --- + elif func_name in ("transpose", "T", "mT"): + return a[:-2] + (a[-1], a[-2]) + + # --- vecdot --- + elif func_name == "vecdot": + if axis is None and len(args) > 2: + axis = args[2] + if axis is None: + axis = -1 + if b is None: + return None + a_axis = axis + len(a) + b_axis = axis + len(b) + a_rem = tuple(d for i, d in enumerate(a) if i != a_axis) + b_rem = tuple(d for i, d in enumerate(b) if i != b_axis) + return broadcast_shapes(a_rem, b_rem) + else: + return None + + +def reduce_shape(shape, axis, keepdims): + """Reduce shape along given axis or axes (collapse dimensions).""" + if shape is None: + return None # unknown shape + + # full reduction + if axis is None: + return (1,) * len(shape) if keepdims else () + + # normalize to tuple + if isinstance(axis, int): + axes = (axis,) + else: + axes = tuple(axis) + + # normalize negative axes + axes = tuple(a + len(shape) if a < 0 else a for a in axes) + + if keepdims: + return tuple(d if i not in axes else 1 for i, d in enumerate(shape)) + else: + return tuple(d for i, d in enumerate(shape) if i not in axes) + + +def slice_shape(shape, slices): + """Infer shape after slicing.""" + if shape is None: + return None + result = [] + for dim, sl in zip(shape, slices, strict=False): + if isinstance(sl, int): # indexing removes the axis + continue + if isinstance(sl, slice): + start = sl.start or 0 + stop = sl.stop if sl.stop is not None else dim + step = sl.step or 1 + length = max(0, (stop - start + (step - 1)) // 
step) + result.append(length) + else: + raise ValueError(f"Unsupported slice type: {sl}") + result.extend(shape[len(slices) :]) # untouched trailing dims + return tuple(result) + + +def elementwise(*args): + """All args must broadcast elementwise.""" + if None in args: + return None + return broadcast_shapes(*args) + + +def cumulative_shape(x, axis=None, include_initial=False, out=None): + if axis is None: + if len(x) == 1: + axis = 0 + else: + raise ValueError("axis can only be None for 1D arrays") + return tuple(d + 1 if (i == axis and include_initial) else d for i, d in enumerate(x)) + + +# --- Function registry --- +REDUCTIONS = { # ignore out arg + func: cumulative_shape + if func in {"cumulative_sum", "cumulative_prod"} + else lambda x, axis=None, keepdims=False, out=None: reduce_shape(x, axis, keepdims) + for func in reducers + # any unknown function will default to elementwise +} + + +# --- AST Shape Inferencer --- +class ShapeInferencer(ast.NodeVisitor): + def __init__(self, shapes): + self.shapes = shapes + + def visit_Name(self, node): + if node.id not in self.shapes: + raise ValueError(f"Unknown symbol: {node.id}") + s = self.shapes[node.id] + if isinstance(s, tuple): + return s + else: # passed a scalar value + return () + + def visit_Attribute(self, node): + obj_shape = self.visit(node.value) + attr = node.attr + if attr == "reshape": + if node.args: + shape_arg = node.args[-1] + if isinstance(shape_arg, ast.Tuple): + return tuple(self._lookup_value(e) for e in shape_arg.elts) + return () + elif attr in ("T", "mT"): + return linalg_shape(attr, (obj_shape,), {}) + return None + + def visit_Call(self, node): # noqa : C901 + # Extract full function name (support np.func, blosc2.func) + func_name = getattr(node.func, "id", None) + attr_name = getattr(node.func, "attr", None) + module_name = getattr(getattr(node.func, "value", None), "id", None) + + # Handle namespaced calls like np.func or blosc2.func + if module_name in ("np", "blosc2"): + qualified_name 
= f"{module_name}.{attr_name}" + else: + qualified_name = attr_name or func_name + + base_name = qualified_name.split(".")[-1] + + # --- Recursive method-chain support --- + obj_shape = None + if isinstance(node.func, ast.Attribute) and module_name not in ( + "np", + "blosc2", + ): # check if genuine method and not module func + obj_shape = self.visit(node.func.value) + + args = [self.visit(arg) for arg in node.args] + # If it's a method call, prepend the object shape + if obj_shape is not None and attr_name == base_name: + args.insert(0, obj_shape) + + # --- Parse keyword args --- + kwargs = {} + for kw in node.keywords: + kwargs[kw.arg] = self._lookup_value(kw.value) + + # ------- handle linear algebra --------------- + if base_name in linalg_funcs: + return linalg_shape(base_name, args, kwargs) + + # ------- handle constructors --------------- + if base_name in constructors: + # shape kwarg directly provided + if "shape" in kwargs: + val = kwargs["shape"] + return val if isinstance(val, tuple) else (val,) + + # ---- array constructors like zeros, ones, full, etc. 
---- + elif base_name in ( + "zeros", + "ones", + "empty", + "full", + "full_like", + "zeros_like", + "empty_like", + "ones_like", + "nans", + ): + if node.args: + shape_arg = node.args[0] + if isinstance(shape_arg, ast.Tuple): + shape = tuple(self._lookup_value(e) for e in shape_arg.elts) + elif isinstance(shape_arg, ast.Constant): + shape = (shape_arg.value,) + else: + shape = self._lookup_value(shape_arg) + shape = shape if isinstance(shape, tuple) else (shape,) + return shape + + # ---- arange ---- + elif base_name == "arange": + start = self._lookup_value(node.args[0]) if node.args else 0 + stop = self._lookup_value(node.args[1]) if len(node.args) > 1 else None + step = self._lookup_value(node.args[2]) if len(node.args) > 2 else 1 + shape = self._lookup_value(node.args[4]) if len(node.args) > 4 else kwargs.get("shape") + + if shape is not None: + return shape if isinstance(shape, tuple) else (shape,) + + # Fallback to numeric difference if possible + if stop is None: + stop, start = start, 0 + try: + NUM = max(math.ceil((stop - start) / step), 0) + except Exception: + # symbolic or non-numeric: unknown 1D + return ((),) + return (NUM,) + + # ---- linspace ---- + elif base_name == "linspace": + num = self._lookup_value(node.args[2]) if len(node.args) > 2 else kwargs.get("num") + shape = self._lookup_value(node.args[5]) if len(node.args) > 5 else kwargs.get("shape") + if shape is not None: + return shape if isinstance(shape, tuple) else (shape,) + if num is not None: + return (num,) + raise ValueError("linspace requires either shape or num argument") + + elif base_name in {"frombuffer", "fromiter"}: + count = kwargs.get("count") + return (count,) if count else () + + elif base_name == "eye": + N = self._lookup_value(node.args[0]) + M = self._lookup_value(node.args[1]) if len(node.args) > 1 else kwargs.get("M") + return (N, N) if M is None else (N, M) + + elif base_name == "reshape": + if node.args: + shape_arg = node.args[-1] + if isinstance(shape_arg, 
ast.Tuple): + return tuple(self._lookup_value(e) for e in shape_arg.elts) + return () + + else: + raise ValueError(f"Unrecognized constructor or missing shape argument for {func_name}") + + # --- Special-case .slice((slice(...), ...)) --- + if attr_name == "slice": + if not node.args: + raise ValueError(".slice() requires an argument") + slice_arg = node.args[0] + if isinstance(slice_arg, ast.Tuple): + slices = [self._eval_slice(s) for s in slice_arg.elts] + else: + slices = [self._eval_slice(slice_arg)] + return slice_shape(obj_shape, slices) + + if base_name in REDUCTIONS: + return REDUCTIONS[base_name](*args, **kwargs) + + shapes = [s for s in args if s is not None] + if base_name not in elementwise_funcs: + warnings.warn( + f"Function shape parser not implemented for {base_name}.", UserWarning, stacklevel=2 + ) + # default to elementwise but print warning that function not defined explicitly + return elementwise(*shapes) if shapes else () + + def visit_Compare(self, node): + shapes = [self.visit(node.left)] + [self.visit(c) for c in node.comparators] + return elementwise(*shapes) + + def visit_Constant(self, node): + return () if not hasattr(node.value, "shape") else node.value.shape + + def visit_Tuple(self, node): + return tuple(self.visit(arg) for arg in node.elts) + + def visit_List(self, node): + return self.visit_Tuple(node) + + def visit_BinOp(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return elementwise(left, right) + + def visit_UnaryOp(self, node): + return self.visit(node.operand) + + def _eval_slice(self, node): + if isinstance(node, ast.Slice): + return slice( + node.lower.value if node.lower else None, + node.upper.value if node.upper else None, + node.step.value if node.step else None, + ) + elif isinstance(node, ast.Call) and getattr(node.func, "id", None) == "slice": + # handle explicit slice() constructor + args = [a.value if isinstance(a, ast.Constant) else None for a in node.args] + return slice(*args) + 
elif isinstance(node, ast.Constant): + return node.value + else: + raise ValueError(f"Unsupported slice expression: {ast.dump(node)}") + + def _lookup_value(self, node): # noqa : C901 + """Look up a value in self.shapes if node is a variable name, else constant value.""" + # Name -> lookup in shapes mapping + if isinstance(node, ast.Name): + return self.shapes.get(node.id, None) + + # Constant -> return its value + if isinstance(node, ast.Constant): + return node.value + + # Tuple of constants / expressions + if isinstance(node, ast.Tuple): + vals = [] + for e in node.elts: + v = self._lookup_value(e) + vals.append(v) + return tuple(vals) + + # Unary operations (e.g. -1) + if isinstance(node, ast.UnaryOp): + # handle negative constants like -1 + if isinstance(node.op, ast.USub): + val = self._lookup_value(node.operand) + if isinstance(val, (int, float)): + return -val + # handle + (USub) if needed + if isinstance(node.op, ast.UAdd): + return self._lookup_value(node.operand) + return None + + # Simple binary ops with constant operands (e.g. 
1+2) + if isinstance(node, ast.BinOp): + left = self._lookup_value(node.left) + right = self._lookup_value(node.right) + if left is None or right is None: + return None + try: + if isinstance(node.op, ast.Add): + return left + right + if isinstance(node.op, ast.Sub): + return left - right + if isinstance(node.op, ast.Mult): + return left * right + if isinstance(node.op, ast.FloorDiv): + return left // right + if isinstance(node.op, ast.Div): + return left / right + if isinstance(node.op, ast.Mod): + return left % right + except Exception: + return None + return None + + # fallback + return None + + +# --- Public API --- +def infer_shape(expr, shapes): + tree = ast.parse(expr, mode="eval") + inferencer = ShapeInferencer(shapes) + return inferencer.visit(tree.body) + + +class MyChunkRange: + def __init__(self, start, stop, step=1, n=1): + self.start = start + self.stop = stop + self.step = step + self.n = n + + def __iter__(self): + for k in range(math.ceil((self.stop - self.start) / self.step)): + yield (self.start + k * self.step) // self.n + + +def slice_to_chunktuple(s, n): + # Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks. 
+ start, stop, step = s.start, s.stop, s.step + if step < 0: + temp = stop + stop = start + 1 + start = temp + 1 + step = -step # get positive steps + if step > n: + return MyChunkRange(start, stop, step, n) + else: + return range(start // n, ceiling(stop, n)) + + +def _get_selection(ctuple, ptuple, chunks): + # we assume that at least one element of chunk intersects with the slice + # (as a consequence of only looping over intersecting chunks) + # ptuple is global slice, ctuple is chunk coords (in units of chunks) + pselection = () + for i, s, csize in zip(ctuple, ptuple, chunks, strict=True): + # we need to advance to first element within chunk that intersects with slice, not + # necessarily the first element of chunk + # i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step] + if s.step > 0: + np1 = (i * csize - s.start + s.step - 1) // s.step # gives (n + 1) + # can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk + pselection += ( + slice( + builtins.max( + s.start, s.start + np1 * s.step + ), # start+(n+1)*step gives i*csize if k=step + builtins.min(csize * (i + 1), s.stop), + s.step, + ), + ) + else: + # (i + 1) * csize = s.start + n*step + k, already added n+1 elements, k in [step+1, 0] + np1 = ((i + 1) * csize - s.start + s.step) // s.step # gives (n + 1) + # can have n = -1 if s.start < (i + 1) * csize, but never < -1 since have to intersect with chunk + pselection += ( + slice( + builtins.min(s.start, s.start + np1 * s.step), # start+n*step gives (i+1)*csize if k=0 + builtins.max(csize * i - 1, s.stop), # want to include csize * i + s.step, + ), + ) + + # selection relative to coordinates of out (necessarily out_step = 1 as we work through out chunk-by-chunk of self) + # when added n + 1 elements + # ps.start = pt.start + step * (n+1) => n = (ps.start - pt.start - sign) // step + # hence, out_start = n + 1 + # ps.stop = pt.start + step * (out_stop - 1) + k, k in [step, -1] or [1, step] + # => 
out_stop = (ps.stop - pt.start - sign) // step + 1 + out_pselection = () + i = 0 + for ps, pt in zip(pselection, ptuple, strict=True): + sign_ = np.sign(pt.step) + n = (ps.start - pt.start - sign_) // pt.step + out_start = n + 1 + # ps.stop always positive except for case where get full array (it is then -1 since desire 0th element) + out_stop = None if ps.stop == -1 else (ps.stop - pt.start - sign_) // pt.step + 1 + out_pselection += ( + slice( + out_start, + out_stop, + 1, + ), + ) + i += 1 + + loc_selection = tuple( # is s.stop is None, get whole chunk so s.start - 0 + slice(0, s.stop - s.start, s.step) + if s.step > 0 + else slice(s.start if s.stop == -1 else s.start - s.stop, None, s.step) + for s in pselection + ) # local coords of loaded part of chunk + + return out_pselection, pselection, loc_selection + + +def _get_local_slice(prior_selection, post_selection, chunk_bounds): + chunk_begin, chunk_end = chunk_bounds + # +1 for negative steps as have to include start (exclude stop) + locbegin = np.hstack( + ( + [s.start if s.step > 0 else s.stop + 1 for s in prior_selection], + chunk_begin, + [s.start if s.step > 0 else s.stop + 1 for s in post_selection], + ), + casting="unsafe", + dtype="int64", + ) + locend = np.hstack( + ( + [s.stop if s.step > 0 else s.start + 1 for s in prior_selection], + chunk_end, + [s.stop if s.step > 0 else s.start + 1 for s in post_selection], + ), + casting="unsafe", + dtype="int64", + ) + return locbegin, locend + + +def _sliced_chunk_iter(chunks, idx, shape, axis=None, nchunk=False): + """ + If nchunk is True, retrun at iterator over the number of the chunk. 
+ """ + ratio = np.ceil(np.asarray(shape) / np.asarray(chunks)).astype(np.int64) + idx = ndindex.ndindex(idx).expand(shape) + if axis is not None: + idx = tuple(a for i, a in enumerate(idx.args) if i != axis) + (idx.args[axis],) + chunks_ = tuple(a for i, a in enumerate(chunks) if i != axis) + (chunks[axis],) + else: + chunks_ = chunks + idx_iter = iter(idx) # iterate over tuple of slices in order + chunk_iter = iter(chunks_) # iterate over chunk_shape in order + + iters = [] + while True: + try: + i = next(idx_iter) # slice along axis + n = next(chunk_iter) # chunklen along dimension + except StopIteration: + break + if not isinstance(i, ndindex.Slice): + raise ValueError("Only slices may be used with axis arg") + + def _slice_iter(s, n): + a, N, m = s.args + if m > n: + yield from ((a + k * m) // n for k in range(ceiling(N - a, m))) + else: + yield from range(a // n, ceiling(N, n)) + + iters.append(_slice_iter(i, n)) + + def _indices(iters): + my_list = [ndindex.Slice(None, None)] * len(chunks) + for p in product(*iters): + # p increments over arg axis first before other axes + # p = (...., -1, axis) + if axis is None: + my_list = [ + ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) + for n, cs, ci in zip(shape, chunks, p, strict=True) + ] + else: + my_list[:axis] = [ + ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) + for n, cs, ci in zip(shape[:axis], chunks[:axis], p[:axis], strict=True) + ] + n, cs, ci = shape[axis], chunks[axis], p[-1] + my_list[axis] = ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) + my_list[axis + 1 :] = [ + ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) + for n, cs, ci in zip(shape[axis + 1 :], chunks[axis + 1 :], p[axis:-1], strict=True) + ] + if nchunk: + yield builtins.sum( + c.start // chunks[i] * np.prod(ratio[i + 1 :]) for i, c in enumerate(my_list) + ) + else: + yield ndindex.Tuple(*my_list) + + yield from _indices(iters) + + +def get_intersecting_chunks(idx, shape, chunks, axis=None): + if len(chunks) != len(shape): + raise 
ValueError("chunks must be same length as shape!") + if 0 in chunks: # chunk is whole array so just return full tuple to do loop once + return (ndindex.ndindex(...).expand(shape),) + chunk_size = ndindex.ChunkSize(chunks) + if axis is None: + return chunk_size.as_subchunks(idx, shape) # if _slice is (), returns all chunks + + # special algorithm to iterate over axis first (adapted from ndindex source) + return _sliced_chunk_iter(chunks, idx, shape, axis) + + +def get_chunks_idx(shape, chunks): + chunks_idx = tuple(math.ceil(s / c) for s, c in zip(shape, chunks, strict=True)) + nchunks = math.prod(chunks_idx) + return chunks_idx, nchunks + + +def process_key(key, shape): + key = ndindex.ndindex(key).expand(shape).raw + mask = tuple( + isinstance(k, int) for k in key + ) # mask to track dummy dims introduced by int -> slice(k, k+1) + key = tuple(slice(k, k + 1, None) if isinstance(k, int) else k for k in key) # key is slice, None, int + return key, mask + + +def is_inside_ne_evaluate() -> bool: + """ + Whether the current code is being executed from an ne_evaluate call + """ + # Get the current call stack + stack = inspect.stack() + return builtins.any(frame_info.function in {"ne_evaluate"} for frame_info in stack) + + +def _incomplete_lazyfunc(func) -> None: + """Decorator for lazy functions with incomplete numexpr/miniexpr coverage. + + This function will force eager execution when called from ne_evaluate. + + Returns + ------- + out: None + + Examples + -------- + .. 
code-block:: python + + @incomplete_lazyfunc() + def filler(inputs_tuple, output, offset): + output[:] = inputs_tuple[0] - inputs_tuple[1] + + """ + + def wrapper(*args, **kwargs): + if is_inside_ne_evaluate(): # haven't been able to use miniexpr so use numpy + return safe_numpy_globals[func.__name__](*args, **kwargs) + return func(*args, **kwargs) + + return wrapper + + +def check_smaller_shape(value_shape, shape, slice_shape, slice_): + """Check whether the shape of the value is smaller than the shape of the array. + + This follows the NumPy broadcasting rules. + """ + # slice_shape must be as long as shape + if len(slice_shape) != len(slice_): + raise ValueError("slice_shape must be as long as slice_") + no_nones_shape = tuple(sh for sh, s in zip(slice_shape, slice_, strict=True) if s is not None) + no_nones_slice = tuple(s for sh, s in zip(slice_shape, slice_, strict=True) if s is not None) + is_smaller_shape = any( + s > (1 if i >= len(value_shape) else value_shape[i]) for i, s in enumerate(no_nones_shape) + ) + slice_past_bounds = any( + s.stop > (1 if i >= len(value_shape) else value_shape[i]) for i, s in enumerate(no_nones_slice) + ) + return len(value_shape) < len(shape) or is_smaller_shape or slice_past_bounds + + +def _compute_smaller_slice(larger_shape, smaller_shape, larger_slice): + smaller_slice = [] + diff_dims = len(larger_shape) - len(smaller_shape) + + for i in range(len(larger_shape)): + if i < diff_dims: + # For leading dimensions of the larger array that the smaller array doesn't have, + # we don't add anything to the smaller slice + pass + else: + # For dimensions that both arrays have, the slice for the smaller array should be + # the same as the larger array unless the smaller array's size along that dimension + # is 1, in which case we use None to indicate the full slice + if smaller_shape[i - diff_dims] != 1: + smaller_slice.append(larger_slice[i]) + else: + smaller_slice.append(slice(0, larger_shape[i])) + + return tuple(smaller_slice) + 
+ +# A more compact version of the function above, albeit less readable +def compute_smaller_slice(larger_shape, smaller_shape, larger_slice): + """ + Returns the slice of the smaller array that corresponds to the slice of the larger array. + """ + j_small = len(smaller_shape) - 1 + j_large = len(larger_shape) - 1 + smaller_shape_nones = [] + larger_shape_nones = [] + for s in reversed(larger_slice): + if s is None: + smaller_shape_nones.append(1) + larger_shape_nones.append(1) + else: + if j_small >= 0: + smaller_shape_nones.append(smaller_shape[j_small]) + j_small -= 1 + if j_large >= 0: + larger_shape_nones.append(larger_shape[j_large]) + j_large -= 1 + smaller_shape_nones.reverse() + larger_shape_nones.reverse() + diff_dims = len(larger_shape_nones) - len(smaller_shape_nones) + return tuple( + None + if larger_slice[i] is None + else ( + larger_slice[i] if smaller_shape_nones[i - diff_dims] != 1 else slice(0, larger_shape_nones[i]) + ) + for i in range(diff_dims, len(larger_shape_nones)) + ) + + +def _get_chunk_operands(operands, cslice, chunk_operands, shape): + # Get the starts and stops for the slice + cslice_shape = tuple(s.stop - s.start for s in cslice) + starts = [s.start if s.start is not None else 0 for s in cslice] + stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)] + unit_steps = np.all([s.step == 1 for s in cslice]) + # Get the slice of each operand + for key, value in operands.items(): + if np.isscalar(value): + chunk_operands[key] = value + continue + if value.shape == (): + chunk_operands[key] = value[()] + continue + if check_smaller_shape(value.shape, shape, cslice_shape, cslice): + # We need to fetch the part of the value that broadcasts with the operand + smaller_slice = compute_smaller_slice(shape, value.shape, cslice) + chunk_operands[key] = value[smaller_slice] + continue + # If key is in operands, we can reuse the buffer + if ( + key in chunk_operands + and cslice_shape == 
chunk_operands[key].shape + and isinstance(value, blosc2.NDArray) + and unit_steps + ): + value.get_slice_numpy(chunk_operands[key], (starts, stops)) + continue + chunk_operands[key] = value[cslice] diff --git a/venv/Lib/site-packages/blosc2/version.py b/venv/Lib/site-packages/blosc2/version.py new file mode 100644 index 0000000..2daba18 --- /dev/null +++ b/venv/Lib/site-packages/blosc2/version.py @@ -0,0 +1,2 @@ +__version__ = "4.1.2" +__array_api_version__ = "2024.12" diff --git a/venv/Lib/site-packages/cpuinfo/__init__.py b/venv/Lib/site-packages/cpuinfo/__init__.py new file mode 100644 index 0000000..f0ae1ce --- /dev/null +++ b/venv/Lib/site-packages/cpuinfo/__init__.py @@ -0,0 +1,5 @@ + +import sys +from cpuinfo.cpuinfo import * + + diff --git a/venv/Lib/site-packages/cpuinfo/__main__.py b/venv/Lib/site-packages/cpuinfo/__main__.py new file mode 100644 index 0000000..09ebf76 --- /dev/null +++ b/venv/Lib/site-packages/cpuinfo/__main__.py @@ -0,0 +1,5 @@ + +import cpuinfo + +cpuinfo.main() + diff --git a/venv/Lib/site-packages/cpuinfo/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/cpuinfo/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..84b4d32 Binary files /dev/null and b/venv/Lib/site-packages/cpuinfo/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/cpuinfo/__pycache__/__main__.cpython-311.pyc b/venv/Lib/site-packages/cpuinfo/__pycache__/__main__.cpython-311.pyc new file mode 100644 index 0000000..292e726 Binary files /dev/null and b/venv/Lib/site-packages/cpuinfo/__pycache__/__main__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/cpuinfo/__pycache__/cpuinfo.cpython-311.pyc b/venv/Lib/site-packages/cpuinfo/__pycache__/cpuinfo.cpython-311.pyc new file mode 100644 index 0000000..a5b24e5 Binary files /dev/null and b/venv/Lib/site-packages/cpuinfo/__pycache__/cpuinfo.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/cpuinfo/cpuinfo.py 
b/venv/Lib/site-packages/cpuinfo/cpuinfo.py new file mode 100644 index 0000000..ea2f90e --- /dev/null +++ b/venv/Lib/site-packages/cpuinfo/cpuinfo.py @@ -0,0 +1,2827 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +# Copyright (c) 2014-2022 Matthew Brennan Jones +# Py-cpuinfo gets CPU info with pure Python +# It uses the MIT License +# It is hosted at: https://github.com/workhorsy/py-cpuinfo +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
class Trace(object):
    """Optional diagnostic log of the steps taken while gathering CPU info.

    When constructed with ``is_active`` false, every logging method returns
    immediately without doing anything. When active, messages are written
    either to an in-memory ``StringIO`` (``is_stored_in_string`` true) or to a
    new ``cpuinfo_trace_<timestamp>.trace`` file in the current directory.

    Most methods append the file name and line number of a calling frame so
    the trace can be matched back to the code that produced it.
    """

    def __init__(self, is_active, is_stored_in_string):
        self._is_active = is_active
        if not self._is_active:
            return

        from datetime import datetime
        from io import StringIO

        if is_stored_in_string:
            self._output = StringIO()
        else:
            date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
            # NOTE(review): this file handle is never closed by the class itself.
            self._output = open('cpuinfo_trace_{0}.trace'.format(date), 'w')

        self._stdout = StringIO()
        self._stderr = StringIO()
        self._err = None

    @staticmethod
    def _caller(depth):
        """Return ``(file, line)`` of the frame ``depth`` levels above the calling method."""
        from inspect import stack
        # +1 skips this helper's own frame, so ``depth`` keeps the meaning it
        # would have if stack() were called directly inside the public method.
        frame = stack()[depth + 1]
        return frame[1], frame[2]

    def header(self, msg):
        """Write ``msg`` followed by the location of the direct caller."""
        if not self._is_active: return

        file, line = self._caller(1)
        self._output.write("{0} ({1} {2})\n".format(msg, file, line))
        self._output.flush()

    def success(self):
        """Write a success marker with the location of the direct caller."""
        if not self._is_active: return

        file, line = self._caller(1)
        self._output.write("Success ... ({0} {1})\n\n".format(file, line))
        self._output.flush()

    def fail(self, msg):
        """Write a failure marker for ``msg``, which may be a string or an exception.

        A string is written tab-indented, one output line per input line. For
        an exception, the text of ``traceback.format_exc()`` is written.
        Values of any other type are ignored.
        """
        if not self._is_active: return

        file, line = self._caller(1)

        if isinstance(msg, str):
            # Join with '\n' so a multi-line message stays multi-line; joining
            # with '' collapsed all lines onto a single tab-separated line.
            body = '\n'.join('\t' + part for part in msg.split('\n')) + '\n'

            self._output.write(body)
            self._output.write("Failed ... ({0} {1})\n\n".format(file, line))
            self._output.flush()
        elif isinstance(msg, Exception):
            from traceback import format_exc
            err_string = format_exc()
            self._output.write("\tFailed ... ({0} {1})\n".format(file, line))
            self._output.write(''.join(['\t\t{0}\n'.format(n) for n in err_string.split('\n')]) + '\n')
            self._output.flush()

    def command_header(self, msg):
        """Write ``msg`` with the location found three frames above this method."""
        if not self._is_active: return

        file, line = self._caller(3)
        self._output.write("\t{0} ({1} {2})\n".format(msg, file, line))
        self._output.flush()

    def command_output(self, msg, output):
        """Write a label line followed by ``output``, one indented line per line.

        ``output`` may be any value; non-strings are converted with ``str()``
        first, so callers can log booleans or numbers without crashing.
        """
        if not self._is_active: return

        text = output if isinstance(output, str) else str(output)
        self._output.write("\t\t{0}\n".format(msg))
        self._output.write(''.join(['\t\t\t{0}\n'.format(n) for n in text.split('\n')]) + '\n')
        self._output.flush()

    def keys(self, keys, info, new_info):
        """Log which of ``keys`` changed between ``info`` and ``new_info``, and which are new."""
        if not self._is_active: return

        file, line = self._caller(2)

        # List updated keys
        self._output.write("\tChanged keys ({0} {1})\n".format(file, line))
        changed_keys = [key for key in keys if key in info and key in new_info and info[key] != new_info[key]]
        if changed_keys:
            for key in changed_keys:
                self._output.write('\t\t{0}: {1} to {2}\n'.format(key, info[key], new_info[key]))
        else:
            self._output.write('\t\tNone\n')

        # List new keys
        self._output.write("\tNew keys ({0} {1})\n".format(file, line))
        new_keys = [key for key in keys if key in new_info and key not in info]
        if new_keys:
            for key in new_keys:
                self._output.write('\t\t{0}: {1}\n'.format(key, new_info[key]))
        else:
            self._output.write('\t\tNone\n')

        self._output.write('\n')
        self._output.flush()

    def write(self, msg):
        """Write ``msg`` followed by a newline."""
        if not self._is_active: return

        self._output.write(msg + '\n')
        self._output.flush()

    def to_dict(self, info, is_fail):
        """Bundle the captured text together with ``info`` and ``is_fail``.

        This reads attributes that only exist on an active trace and calls
        ``getvalue()`` on the output, so it is only usable on an active,
        string-backed trace.
        """
        return {
            'output' : self._output.getvalue(),
            'stdout' : self._stdout.getvalue(),
            'stderr' : self._stderr.getvalue(),
            'info' : info,
            'err' : self._err,
            'is_fail' : is_fail
        }
platform.system().lower() == 'windows' + arch_string_raw = platform.machine() + uname_string_raw = platform.uname()[5] + can_cpuid = True + + @staticmethod + def has_proc_cpuinfo(): + return os.path.exists('/proc/cpuinfo') + + @staticmethod + def has_dmesg(): + return len(_program_paths('dmesg')) > 0 + + @staticmethod + def has_var_run_dmesg_boot(): + uname = platform.system().strip().strip('"').strip("'").strip().lower() + return 'linux' in uname and os.path.exists('/var/run/dmesg.boot') + + @staticmethod + def has_cpufreq_info(): + return len(_program_paths('cpufreq-info')) > 0 + + @staticmethod + def has_sestatus(): + return len(_program_paths('sestatus')) > 0 + + @staticmethod + def has_sysctl(): + return len(_program_paths('sysctl')) > 0 + + @staticmethod + def has_isainfo(): + return len(_program_paths('isainfo')) > 0 + + @staticmethod + def has_kstat(): + return len(_program_paths('kstat')) > 0 + + @staticmethod + def has_sysinfo(): + uname = platform.system().strip().strip('"').strip("'").strip().lower() + is_beos = 'beos' in uname or 'haiku' in uname + return is_beos and len(_program_paths('sysinfo')) > 0 + + @staticmethod + def has_lscpu(): + return len(_program_paths('lscpu')) > 0 + + @staticmethod + def has_ibm_pa_features(): + return len(_program_paths('lsprop')) > 0 + + @staticmethod + def has_wmic(): + returncode, output = _run_and_get_stdout(['wmic', 'os', 'get', 'Version']) + return returncode == 0 and len(output) > 0 + + @staticmethod + def cat_proc_cpuinfo(): + return _run_and_get_stdout(['cat', '/proc/cpuinfo']) + + @staticmethod + def cpufreq_info(): + return _run_and_get_stdout(['cpufreq-info']) + + @staticmethod + def sestatus_b(): + return _run_and_get_stdout(['sestatus', '-b']) + + @staticmethod + def dmesg_a(): + return _run_and_get_stdout(['dmesg', '-a']) + + @staticmethod + def cat_var_run_dmesg_boot(): + return _run_and_get_stdout(['cat', '/var/run/dmesg.boot']) + + @staticmethod + def sysctl_machdep_cpu_hw_cpufrequency(): + return 
def _program_paths(program_name):
    """Return every executable path for ``program_name`` found on ``PATH``.

    Each directory listed in the ``PATH`` environment variable is checked for
    ``program_name`` itself and for ``program_name`` plus each extension
    listed in ``PATHEXT`` (when that variable is set). A candidate is kept
    when ``os.access(candidate, os.X_OK)`` is true.

    Returns an empty list when nothing matches or when ``PATH`` is not set.
    """
    # Build a real list: filter() returns a one-shot iterator in Python 3,
    # which would be exhausted after the first PATH directory and silently
    # skip the extensions for every later directory.
    exts = [ext for ext in os.environ.get('PATHEXT', '').split(os.pathsep) if ext]

    # A missing PATH means there is nowhere to look, not an error.
    search_path = os.environ.get('PATH')
    directories = search_path.split(os.pathsep) if search_path is not None else []

    paths = []
    for directory in directories:
        candidate = os.path.join(directory, program_name)
        if os.access(candidate, os.X_OK):
            paths.append(candidate)
        for ext in exts:
            with_ext = candidate + ext
            if os.access(with_ext, os.X_OK):
                paths.append(with_ext)
    return paths
def _get_field_actual(cant_be_number, raw_string, field_names):
    """Return the first non-empty value of any of ``field_names`` in ``raw_string``.

    ``raw_string`` is scanned line by line for ``name: value`` pairs. Names
    are compared case-insensitively after stripping whitespace, and only the
    first ``:`` on a line separates name from value.

    When ``cant_be_number`` is true, values made up only of digits are
    skipped and the search continues on later lines.

    Returns the stripped value string, or ``None`` when nothing matches.
    """
    # Normalise the wanted names once instead of once per line
    wanted = {name.lower() for name in field_names}

    for line in raw_string.splitlines():
        left, sep, right = line.partition(':')
        if not sep:
            continue

        right = right.strip()
        if not right or left.strip().lower() not in wanted:
            continue

        if cant_be_number and right.isdigit():
            continue

        return right

    return None
def _to_decimal_string(ticks):
    """Normalise ``ticks`` to the string form of a float, e.g. ``'2.4'``.

    Every character other than digits and ``.`` is discarded (after turning
    ``,`` into ``.``), so unit suffixes and spaces are ignored. Anything that
    still cannot be read as a float yields ``'0.0'``.
    """
    try:
        # ',' is sometimes used as the decimal separator
        raw = format(ticks).replace(',', '.')

        # Keep only digits and decimal points; fall back to zero if none remain
        kept = [ch for ch in raw if ch == '.' or ch.isdigit()]
        number = "".join(kept).strip() or '0'

        # Guarantee a decimal point, then trim trailing zeros back to one
        if '.' not in number:
            number += '.0'
        number = number.rstrip('0')
        if number.endswith('.'):
            number += '0'

        # Round-trip through float to validate and canonicalise
        return format(float(number))
    except Exception:
        return '0.0'
def _friendly_bytes_to_int(friendly_bytes):
    """Convert a size string such as ``'32 KB'`` or ``'8 MiB'`` to a byte count.

    The unit suffix is matched case-insensitively. All of the ``k``/``m``/``g``
    families (``KiB``, ``KB``, ``K`` and so on) use powers of 1024, and a
    bare ``B`` means bytes.

    Returns the integer number of bytes. When the value is not a string, has
    no recognised suffix, or the part before the suffix is not an integer,
    ``friendly_bytes`` is returned unchanged.
    """
    if not isinstance(friendly_bytes, str):
        return friendly_bytes

    text = friendly_bytes.lower()

    # Longest suffixes first so 'kib' is not mistaken for a bare 'b'
    suffixes = (
        ('gib', 1024 * 1024 * 1024),
        ('mib', 1024 * 1024),
        ('kib', 1024),

        ('gb', 1024 * 1024 * 1024),
        ('mb', 1024 * 1024),
        ('kb', 1024),

        ('g', 1024 * 1024 * 1024),
        ('m', 1024 * 1024),
        ('k', 1024),
        ('b', 1),
    )

    for suffix, multiplier in suffixes:
        if text.endswith(suffix):
            # Strip exactly the trailing suffix; splitting on it would also cut
            # at any earlier occurrence of the same letters.
            try:
                return int(text[:-len(suffix)].strip()) * multiplier
            except ValueError:
                return friendly_bytes

    return friendly_bytes
def _parse_cpu_brand_string(cpu_string):
    """Extract the clock speed from a CPU brand string.

    Returns a ``(hz, scale)`` tuple. ``hz`` is a decimal string produced by
    ``_to_decimal_string``. ``scale`` is 6 when the string ends in ``MHz``,
    9 when it ends in ``GHz`` and 0 otherwise. ``('0.0', 0)`` is returned
    when the string does not mention ``hz`` at all.

    The speed is taken from the text after the first ``@`` when there is one,
    otherwise from the last whitespace-separated token.
    """
    lowered = cpu_string.lower()

    # Just return 0 if the processor brand does not have the Hz
    if 'hz' not in lowered:
        return ('0.0', 0)

    scale = 0
    if lowered.endswith('mhz'):
        scale = 6
    elif lowered.endswith('ghz'):
        scale = 9

    if '@' in lowered:
        hz = lowered.split('@')[1]
    else:
        # [-1] rather than [1]: a brand made of a single token (e.g. "2.4GHz")
        # yields a one-element list and must not raise IndexError.
        hz = lowered.rsplit(None, 1)[-1]

    # No explicit unit stripping is needed: _to_decimal_string discards every
    # character that is not a digit or '.', which removes "mhz"/"ghz" too.
    hz = _to_decimal_string(hz)

    return (hz, scale)
def _parse_dmesg_output(output):
    """Build a CPU info dict from the text of dmesg-style output.

    The text after each ``CPU:``, ``CPU0:`` or ``CPU1:`` marker is parsed with
    ``_parse_cpu_brand_string_dx``, and the candidate with the most non-None
    fields is used. An ``Origin=...`` line and the ``Features``-style lines
    then refine the vendor, stepping, model, family and flags.

    Returns a dict containing only the entries with truthy values. Returns
    ``{}`` when no CPU line is found or when any step raises, in which case
    the error is reported through ``g_trace``.
    """
    try:
        # Get all the dmesg lines that might contain a CPU string
        markers = (' CPU0:', ' CPU1:', ' CPU:', '\nCPU0:', '\nCPU1:', '\nCPU:')
        lines = []
        for marker in markers:
            lines.extend(output.split(marker)[1:])
        lines = [l.split('\n')[0].strip() for l in lines]

        # Convert the lines to CPU strings
        cpu_strings = [_parse_cpu_brand_string_dx(l) for l in lines]

        # Find the CPU string that has the most fields
        best_string = None
        highest_count = 0
        for cpu_string in cpu_strings:
            count = sum([n is not None for n in cpu_string])
            if count > highest_count:
                highest_count = count
                best_string = cpu_string

        # If no CPU string was found, return {}
        if not best_string:
            return {}

        hz_actual, scale, processor_brand, vendor_id, stepping, model, family = best_string

        # Origin
        if ' Origin=' in output:
            origin_line = output[output.find(' Origin=') : ].split('\n')[0]
            for token in origin_line.strip().split():
                parts = token.strip().split('=')
                # Skip stray tokens without '=' instead of abandoning the
                # whole parse on an IndexError.
                if len(parts) < 2:
                    continue
                name = parts[0].strip().lower()
                value = parts[1].strip()

                # int(value, 16) accepts an optional "0x" prefix by itself.
                # lstrip('0x') strips *characters*, so "0" or "0x0" became ""
                # and raised ValueError.
                if name == 'origin':
                    vendor_id = value.strip('"')
                elif name == 'stepping':
                    stepping = int(value, 16)
                elif name == 'model':
                    model = int(value, 16)
                elif name in ['fam', 'family']:
                    family = int(value, 16)

        # Features
        flag_lines = []
        for category in [' Features=', ' Features2=', ' AMD Features=', ' AMD Features2=']:
            if category in output:
                flag_lines.append(output.split(category)[1].split('\n')[0])

        flags = []
        for line in flag_lines:
            # The flag names are the comma-separated list between '<' and '>'
            line = line.split('<')[1].split('>')[0].lower()
            flags.extend(line.split(','))
        flags.sort()

        # Convert from GHz/MHz string to Hz
        hz_advertised, scale = _parse_cpu_brand_string(processor_brand)

        # If advertised hz not found, use the actual hz
        if hz_advertised == '0.0':
            scale = 6
            hz_advertised = _to_decimal_string(hz_actual)

        info = {
            'vendor_id_raw' : vendor_id,
            'brand_raw' : processor_brand,

            'stepping' : stepping,
            'model' : model,
            'family' : family,
            'flags' : flags
        }

        if hz_advertised and hz_advertised != '0.0':
            info['hz_advertised_friendly'] = _hz_short_to_friendly(hz_advertised, scale)
            info['hz_actual_friendly'] = _hz_short_to_friendly(hz_actual, scale)

        if hz_advertised and hz_advertised != '0.0':
            info['hz_advertised'] = _hz_short_to_full(hz_advertised, scale)
            info['hz_actual'] = _hz_short_to_full(hz_actual, scale)

        return {k: v for k, v in info.items() if v}
    except Exception as err:
        g_trace.fail(err)
        #raise

    return {}
def _filter_dict_keys_with_empty_values(info, acceptable_values=None):
    """Return a copy of ``info`` without entries whose value counts as empty.

    A value counts as empty when it is falsy (``None``, ``0``, ``""``, ``()``,
    ``{}``, ``[]``), equals ``(0, 0)``, equals ``-1``, or is a string starting
    with ``"0.0"``.

    ``acceptable_values`` maps keys to a value that must be kept even though
    it would otherwise be filtered. An entry is kept on that basis only when
    its value equals the listed one; otherwise the normal rules apply.
    """
    # None instead of a shared mutable {} default
    if acceptable_values is None:
        acceptable_values = {}

    filtered_info = {}
    for key, value in info.items():
        # Keep if value is acceptable
        if key in acceptable_values and acceptable_values[key] == value:
            filtered_info[key] = value
            continue

        # Filter out None, 0, "", (), {}, []
        if not value:
            continue

        # Filter out (0, 0)
        if value == (0, 0):
            continue

        # Filter out -1
        if value == -1:
            continue

        # Filter out strings that start with "0.0"
        if isinstance(value, str) and value.startswith('0.0'):
            continue

        filtered_info[key] = value

    return filtered_info
pfnGetCurrentProcess = ctypes.windll.kernel32.GetCurrentProcess + pfnGetCurrentProcess.restype = ctypes.c_void_p + self.prochandle = ctypes.c_void_p(pfnGetCurrentProcess()) + # Actually flush cache + res = ctypes.windll.kernel32.FlushInstructionCache(self.prochandle, ctypes.c_void_p(self.address), ctypes.c_size_t(size)) + if not res: + raise Exception("Failed FlushInstructionCache") + else: + from mmap import mmap, MAP_PRIVATE, MAP_ANONYMOUS, PROT_WRITE, PROT_READ, PROT_EXEC + + # Allocate a private and executable memory segment the size of the machine code + machine_code = bytes.join(b'', self.machine_code) + self.size = len(machine_code) + self.mm = mmap(-1, self.size, flags=MAP_PRIVATE | MAP_ANONYMOUS, prot=PROT_WRITE | PROT_READ | PROT_EXEC) + + # Copy the machine code into the memory segment + self.mm.write(machine_code) + self.address = ctypes.addressof(ctypes.c_int.from_buffer(self.mm)) + + # Cast the memory segment into a function + functype = ctypes.CFUNCTYPE(self.restype, *self.argtypes) + self.func = functype(self.address) + + def run(self): + # Call the machine code like a function + retval = self.func() + + return retval + + def free(self): + # Free the function memory segment + if DataSource.is_windows: + MEM_RELEASE = ctypes.c_ulong(0x8000) + ctypes.windll.kernel32.VirtualFree(ctypes.c_void_p(self.address), ctypes.c_size_t(0), MEM_RELEASE) + else: + self.mm.close() + + self.prochandle = None + self.mm = None + self.func = None + self.address = None + self.size = 0 + + +class CPUID(object): + def __init__(self, trace=None): + if trace is None: + trace = Trace(False, False) + + # Figure out if SE Linux is on and in enforcing mode + self.is_selinux_enforcing = _is_selinux_enforcing(trace) + + def _asm_func(self, restype=None, argtypes=(), machine_code=[]): + asm = ASM(restype, argtypes, machine_code) + asm.compile() + return asm + + def _run_asm(self, *machine_code): + asm = ASM(ctypes.c_uint32, (), machine_code) + asm.compile() + retval = asm.run() + 
def get_flags(self, max_extension_support):
    '''
    Returns a sorted list of the CPU feature flag names reported by CPUID.

    Three leaves are queried through self._run_asm():
      * leaf 1 (EDX and ECX), always
      * leaf 7, sub-leaf 0 (EBX and ECX), when max_extension_support >= 7
      * leaf 0x80000001 (EDX and ECX), when max_extension_support >= 0x80000001

    A flag name is included when _is_bit_set() is true for its bit.
    max_extension_support is the value produced by get_max_extension_support().
    Each name appears at most once in the result.
    '''
    # http://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits
    # EDX
    edx = self._run_asm(
        b"\xB8\x01\x00\x00\x00",  # mov eax,0x1
        b"\x0f\xa2"               # cpuid
        b"\x89\xD0"               # mov eax,edx
        b"\xC3"                   # ret
    )

    # ECX
    ecx = self._run_asm(
        b"\xB8\x01\x00\x00\x00",  # mov eax,0x1
        b"\x0f\xa2"               # cpuid
        b"\x89\xC8"               # mov eax,ecx
        b"\xC3"                   # ret
    )

    # Get the CPU flags (EDX bits 10 and 20, ECX bits 11 and 16 are reserved)
    flags = {
        'fpu' : _is_bit_set(edx, 0),
        'vme' : _is_bit_set(edx, 1),
        'de' : _is_bit_set(edx, 2),
        'pse' : _is_bit_set(edx, 3),
        'tsc' : _is_bit_set(edx, 4),
        'msr' : _is_bit_set(edx, 5),
        'pae' : _is_bit_set(edx, 6),
        'mce' : _is_bit_set(edx, 7),
        'cx8' : _is_bit_set(edx, 8),
        'apic' : _is_bit_set(edx, 9),
        'sep' : _is_bit_set(edx, 11),
        'mtrr' : _is_bit_set(edx, 12),
        'pge' : _is_bit_set(edx, 13),
        'mca' : _is_bit_set(edx, 14),
        'cmov' : _is_bit_set(edx, 15),
        'pat' : _is_bit_set(edx, 16),
        'pse36' : _is_bit_set(edx, 17),
        'pn' : _is_bit_set(edx, 18),
        'clflush' : _is_bit_set(edx, 19),
        'dts' : _is_bit_set(edx, 21),
        'acpi' : _is_bit_set(edx, 22),
        'mmx' : _is_bit_set(edx, 23),
        'fxsr' : _is_bit_set(edx, 24),
        'sse' : _is_bit_set(edx, 25),
        'sse2' : _is_bit_set(edx, 26),
        'ss' : _is_bit_set(edx, 27),
        'ht' : _is_bit_set(edx, 28),
        'tm' : _is_bit_set(edx, 29),
        'ia64' : _is_bit_set(edx, 30),
        'pbe' : _is_bit_set(edx, 31),

        'pni' : _is_bit_set(ecx, 0),
        'pclmulqdq' : _is_bit_set(ecx, 1),
        'dtes64' : _is_bit_set(ecx, 2),
        'monitor' : _is_bit_set(ecx, 3),
        'ds_cpl' : _is_bit_set(ecx, 4),
        'vmx' : _is_bit_set(ecx, 5),
        'smx' : _is_bit_set(ecx, 6),
        'est' : _is_bit_set(ecx, 7),
        'tm2' : _is_bit_set(ecx, 8),
        'ssse3' : _is_bit_set(ecx, 9),
        'cid' : _is_bit_set(ecx, 10),
        'fma' : _is_bit_set(ecx, 12),
        'cx16' : _is_bit_set(ecx, 13),
        'xtpr' : _is_bit_set(ecx, 14),
        'pdcm' : _is_bit_set(ecx, 15),
        'pcid' : _is_bit_set(ecx, 17),
        'dca' : _is_bit_set(ecx, 18),
        'sse4_1' : _is_bit_set(ecx, 19),
        'sse4_2' : _is_bit_set(ecx, 20),
        'x2apic' : _is_bit_set(ecx, 21),
        'movbe' : _is_bit_set(ecx, 22),
        'popcnt' : _is_bit_set(ecx, 23),
        'tscdeadline' : _is_bit_set(ecx, 24),
        'aes' : _is_bit_set(ecx, 25),
        'xsave' : _is_bit_set(ecx, 26),
        'osxsave' : _is_bit_set(ecx, 27),
        'avx' : _is_bit_set(ecx, 28),
        'f16c' : _is_bit_set(ecx, 29),
        'rdrnd' : _is_bit_set(ecx, 30),
        'hypervisor' : _is_bit_set(ecx, 31)
    }

    # Get a list of only the flags that are true
    flags = [k for k, v in flags.items() if v]

    # http://en.wikipedia.org/wiki/CPUID#EAX.3D7.2C_ECX.3D0:_Extended_Features
    # NOTE(review): leaf 7 is a basic leaf, but this gate compares against the
    # highest *extended* leaf, so it passes whenever any extended leaf exists.
    # Confirm whether the highest basic leaf (CPUID EAX=0) should be used here.
    if max_extension_support >= 7:
        # EBX
        ebx = self._run_asm(
            b"\x31\xC9",              # xor ecx,ecx
            b"\xB8\x07\x00\x00\x00"   # mov eax,7
            b"\x0f\xa2"               # cpuid
            b"\x89\xD8"               # mov eax,ebx
            b"\xC3"                   # ret
        )

        # ECX
        ecx = self._run_asm(
            b"\x31\xC9",              # xor ecx,ecx
            b"\xB8\x07\x00\x00\x00"   # mov eax,7
            b"\x0f\xa2"               # cpuid
            b"\x89\xC8"               # mov eax,ecx
            b"\xC3"                   # ret
        )

        # Get the extended CPU flags.
        # Not reported from EBX: bit 0 (fsgsbase), bit 1 (IA32_TSC_ADJUST),
        # bit 6 (reserved), bit 13 (FPU CS and FPU DS deprecated).
        # Not reported from ECX: bits 17-21 (mpx0-mpx4) and the reserved bits.
        extended_flags = {
            'sgx' : _is_bit_set(ebx, 2),
            'bmi1' : _is_bit_set(ebx, 3),
            'hle' : _is_bit_set(ebx, 4),
            'avx2' : _is_bit_set(ebx, 5),
            'smep' : _is_bit_set(ebx, 7),
            'bmi2' : _is_bit_set(ebx, 8),
            'erms' : _is_bit_set(ebx, 9),
            'invpcid' : _is_bit_set(ebx, 10),
            'rtm' : _is_bit_set(ebx, 11),
            'pqm' : _is_bit_set(ebx, 12),
            'mpx' : _is_bit_set(ebx, 14),
            'pqe' : _is_bit_set(ebx, 15),
            'avx512f' : _is_bit_set(ebx, 16),
            'avx512dq' : _is_bit_set(ebx, 17),
            'rdseed' : _is_bit_set(ebx, 18),
            'adx' : _is_bit_set(ebx, 19),
            'smap' : _is_bit_set(ebx, 20),
            'avx512ifma' : _is_bit_set(ebx, 21),
            'pcommit' : _is_bit_set(ebx, 22),
            'clflushopt' : _is_bit_set(ebx, 23),
            'clwb' : _is_bit_set(ebx, 24),
            'intel_pt' : _is_bit_set(ebx, 25),
            'avx512pf' : _is_bit_set(ebx, 26),
            'avx512er' : _is_bit_set(ebx, 27),
            'avx512cd' : _is_bit_set(ebx, 28),
            'sha' : _is_bit_set(ebx, 29),
            'avx512bw' : _is_bit_set(ebx, 30),
            'avx512vl' : _is_bit_set(ebx, 31),

            'prefetchwt1' : _is_bit_set(ecx, 0),
            'avx512vbmi' : _is_bit_set(ecx, 1),
            'umip' : _is_bit_set(ecx, 2),
            'pku' : _is_bit_set(ecx, 3),
            'ospke' : _is_bit_set(ecx, 4),
            'avx512vbmi2' : _is_bit_set(ecx, 6),
            'gfni' : _is_bit_set(ecx, 8),
            'vaes' : _is_bit_set(ecx, 9),
            'vpclmulqdq' : _is_bit_set(ecx, 10),
            'avx512vnni' : _is_bit_set(ecx, 11),
            'avx512bitalg' : _is_bit_set(ecx, 12),
            'avx512vpopcntdq' : _is_bit_set(ecx, 14),
            'rdpid' : _is_bit_set(ecx, 22),
            'sgx_lc' : _is_bit_set(ecx, 30),
        }

        # Get a list of only the flags that are true
        extended_flags = [k for k, v in extended_flags.items() if v]
        flags += extended_flags

    # http://en.wikipedia.org/wiki/CPUID#EAX.3D80000001h:_Extended_Processor_Info_and_Feature_Bits
    if max_extension_support >= 0x80000001:
        # EDX -- this leaf returns its feature bits in EDX and ECX; EBX does
        # not hold feature flags, so it must not be decoded with this table.
        edx = self._run_asm(
            b"\xB8\x01\x00\x00\x80"   # mov eax,0x80000001
            b"\x0f\xa2"               # cpuid
            b"\x89\xD0"               # mov eax,edx
            b"\xC3"                   # ret
        )

        # ECX
        ecx = self._run_asm(
            b"\xB8\x01\x00\x00\x80"   # mov eax,0x80000001
            b"\x0f\xa2"               # cpuid
            b"\x89\xC8"               # mov eax,ecx
            b"\xC3"                   # ret
        )

        # Get the extended CPU flags
        # (EDX bits 10, 18, 21, 28 and ECX bits 14, 18, 20, 25, 29-31 are reserved)
        extended_flags = {
            'fpu' : _is_bit_set(edx, 0),
            'vme' : _is_bit_set(edx, 1),
            'de' : _is_bit_set(edx, 2),
            'pse' : _is_bit_set(edx, 3),
            'tsc' : _is_bit_set(edx, 4),
            'msr' : _is_bit_set(edx, 5),
            'pae' : _is_bit_set(edx, 6),
            'mce' : _is_bit_set(edx, 7),
            'cx8' : _is_bit_set(edx, 8),
            'apic' : _is_bit_set(edx, 9),
            'syscall' : _is_bit_set(edx, 11),
            'mtrr' : _is_bit_set(edx, 12),
            'pge' : _is_bit_set(edx, 13),
            'mca' : _is_bit_set(edx, 14),
            'cmov' : _is_bit_set(edx, 15),
            'pat' : _is_bit_set(edx, 16),
            'pse36' : _is_bit_set(edx, 17),
            'mp' : _is_bit_set(edx, 19),
            'nx' : _is_bit_set(edx, 20),
            'mmxext' : _is_bit_set(edx, 22),
            'mmx' : _is_bit_set(edx, 23),
            'fxsr' : _is_bit_set(edx, 24),
            'fxsr_opt' : _is_bit_set(edx, 25),
            'pdpe1gp' : _is_bit_set(edx, 26),
            'rdtscp' : _is_bit_set(edx, 27),
            'lm' : _is_bit_set(edx, 29),
            '3dnowext' : _is_bit_set(edx, 30),
            '3dnow' : _is_bit_set(edx, 31),

            'lahf_lm' : _is_bit_set(ecx, 0),
            'cmp_legacy' : _is_bit_set(ecx, 1),
            'svm' : _is_bit_set(ecx, 2),
            'extapic' : _is_bit_set(ecx, 3),
            'cr8_legacy' : _is_bit_set(ecx, 4),
            'abm' : _is_bit_set(ecx, 5),
            'sse4a' : _is_bit_set(ecx, 6),
            'misalignsse' : _is_bit_set(ecx, 7),
            '3dnowprefetch' : _is_bit_set(ecx, 8),
            'osvw' : _is_bit_set(ecx, 9),
            'ibs' : _is_bit_set(ecx, 10),
            'xop' : _is_bit_set(ecx, 11),
            'skinit' : _is_bit_set(ecx, 12),
            'wdt' : _is_bit_set(ecx, 13),
            'lwp' : _is_bit_set(ecx, 15),
            'fma4' : _is_bit_set(ecx, 16),
            'tce' : _is_bit_set(ecx, 17),
            'nodeid_msr' : _is_bit_set(ecx, 19),
            'tbm' : _is_bit_set(ecx, 21),
            'topoext' : _is_bit_set(ecx, 22),
            'perfctr_core' : _is_bit_set(ecx, 23),
            'perfctr_nb' : _is_bit_set(ecx, 24),
            'dbx' : _is_bit_set(ecx, 26),
            'perftsc' : _is_bit_set(ecx, 27),
            'pci_l2i' : _is_bit_set(ecx, 28),
        }

        # Get a list of only the flags that are true
        extended_flags = [k for k, v in extended_flags.items() if v]
        flags += extended_flags

    # The extended EDX table repeats names from leaf 1 (fpu, vme, ...), so
    # drop duplicates before sorting.
    return sorted(set(flags))
def get_cache(self, max_extension_support):
    '''
    Returns the L2 cache details reported by CPUID leaf 0x80000006.

    max_extension_support is the highest extended leaf the CPU supports.
    Returns {} when that is below 0x80000006. Otherwise returns a dict with:
      'size_b'        : cache size in bytes (ECX bits 31:16 are in KB)
      'associativity' : the raw 4 bit associativity code (ECX bits 15:12)
      'line_size_b'   : cache line size in bytes (ECX bits 7:0)
    '''
    # Just return if the cache feature is not supported
    if max_extension_support < 0x80000006:
        return {}

    # ECX
    ecx = self._run_asm(
        b"\xB8\x06\x00\x00\x80"  # mov eax,0x80000006
        b"\x0f\xa2"              # cpuid
        b"\x89\xC8"              # mov eax,ecx
        b"\xC3"                  # ret
    )

    # The size lives in the high word and the line size in the low byte;
    # reading them the other way round reports e.g. a 256 byte line.
    return {
        'size_b' : ((ecx >> 16) & 0xFFFF) * 1024,
        'associativity' : (ecx >> 12) & 0xF,
        'line_size_b' : ecx & 0xFF
    }
def get_raw_hz(self, interval=1):
    '''
    Returns the number of ticks per second counted by the function that
    get_ticks_func() builds.

    The tick counter is read, the process sleeps for `interval` seconds,
    and the counter is read again. With the default interval of 1 the raw
    tick difference is returned; for any other interval the difference is
    scaled to one second and rounded to an int.

    The tick function's memory is always released, even if a read fails.
    '''
    from time import sleep

    ticks_fn = self.get_ticks_func()

    try:
        start = ticks_fn.func()
        sleep(interval)
        end = ticks_fn.func()
    finally:
        # Free the generated function even when a read raised
        ticks_fn.free()

    ticks = (end - start)

    # Keep the default path exactly as before: no float round trip
    if interval == 1:
        return ticks

    return int(round(ticks / float(interval)))
def _get_cpu_info_from_cpuid():
    '''
    Returns the CPU info gathered by querying the X86 cpuid register in a new process.
    Returns {} on non X86 cpus.
    Returns {} if SELinux is in enforcing mode.
    Returns {} if the query fails for any other reason.
    '''

    g_trace.header('Tying to get info from CPUID ...')

    from multiprocessing import Process, Queue

    # Return {} if can't cpuid
    if not DataSource.can_cpuid:
        g_trace.fail('Can\'t CPUID. Skipping ...')
        return {}

    # Get the CPU arch and bits
    arch, bits = _parse_arch(DataSource.arch_string_raw)

    # Return {} if this is not an X86 CPU
    if not arch in ['X86_32', 'X86_64']:
        g_trace.fail('Not running on X86_32 or X86_64. Skipping ...')
        return {}

    try:
        if CAN_CALL_CPUID_IN_SUBPROCESS:
            # Start running the function in a subprocess
            queue = Queue()
            p = Process(target=_get_cpu_info_from_cpuid_subprocess_wrapper, args=(queue,))
            p.start()

            # Block until the process ends. A plain join() sleeps in the OS
            # instead of polling is_alive() in a tight loop.
            # NOTE(review): like the polling loop it replaces, this waits for
            # the child before reading the queue; the multiprocessing docs
            # warn that this can hang if the queued payload is very large.
            p.join()

            # Return {} if it failed
            if p.exitcode != 0:
                g_trace.fail('Failed to run CPUID in process. Skipping ...')
                return {}

            # Return {} if no results
            if queue.empty():
                g_trace.fail('Failed to get anything from CPUID process. Skipping ...')
                return {}

            # Return the result, only if there is something to read
            output = _b64_to_obj(queue.get())

            # Replay what the child process traced and printed
            if 'output' in output and output['output']:
                g_trace.write(output['output'])

            if 'stdout' in output and output['stdout']:
                sys.stdout.write('{0}\n'.format(output['stdout']))
                sys.stdout.flush()

            if 'stderr' in output and output['stderr']:
                sys.stderr.write('{0}\n'.format(output['stderr']))
                sys.stderr.flush()

            if 'is_fail' not in output:
                g_trace.fail('Failed to get is_fail from CPUID process. Skipping ...')
                return {}

            # Fail if there was an exception
            if 'err' in output and output['err']:
                g_trace.fail('Failed to run CPUID in process. Skipping ...')
                g_trace.write(output['err'])
                g_trace.write('Failed ...')
                return {}

            if output['is_fail']:
                g_trace.write('Failed ...')
                return {}

            if 'info' not in output or not output['info']:
                g_trace.fail('Failed to get return info from CPUID process. Skipping ...')
                return {}

            return output['info']
        else:
            # FIXME: This should write the values like in the above call to actual
            orig_stdout = sys.stdout
            orig_stderr = sys.stderr

            # The callee redirects sys.stdout/sys.stderr; always put the
            # originals back, even if it raises.
            try:
                output = _get_cpu_info_from_cpuid_actual()
            finally:
                sys.stdout = orig_stdout
                sys.stderr = orig_stderr

            g_trace.success()
            return output['info']
    except Exception as err:
        g_trace.fail(err)

    # Return {} if everything failed
    return {}
Skipping ...') + return {} + + # Various fields + vendor_id = _get_field(False, output, None, '', 'vendor_id', 'vendor id', 'vendor') + processor_brand = _get_field(True, output, None, None, 'model name', 'cpu', 'processor', 'uarch') + cache_size = _get_field(False, output, None, '', 'cache size') + stepping = _get_field(False, output, int, -1, 'stepping') + model = _get_field(False, output, int, -1, 'model') + family = _get_field(False, output, int, -1, 'cpu family') + hardware = _get_field(False, output, None, '', 'Hardware') + + # Flags + flags = _get_field(False, output, None, None, 'flags', 'Features', 'ASEs implemented') + if flags: + flags = flags.split() + flags.sort() + + # Check for other cache format + if not cache_size: + try: + for i in range(0, 10): + name = "cache{0}".format(i) + value = _get_field(False, output, None, None, name) + if value: + value = [entry.split('=') for entry in value.split(' ')] + value = dict(value) + if 'level' in value and value['level'] == '3' and 'size' in value: + cache_size = value['size'] + break + except Exception: + pass + + # Convert from MHz string to Hz + hz_actual = _get_field(False, output, None, '', 'cpu MHz', 'cpu speed', 'clock', 'cpu MHz dynamic', 'cpu MHz static') + hz_actual = hz_actual.lower().rstrip('mhz').strip() + hz_actual = _to_decimal_string(hz_actual) + + # Convert from GHz/MHz string to Hz + hz_advertised, scale = (None, 0) + try: + hz_advertised, scale = _parse_cpu_brand_string(processor_brand) + except Exception: + pass + + info = { + 'hardware_raw' : hardware, + 'brand_raw' : processor_brand, + + 'l3_cache_size' : _friendly_bytes_to_int(cache_size), + 'flags' : flags, + 'vendor_id_raw' : vendor_id, + 'stepping' : stepping, + 'model' : model, + 'family' : family, + } + + # Make the Hz the same for actual and advertised if missing any + if not hz_advertised or hz_advertised == '0.0': + hz_advertised = hz_actual + scale = 6 + elif not hz_actual or hz_actual == '0.0': + hz_actual = hz_advertised + + 
def _get_cpu_info_from_cpufreq_info():
    '''
    Returns the CPU info gathered from cpufreq-info.
    Returns {} if cpufreq-info is not found, fails to run, or its output
    does not contain a "current CPU frequency is ... Hz" entry.
    '''

    g_trace.header('Tying to get info from cpufreq-info ...')

    try:
        if not DataSource.has_cpufreq_info():
            g_trace.fail('Failed to find cpufreq-info. Skipping ...')
            return {}

        returncode, output = DataSource.cpufreq_info()
        if returncode != 0:
            g_trace.fail('Failed to run cpufreq-info. Skipping ...')
            return {}

        # Keep the text from the marker up to and including the "Hz" unit
        hz_brand = output.split('current CPU frequency is')[1].split('\n')[0]
        i = hz_brand.find('Hz')
        # Raise instead of assert, so the check survives "python -O"
        if i == -1:
            raise ValueError('No Hz unit found in cpufreq-info output')
        hz_brand = hz_brand[0 : i+2].strip().lower()

        # Remove the unit as a whole suffix (str.rstrip would treat it as a
        # set of characters) and remember its power of ten.
        scale = 0
        for unit, unit_scale in (('ghz', 9), ('mhz', 6), ('khz', 3), ('hz', 0)):
            if hz_brand.endswith(unit):
                scale = unit_scale
                hz_brand = hz_brand[:-len(unit)].strip()
                break
        hz_brand = _to_decimal_string(hz_brand)

        # cpufreq-info only gives one frequency; report it as both values
        info = {
            'hz_advertised_friendly' : _hz_short_to_friendly(hz_brand, scale),
            'hz_actual_friendly' : _hz_short_to_friendly(hz_brand, scale),
            'hz_advertised' : _hz_short_to_full(hz_brand, scale),
            'hz_actual' : _hz_short_to_full(hz_brand, scale),
        }

        info = _filter_dict_keys_with_empty_values(info)
        g_trace.success()
        return info
    except Exception as err:
        g_trace.fail(err)
        #raise # NOTE: To have this throw on error, uncomment this line
        return {}
Skipping ...') + return {} + + info = {} + + new_hz = _get_field(False, output, None, None, 'CPU max MHz', 'CPU MHz') + if new_hz: + new_hz = _to_decimal_string(new_hz) + scale = 6 + info['hz_advertised_friendly'] = _hz_short_to_friendly(new_hz, scale) + info['hz_actual_friendly'] = _hz_short_to_friendly(new_hz, scale) + info['hz_advertised'] = _hz_short_to_full(new_hz, scale) + info['hz_actual'] = _hz_short_to_full(new_hz, scale) + + new_hz = _get_field(False, output, None, None, 'CPU dynamic MHz', 'CPU static MHz') + if new_hz: + new_hz = _to_decimal_string(new_hz) + scale = 6 + info['hz_advertised_friendly'] = _hz_short_to_friendly(new_hz, scale) + info['hz_actual_friendly'] = _hz_short_to_friendly(new_hz, scale) + info['hz_advertised'] = _hz_short_to_full(new_hz, scale) + info['hz_actual'] = _hz_short_to_full(new_hz, scale) + + vendor_id = _get_field(False, output, None, None, 'Vendor ID') + if vendor_id: + info['vendor_id_raw'] = vendor_id + + brand = _get_field(False, output, None, None, 'Model name') + if brand: + info['brand_raw'] = brand + else: + brand = _get_field(False, output, None, None, 'Model') + if brand and not brand.isdigit(): + info['brand_raw'] = brand + + family = _get_field(False, output, None, None, 'CPU family') + if family and family.isdigit(): + info['family'] = int(family) + + stepping = _get_field(False, output, None, None, 'Stepping') + if stepping and stepping.isdigit(): + info['stepping'] = int(stepping) + + model = _get_field(False, output, None, None, 'Model') + if model and model.isdigit(): + info['model'] = int(model) + + l1_data_cache_size = _get_field(False, output, None, None, 'L1d cache') + if l1_data_cache_size: + l1_data_cache_size = l1_data_cache_size.split('(')[0].strip() + info['l1_data_cache_size'] = _friendly_bytes_to_int(l1_data_cache_size) + + l1_instruction_cache_size = _get_field(False, output, None, None, 'L1i cache') + if l1_instruction_cache_size: + l1_instruction_cache_size = 
def _get_cpu_info_from_dmesg():
    '''
    Returns the CPU info parsed out of the dmesg log.
    Returns {} when running on S390X, when dmesg is missing, or when
    running it fails.
    '''

    g_trace.header('Tying to get info from the dmesg ...')

    # The dmesg log is not trusted on this arch, so bail out early
    cpu_arch, _ = _parse_arch(DataSource.arch_string_raw)
    if cpu_arch == 'S390X':
        g_trace.fail('Running on S390X. Skipping ...')
        return {}

    # Nothing to do without a dmesg command
    if not DataSource.has_dmesg():
        g_trace.fail('Failed to find dmesg. Skipping ...')
        return {}

    # Run it and hand the text to the shared dmesg parser
    status, text = DataSource.dmesg_a()
    if status == 0 and text is not None:
        parsed = _parse_dmesg_output(text)
        g_trace.success()
        return parsed

    g_trace.fail('Failed to run \"dmesg -a\". Skipping ...')
    return {}
21), + 'dss_2.02' : _is_bit_set(left, 22), + 'dss_2.06' : _is_bit_set(left, 23), + + # Byte 3 + 'lsd_in_dscr' : _is_bit_set(left, 24), + 'ugr_in_dscr' : _is_bit_set(left, 25), + #'reserved' : _is_bit_set(left, 26), + #'reserved' : _is_bit_set(left, 27), + #'reserved' : _is_bit_set(left, 28), + #'reserved' : _is_bit_set(left, 29), + #'reserved' : _is_bit_set(left, 30), + #'reserved' : _is_bit_set(left, 31), + + # Byte 4 + 'sso_2.06' : _is_bit_set(right, 0), + #'reserved' : _is_bit_set(right, 1), + #'reserved' : _is_bit_set(right, 2), + #'reserved' : _is_bit_set(right, 3), + #'reserved' : _is_bit_set(right, 4), + #'reserved' : _is_bit_set(right, 5), + #'reserved' : _is_bit_set(right, 6), + #'reserved' : _is_bit_set(right, 7), + + # Byte 5 + 'le' : _is_bit_set(right, 8), + 'cfar' : _is_bit_set(right, 9), + 'eb' : _is_bit_set(right, 10), + 'lsq_2.07' : _is_bit_set(right, 11), + #'reserved' : _is_bit_set(right, 12), + #'reserved' : _is_bit_set(right, 13), + #'reserved' : _is_bit_set(right, 14), + #'reserved' : _is_bit_set(right, 15), + + # Byte 6 + 'dss_2.07' : _is_bit_set(right, 16), + #'reserved' : _is_bit_set(right, 17), + #'reserved' : _is_bit_set(right, 18), + #'reserved' : _is_bit_set(right, 19), + #'reserved' : _is_bit_set(right, 20), + #'reserved' : _is_bit_set(right, 21), + #'reserved' : _is_bit_set(right, 22), + #'reserved' : _is_bit_set(right, 23), + + # Byte 7 + #'reserved' : _is_bit_set(right, 24), + #'reserved' : _is_bit_set(right, 25), + #'reserved' : _is_bit_set(right, 26), + #'reserved' : _is_bit_set(right, 27), + #'reserved' : _is_bit_set(right, 28), + #'reserved' : _is_bit_set(right, 29), + #'reserved' : _is_bit_set(right, 30), + #'reserved' : _is_bit_set(right, 31), + } + + # Get a list of only the flags that are true + flags = [k for k, v in flags.items() if v] + flags.sort() + + info = { + 'flags' : flags + } + info = _filter_dict_keys_with_empty_values(info) + g_trace.success() + return info + except Exception as err: + g_trace.fail(err) + return 
def _get_cpu_info_from_cat_var_run_dmesg_boot():
    '''
    Returns the CPU info parsed out of /var/run/dmesg.boot.
    Returns {} when the file is missing or reading it fails.
    '''

    g_trace.header('Tying to get info from the /var/run/dmesg.boot log ...')

    # No boot log, nothing to parse
    if not DataSource.has_var_run_dmesg_boot():
        g_trace.fail('Failed to find /var/run/dmesg.boot file. Skipping ...')
        return {}

    # Read the log and hand the text to the shared dmesg parser
    status, text = DataSource.cat_var_run_dmesg_boot()
    read_failed = text is None or status != 0
    if read_failed:
        g_trace.fail('Failed to run \"cat /var/run/dmesg.boot\". Skipping ...')
        return {}

    parsed = _parse_dmesg_output(text)
    g_trace.success()
    return parsed
def _get_cpu_info_from_sysinfo_v1():
    '''
    Returns the CPU info parsed from the first output layout of "sysinfo -cpu".
    Returns {} if sysinfo is not found, fails to run, or cannot be parsed.
    '''

    g_trace.header('Tying to get info from sysinfo version 1 ...')

    try:
        # Nothing to do without sysinfo
        if not DataSource.has_sysinfo():
            g_trace.fail('Failed to find sysinfo. Skipping ...')
            return {}

        # Bail out when the command gave nothing usable
        returncode, output = DataSource.sysinfo_cpu()
        if output is None or returncode != 0:
            g_trace.fail('Failed to run \"sysinfo -cpu\". Skipping ...')
            return {}

        def numbered_field(label):
            # Integer found between ", <label> " and the next comma
            tail = output.split(', ' + label + ' ')[1]
            return int(tail.split(',')[0].strip())

        # The brand is the quoted text after the first CPU label
        brand_tail = output.split('CPU #0: "')[1]
        processor_brand = brand_tail.split('"\n')[0].strip()

        # This layout supplies neither a vendor id nor a cache size
        vendor_id = ''
        cache_size = ''

        stepping = numbered_field('stepping')
        model = numbered_field('model')
        family = numbered_field('family')

        # Flags are the lower cased words on the lines that start with two tabs
        flags = sorted(
            word
            for line in output.split('\n')
            if line.startswith('\t\t')
            for word in line.strip().lower().split()
        )

        # Only the advertised speed is known, so it doubles as the actual one
        hz_advertised, scale = _parse_cpu_brand_string(processor_brand)
        hz_actual = hz_advertised

        info = {
            'vendor_id_raw' : vendor_id,
            'brand_raw' : processor_brand,

            'hz_advertised_friendly' : _hz_short_to_friendly(hz_advertised, scale),
            'hz_actual_friendly' : _hz_short_to_friendly(hz_actual, scale),
            'hz_advertised' : _hz_short_to_full(hz_advertised, scale),
            'hz_actual' : _hz_short_to_full(hz_actual, scale),

            'l2_cache_size' : _to_friendly_bytes(cache_size),

            'stepping' : stepping,
            'model' : model,
            'family' : family,
            'flags' : flags
        }

        info = _filter_dict_keys_with_empty_values(info)
        g_trace.success()
        return info
    except Exception as err:
        g_trace.fail(err)
        #raise # NOTE: To have this throw on error, uncomment this line
        return {}
+ ''' + + g_trace.header('Tying to get info from sysinfo version 2 ...') + + try: + # Just return {} if there is no sysinfo + if not DataSource.has_sysinfo(): + g_trace.fail('Failed to find sysinfo. Skipping ...') + return {} + + # If sysinfo fails return {} + returncode, output = DataSource.sysinfo_cpu() + if output is None or returncode != 0: + g_trace.fail('Failed to run \"sysinfo -cpu\". Skipping ...') + return {} + + # Various fields + vendor_id = '' #_get_field(False, output, None, None, 'CPU #0: ') + processor_brand = output.split('CPU #0: "')[1].split('"\n')[0].strip() + cache_size = '' #_get_field(False, output, None, None, 'machdep.cpu.cache.size') + signature = output.split('Signature:')[1].split('\n')[0].strip() + # + stepping = int(signature.split('stepping ')[1].split(',')[0].strip()) + model = int(signature.split('model ')[1].split(',')[0].strip()) + family = int(signature.split('family ')[1].split(',')[0].strip()) + + # Flags + def get_subsection_flags(output): + retval = [] + for line in output.split('\n')[1:]: + if not line.startswith(' ') and not line.startswith(' '): break + for entry in line.strip().lower().split(' '): + retval.append(entry) + return retval + + flags = get_subsection_flags(output.split('Features: ')[1]) + \ + get_subsection_flags(output.split('Extended Features (0x00000001): ')[1]) + \ + get_subsection_flags(output.split('Extended Features (0x80000001): ')[1]) + flags.sort() + + # Convert from GHz/MHz string to Hz + lines = [n for n in output.split('\n') if n] + raw_hz = lines[0].split('running at ')[1].strip().lower() + hz_advertised = raw_hz.rstrip('mhz').rstrip('ghz').strip() + hz_advertised = _to_decimal_string(hz_advertised) + hz_actual = hz_advertised + + scale = 0 + if raw_hz.endswith('mhz'): + scale = 6 + elif raw_hz.endswith('ghz'): + scale = 9 + + info = { + 'vendor_id_raw' : vendor_id, + 'brand_raw' : processor_brand, + + 'hz_advertised_friendly' : _hz_short_to_friendly(hz_advertised, scale), + 'hz_actual_friendly' : 
_hz_short_to_friendly(hz_actual, scale), + 'hz_advertised' : _hz_short_to_full(hz_advertised, scale), + 'hz_actual' : _hz_short_to_full(hz_actual, scale), + + 'l2_cache_size' : _to_friendly_bytes(cache_size), + + 'stepping' : stepping, + 'model' : model, + 'family' : family, + 'flags' : flags + } + + info = _filter_dict_keys_with_empty_values(info) + g_trace.success() + return info + except Exception as err: + g_trace.fail(err) + #raise # NOTE: To have this throw on error, uncomment this line + return {} + +def _get_cpu_info_from_wmic(): + ''' + Returns the CPU info gathered from WMI. + Returns {} if not on Windows, or wmic is not installed. + ''' + g_trace.header('Tying to get info from wmic ...') + + try: + # Just return {} if not Windows or there is no wmic + if not DataSource.is_windows or not DataSource.has_wmic(): + g_trace.fail('Failed to find WMIC, or not on Windows. Skipping ...') + return {} + + returncode, output = DataSource.wmic_cpu() + if output is None or returncode != 0: + g_trace.fail('Failed to run wmic. 
Skipping ...') + return {} + + # Break the list into key values pairs + value = output.split("\n") + value = [s.rstrip().split('=') for s in value if '=' in s] + value = {k: v for k, v in value if v} + + # Get the advertised MHz + processor_brand = value.get('Name') + hz_advertised, scale_advertised = _parse_cpu_brand_string(processor_brand) + + # Get the actual MHz + hz_actual = value.get('CurrentClockSpeed') + scale_actual = 6 + if hz_actual: + hz_actual = _to_decimal_string(hz_actual) + + # Get cache sizes + l2_cache_size = value.get('L2CacheSize') # NOTE: L2CacheSize is in kilobytes + if l2_cache_size: + l2_cache_size = int(l2_cache_size) * 1024 + + l3_cache_size = value.get('L3CacheSize') # NOTE: L3CacheSize is in kilobytes + if l3_cache_size: + l3_cache_size = int(l3_cache_size) * 1024 + + # Get family, model, and stepping + family, model, stepping = '', '', '' + description = value.get('Description') or value.get('Caption') + entries = description.split(' ') + + if 'Family' in entries and entries.index('Family') < len(entries)-1: + i = entries.index('Family') + family = int(entries[i + 1]) + + if 'Model' in entries and entries.index('Model') < len(entries)-1: + i = entries.index('Model') + model = int(entries[i + 1]) + + if 'Stepping' in entries and entries.index('Stepping') < len(entries)-1: + i = entries.index('Stepping') + stepping = int(entries[i + 1]) + + info = { + 'vendor_id_raw' : value.get('Manufacturer'), + 'brand_raw' : processor_brand, + + 'hz_advertised_friendly' : _hz_short_to_friendly(hz_advertised, scale_advertised), + 'hz_actual_friendly' : _hz_short_to_friendly(hz_actual, scale_actual), + 'hz_advertised' : _hz_short_to_full(hz_advertised, scale_advertised), + 'hz_actual' : _hz_short_to_full(hz_actual, scale_actual), + + 'l2_cache_size' : l2_cache_size, + 'l3_cache_size' : l3_cache_size, + + 'stepping' : stepping, + 'model' : model, + 'family' : family, + } + + info = _filter_dict_keys_with_empty_values(info) + g_trace.success() + return 
info + except Exception as err: + g_trace.fail(err) + #raise # NOTE: To have this throw on error, uncomment this line + return {} + +def _get_cpu_info_from_registry(): + ''' + Returns the CPU info gathered from the Windows Registry. + Returns {} if not on Windows. + ''' + + g_trace.header('Tying to get info from Windows registry ...') + + try: + # Just return {} if not on Windows + if not DataSource.is_windows: + g_trace.fail('Not running on Windows. Skipping ...') + return {} + + # Get the CPU name + processor_brand = DataSource.winreg_processor_brand().strip() + + # Get the CPU vendor id + vendor_id = DataSource.winreg_vendor_id_raw() + + # Get the CPU arch and bits + arch_string_raw = DataSource.winreg_arch_string_raw() + arch, bits = _parse_arch(arch_string_raw) + + # Get the actual CPU Hz + hz_actual = DataSource.winreg_hz_actual() + hz_actual = _to_decimal_string(hz_actual) + + # Get the advertised CPU Hz + hz_advertised, scale = _parse_cpu_brand_string(processor_brand) + + # If advertised hz not found, use the actual hz + if hz_advertised == '0.0': + scale = 6 + hz_advertised = _to_decimal_string(hz_actual) + + # Get the CPU features + feature_bits = DataSource.winreg_feature_bits() + + def is_set(bit): + mask = 0x80000000 >> bit + retval = mask & feature_bits > 0 + return retval + + # http://en.wikipedia.org/wiki/CPUID + # http://unix.stackexchange.com/questions/43539/what-do-the-flags-in-proc-cpuinfo-mean + # http://www.lohninger.com/helpcsuite/public_constants_cpuid.htm + flags = { + 'fpu' : is_set(0), # Floating Point Unit + 'vme' : is_set(1), # V86 Mode Extensions + 'de' : is_set(2), # Debug Extensions - I/O breakpoints supported + 'pse' : is_set(3), # Page Size Extensions (4 MB pages supported) + 'tsc' : is_set(4), # Time Stamp Counter and RDTSC instruction are available + 'msr' : is_set(5), # Model Specific Registers + 'pae' : is_set(6), # Physical Address Extensions (36 bit address, 2MB pages) + 'mce' : is_set(7), # Machine Check Exception supported 
+ 'cx8' : is_set(8), # Compare Exchange Eight Byte instruction available + 'apic' : is_set(9), # Local APIC present (multiprocessor operation support) + 'sepamd' : is_set(10), # Fast system calls (AMD only) + 'sep' : is_set(11), # Fast system calls + 'mtrr' : is_set(12), # Memory Type Range Registers + 'pge' : is_set(13), # Page Global Enable + 'mca' : is_set(14), # Machine Check Architecture + 'cmov' : is_set(15), # Conditional MOVe instructions + 'pat' : is_set(16), # Page Attribute Table + 'pse36' : is_set(17), # 36 bit Page Size Extensions + 'serial' : is_set(18), # Processor Serial Number + 'clflush' : is_set(19), # Cache Flush + #'reserved1' : is_set(20), # reserved + 'dts' : is_set(21), # Debug Trace Store + 'acpi' : is_set(22), # ACPI support + 'mmx' : is_set(23), # MultiMedia Extensions + 'fxsr' : is_set(24), # FXSAVE and FXRSTOR instructions + 'sse' : is_set(25), # SSE instructions + 'sse2' : is_set(26), # SSE2 (WNI) instructions + 'ss' : is_set(27), # self snoop + #'reserved2' : is_set(28), # reserved + 'tm' : is_set(29), # Automatic clock control + 'ia64' : is_set(30), # IA64 instructions + '3dnow' : is_set(31) # 3DNow! instructions available + } + + # Get a list of only the flags that are true + flags = [k for k, v in flags.items() if v] + flags.sort() + + info = { + 'vendor_id_raw' : vendor_id, + 'brand_raw' : processor_brand, + + 'hz_advertised_friendly' : _hz_short_to_friendly(hz_advertised, scale), + 'hz_actual_friendly' : _hz_short_to_friendly(hz_actual, 6), + 'hz_advertised' : _hz_short_to_full(hz_advertised, scale), + 'hz_actual' : _hz_short_to_full(hz_actual, 6), + + 'flags' : flags + } + + info = _filter_dict_keys_with_empty_values(info) + g_trace.success() + return info + except Exception as err: + g_trace.fail(err) + return {} + +def _get_cpu_info_from_kstat(): + ''' + Returns the CPU info gathered from isainfo and kstat. + Returns {} if isainfo or kstat are not found. 
+ ''' + + g_trace.header('Tying to get info from kstat ...') + + try: + # Just return {} if there is no isainfo or kstat + if not DataSource.has_isainfo() or not DataSource.has_kstat(): + g_trace.fail('Failed to find isinfo or kstat. Skipping ...') + return {} + + # If isainfo fails return {} + returncode, flag_output = DataSource.isainfo_vb() + if flag_output is None or returncode != 0: + g_trace.fail('Failed to run \"isainfo -vb\". Skipping ...') + return {} + + # If kstat fails return {} + returncode, kstat = DataSource.kstat_m_cpu_info() + if kstat is None or returncode != 0: + g_trace.fail('Failed to run \"kstat -m cpu_info\". Skipping ...') + return {} + + # Various fields + vendor_id = kstat.split('\tvendor_id ')[1].split('\n')[0].strip() + processor_brand = kstat.split('\tbrand ')[1].split('\n')[0].strip() + stepping = int(kstat.split('\tstepping ')[1].split('\n')[0].strip()) + model = int(kstat.split('\tmodel ')[1].split('\n')[0].strip()) + family = int(kstat.split('\tfamily ')[1].split('\n')[0].strip()) + + # Flags + flags = flag_output.strip().split('\n')[-1].strip().lower().split() + flags.sort() + + # Convert from GHz/MHz string to Hz + scale = 6 + hz_advertised = kstat.split('\tclock_MHz ')[1].split('\n')[0].strip() + hz_advertised = _to_decimal_string(hz_advertised) + + # Convert from GHz/MHz string to Hz + hz_actual = kstat.split('\tcurrent_clock_Hz ')[1].split('\n')[0].strip() + hz_actual = _to_decimal_string(hz_actual) + + info = { + 'vendor_id_raw' : vendor_id, + 'brand_raw' : processor_brand, + + 'hz_advertised_friendly' : _hz_short_to_friendly(hz_advertised, scale), + 'hz_actual_friendly' : _hz_short_to_friendly(hz_actual, 0), + 'hz_advertised' : _hz_short_to_full(hz_advertised, scale), + 'hz_actual' : _hz_short_to_full(hz_actual, 0), + + 'stepping' : stepping, + 'model' : model, + 'family' : family, + 'flags' : flags + } + + info = _filter_dict_keys_with_empty_values(info) + g_trace.success() + return info + except Exception as err: + 
g_trace.fail(err) + return {} + +def _get_cpu_info_from_platform_uname(): + + g_trace.header('Tying to get info from platform.uname ...') + + try: + uname = DataSource.uname_string_raw.split(',')[0] + + family, model, stepping = (None, None, None) + entries = uname.split(' ') + + if 'Family' in entries and entries.index('Family') < len(entries)-1: + i = entries.index('Family') + family = int(entries[i + 1]) + + if 'Model' in entries and entries.index('Model') < len(entries)-1: + i = entries.index('Model') + model = int(entries[i + 1]) + + if 'Stepping' in entries and entries.index('Stepping') < len(entries)-1: + i = entries.index('Stepping') + stepping = int(entries[i + 1]) + + info = { + 'family' : family, + 'model' : model, + 'stepping' : stepping + } + info = _filter_dict_keys_with_empty_values(info) + g_trace.success() + return info + except Exception as err: + g_trace.fail(err) + return {} + +def _get_cpu_info_internal(): + ''' + Returns the CPU info by using the best sources of information for your OS. + Returns {} if nothing is found. + ''' + + g_trace.write('!' 
* 80) + + # Get the CPU arch and bits + arch, bits = _parse_arch(DataSource.arch_string_raw) + + friendly_maxsize = { 2**31-1: '32 bit', 2**63-1: '64 bit' }.get(sys.maxsize) or 'unknown bits' + friendly_version = "{0}.{1}.{2}.{3}.{4}".format(*sys.version_info) + PYTHON_VERSION = "{0} ({1})".format(friendly_version, friendly_maxsize) + + info = { + 'python_version' : PYTHON_VERSION, + 'cpuinfo_version' : CPUINFO_VERSION, + 'cpuinfo_version_string' : CPUINFO_VERSION_STRING, + 'arch' : arch, + 'bits' : bits, + 'count' : DataSource.cpu_count, + 'arch_string_raw' : DataSource.arch_string_raw, + } + + g_trace.write("python_version: {0}".format(info['python_version'])) + g_trace.write("cpuinfo_version: {0}".format(info['cpuinfo_version'])) + g_trace.write("arch: {0}".format(info['arch'])) + g_trace.write("bits: {0}".format(info['bits'])) + g_trace.write("count: {0}".format(info['count'])) + g_trace.write("arch_string_raw: {0}".format(info['arch_string_raw'])) + + # Try the Windows wmic + _copy_new_fields(info, _get_cpu_info_from_wmic()) + + # Try the Windows registry + _copy_new_fields(info, _get_cpu_info_from_registry()) + + # Try /proc/cpuinfo + _copy_new_fields(info, _get_cpu_info_from_proc_cpuinfo()) + + # Try cpufreq-info + _copy_new_fields(info, _get_cpu_info_from_cpufreq_info()) + + # Try LSCPU + _copy_new_fields(info, _get_cpu_info_from_lscpu()) + + # Try sysctl + _copy_new_fields(info, _get_cpu_info_from_sysctl()) + + # Try kstat + _copy_new_fields(info, _get_cpu_info_from_kstat()) + + # Try dmesg + _copy_new_fields(info, _get_cpu_info_from_dmesg()) + + # Try /var/run/dmesg.boot + _copy_new_fields(info, _get_cpu_info_from_cat_var_run_dmesg_boot()) + + # Try lsprop ibm,pa-features + _copy_new_fields(info, _get_cpu_info_from_ibm_pa_features()) + + # Try sysinfo + _copy_new_fields(info, _get_cpu_info_from_sysinfo()) + + # Try querying the CPU cpuid register + # FIXME: This should print stdout and stderr to trace log + _copy_new_fields(info, 
_get_cpu_info_from_cpuid()) + + # Try platform.uname + _copy_new_fields(info, _get_cpu_info_from_platform_uname()) + + g_trace.write('!' * 80) + + return info + +def get_cpu_info_json(): + ''' + Returns the CPU info by using the best sources of information for your OS. + Returns the result in a json string + ''' + + import json + + output = None + + # If running under pyinstaller, run normally + if getattr(sys, 'frozen', False): + info = _get_cpu_info_internal() + output = json.dumps(info) + output = "{0}".format(output) + # if not running under pyinstaller, run in another process. + # This is done because multiprocesing has a design flaw that + # causes non main programs to run multiple times on Windows. + else: + from subprocess import Popen, PIPE + + command = [sys.executable, __file__, '--json'] + p1 = Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) + output = p1.communicate()[0] + + if p1.returncode != 0: + return "{}" + + output = output.decode(encoding='UTF-8') + + return output + +def get_cpu_info(): + ''' + Returns the CPU info by using the best sources of information for your OS. 
+ Returns the result in a dict + ''' + + import json + + output = get_cpu_info_json() + + # Convert JSON to Python with non unicode strings + output = json.loads(output, object_hook = _utf_to_str) + + return output + +def main(): + from argparse import ArgumentParser + import json + + # Parse args + parser = ArgumentParser(description='Gets CPU info with pure Python') + parser.add_argument('--json', action='store_true', help='Return the info in JSON format') + parser.add_argument('--version', action='store_true', help='Return the version of py-cpuinfo') + parser.add_argument('--trace', action='store_true', help='Traces code paths used to find CPU info to file') + args = parser.parse_args() + + global g_trace + g_trace = Trace(args.trace, False) + + try: + _check_arch() + except Exception as err: + sys.stderr.write(str(err) + "\n") + sys.exit(1) + + info = _get_cpu_info_internal() + + if not info: + sys.stderr.write("Failed to find cpu info\n") + sys.exit(1) + + if args.json: + print(json.dumps(info)) + elif args.version: + print(CPUINFO_VERSION_STRING) + else: + print('Python Version: {0}'.format(info.get('python_version', ''))) + print('Cpuinfo Version: {0}'.format(info.get('cpuinfo_version_string', ''))) + print('Vendor ID Raw: {0}'.format(info.get('vendor_id_raw', ''))) + print('Hardware Raw: {0}'.format(info.get('hardware_raw', ''))) + print('Brand Raw: {0}'.format(info.get('brand_raw', ''))) + print('Hz Advertised Friendly: {0}'.format(info.get('hz_advertised_friendly', ''))) + print('Hz Actual Friendly: {0}'.format(info.get('hz_actual_friendly', ''))) + print('Hz Advertised: {0}'.format(info.get('hz_advertised', ''))) + print('Hz Actual: {0}'.format(info.get('hz_actual', ''))) + print('Arch: {0}'.format(info.get('arch', ''))) + print('Bits: {0}'.format(info.get('bits', ''))) + print('Count: {0}'.format(info.get('count', ''))) + print('Arch String Raw: {0}'.format(info.get('arch_string_raw', ''))) + print('L1 Data Cache Size: 
{0}'.format(info.get('l1_data_cache_size', ''))) + print('L1 Instruction Cache Size: {0}'.format(info.get('l1_instruction_cache_size', ''))) + print('L2 Cache Size: {0}'.format(info.get('l2_cache_size', ''))) + print('L2 Cache Line Size: {0}'.format(info.get('l2_cache_line_size', ''))) + print('L2 Cache Associativity: {0}'.format(info.get('l2_cache_associativity', ''))) + print('L3 Cache Size: {0}'.format(info.get('l3_cache_size', ''))) + print('Stepping: {0}'.format(info.get('stepping', ''))) + print('Model: {0}'.format(info.get('model', ''))) + print('Family: {0}'.format(info.get('family', ''))) + print('Processor Type: {0}'.format(info.get('processor_type', ''))) + print('Flags: {0}'.format(', '.join(info.get('flags', '')))) + + +if __name__ == '__main__': + main() +else: + g_trace = Trace(False, False) + _check_arch() diff --git a/venv/Lib/site-packages/include/libtcc.h b/venv/Lib/site-packages/include/libtcc.h new file mode 100644 index 0000000..71f088e --- /dev/null +++ b/venv/Lib/site-packages/include/libtcc.h @@ -0,0 +1,132 @@ +#ifndef LIBTCC_H +#define LIBTCC_H + +#ifndef LIBTCCAPI +# define LIBTCCAPI +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/*****************************/ +/* set custom allocator for all allocations (optional), NULL for default. 
*/ +typedef void *TCCReallocFunc(void *ptr, unsigned long size); +LIBTCCAPI void tcc_set_realloc(TCCReallocFunc *my_realloc); + +/*****************************/ +typedef struct TCCState TCCState; + +/* create a new TCC compilation context */ +LIBTCCAPI TCCState *tcc_new(void); + +/* free a TCC compilation context */ +LIBTCCAPI void tcc_delete(TCCState *s); + +/* set CONFIG_TCCDIR at runtime */ +LIBTCCAPI void tcc_set_lib_path(TCCState *s, const char *path); + +/* set error/warning callback (optional) */ +typedef void TCCErrorFunc(void *opaque, const char *msg); +LIBTCCAPI void tcc_set_error_func(TCCState *s, void *error_opaque, TCCErrorFunc *error_func); + +/* set options as from command line (multiple supported) */ +LIBTCCAPI int tcc_set_options(TCCState *s, const char *str); + +/*****************************/ +/* preprocessor */ + +/* add include path */ +LIBTCCAPI int tcc_add_include_path(TCCState *s, const char *pathname); + +/* add in system include path */ +LIBTCCAPI int tcc_add_sysinclude_path(TCCState *s, const char *pathname); + +/* define preprocessor symbol 'sym'. value can be NULL, sym can be "sym=val" */ +LIBTCCAPI void tcc_define_symbol(TCCState *s, const char *sym, const char *value); + +/* undefine preprocess symbol 'sym' */ +LIBTCCAPI void tcc_undefine_symbol(TCCState *s, const char *sym); + +/*****************************/ +/* compiling */ + +/* add a file (C file, dll, object, library, ld script). Return -1 if error. */ +LIBTCCAPI int tcc_add_file(TCCState *s, const char *filename); + +/* compile a string containing a C source. Return -1 if error. */ +LIBTCCAPI int tcc_compile_string(TCCState *s, const char *buf); + +/* Tip: to have more specific errors/warnings from tcc_compile_string(), + you can prefix the string with "#line \"\"\n" */ + +/*****************************/ +/* linking commands */ + +/* set output type. 
MUST BE CALLED before any compilation */ +LIBTCCAPI int tcc_set_output_type(TCCState *s, int output_type); +#define TCC_OUTPUT_MEMORY 1 /* output will be run in memory */ +#define TCC_OUTPUT_EXE 2 /* executable file */ +#define TCC_OUTPUT_DLL 4 /* dynamic library */ +#define TCC_OUTPUT_OBJ 3 /* object file */ +#define TCC_OUTPUT_PREPROCESS 5 /* only preprocess */ + +/* equivalent to -Lpath option */ +LIBTCCAPI int tcc_add_library_path(TCCState *s, const char *pathname); + +/* the library name is the same as the argument of the '-l' option */ +LIBTCCAPI int tcc_add_library(TCCState *s, const char *libraryname); + +/* add a symbol to the compiled program */ +LIBTCCAPI int tcc_add_symbol(TCCState *s, const char *name, const void *val); + +/* output an executable, library or object file. DO NOT call + tcc_relocate() before. */ +LIBTCCAPI int tcc_output_file(TCCState *s, const char *filename); + +/* link and run main() function and return its value. DO NOT call + tcc_relocate() before. */ +LIBTCCAPI int tcc_run(TCCState *s, int argc, char **argv); + +/* do all relocations (needed before using tcc_get_symbol()) */ +LIBTCCAPI int tcc_relocate(TCCState *s1); + +/* return symbol value or NULL if not found */ +LIBTCCAPI void *tcc_get_symbol(TCCState *s, const char *name); + +/* list all (global) symbols and their values via 'symbol_cb()' */ +LIBTCCAPI void tcc_list_symbols(TCCState *s, void *ctx, + void (*symbol_cb)(void *ctx, const char *name, const void *val)); + +/* experimental/advanced section (see libtcc_test_mt.c for an example) */ + +/* catch runtime exceptions (optionally limit backtraces at top_func), + when using tcc_set_options("-bt") and when not using tcc_run() */ +LIBTCCAPI void *_tcc_setjmp(TCCState *s1, void *jmp_buf, void *top_func, void *longjmp); +#define tcc_setjmp(s1,jb,f) setjmp(_tcc_setjmp(s1, jb, f, longjmp)) + +/* debugging */ +/* For debugging to work you have to enable it with tcc_set_options */ + +/* compile a string containing a C source. 
Return -1 if error. + Write the string to file filename if debug is set. */ +LIBTCCAPI int tcc_compile_string_file(TCCState *s, const char *buf, const char *filename); + +/* Output object file. This must be done after tcc_relocate. + It only generates the file if debug is set. + The filename can be loaded with gdb command add-symbol-file */ +LIBTCCAPI int elf_output_obj(TCCState *s1, const char *filename); + +/* Set base address for wasm32 data/stack layout (default 1024). + Call before tcc_output_file(). Only meaningful for TCC_TARGET_WASM32. */ +LIBTCCAPI void tcc_set_wasm_data_base(TCCState *s, unsigned int base); + +/* custom error printer for runtime exceptions. Returning 0 stops backtrace */ +typedef int TCCBtFunc(void *udata, void *pc, const char *file, int line, const char* func, const char *msg); +LIBTCCAPI void tcc_set_backtrace_func(TCCState *s1, void* userdata, TCCBtFunc*); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/venv/Lib/site-packages/lib/tcc.dll b/venv/Lib/site-packages/lib/tcc.dll new file mode 100644 index 0000000..4864106 Binary files /dev/null and b/venv/Lib/site-packages/lib/tcc.dll differ diff --git a/venv/Lib/site-packages/msgpack-1.1.2.dist-info/INSTALLER b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/msgpack-1.1.2.dist-info/METADATA b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/METADATA new file mode 100644 index 0000000..d23f73b --- /dev/null +++ b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/METADATA @@ -0,0 +1,265 @@ +Metadata-Version: 2.4 +Name: msgpack +Version: 1.1.2 +Summary: MessagePack serializer +Author-email: Inada Naoki +License-Expression: Apache-2.0 +Project-URL: Homepage, https://msgpack.org/ +Project-URL: Documentation, https://msgpack-python.readthedocs.io/ +Project-URL: Repository, 
https://github.com/msgpack/msgpack-python/ +Project-URL: Tracker, https://github.com/msgpack/msgpack-python/issues +Project-URL: Changelog, https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst +Keywords: msgpack,messagepack,serializer,serialization,binary +Classifier: Development Status :: 5 - Production/Stable +Classifier: Operating System :: OS Independent +Classifier: Topic :: File Formats +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Requires-Python: >=3.9 +Description-Content-Type: text/markdown +License-File: COPYING +Dynamic: license-file + +# MessagePack for Python + +[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) +[![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) + +## What is this? + +[MessagePack](https://msgpack.org/) is an efficient binary serialization format. +It lets you exchange data among multiple languages like JSON. +But it's faster and smaller. +This package provides CPython bindings for reading and writing MessagePack data. + +## Install + +``` +$ pip install msgpack +``` + +### Pure Python implementation + +The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. + +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. + + +### Windows + +If you can't use a binary distribution, you need to install Visual Studio +or the Windows SDK on Windows. +Without the extension, the pure Python implementation on CPython runs slowly. + + +## How to use + +### One-shot pack & unpack + +Use `packb` for packing and `unpackb` for unpacking. +msgpack provides `dumps` and `loads` as aliases for compatibility with +`json` and `pickle`. 
+ +`pack` and `dump` pack to a file-like object. +`unpack` and `load` unpack from a file-like object. + +```pycon +>>> import msgpack +>>> msgpack.packb([1, 2, 3]) +'\x93\x01\x02\x03' +>>> msgpack.unpackb(_) +[1, 2, 3] +``` + +Read the docstring for options. + + +### Streaming unpacking + +`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one +stream (or from bytes provided through its `feed` method). + +```py +import msgpack +from io import BytesIO + +buf = BytesIO() +for i in range(100): + buf.write(msgpack.packb(i)) + +buf.seek(0) + +unpacker = msgpack.Unpacker(buf) +for unpacked in unpacker: + print(unpacked) +``` + + +### Packing/unpacking of custom data types + +It is also possible to pack/unpack custom data types. Here is an example for +`datetime.datetime`. + +```py +import datetime +import msgpack + +useful_dict = { + "id": 1, + "created": datetime.datetime.now(), +} + +def decode_datetime(obj): + if '__datetime__' in obj: + obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") + return obj + +def encode_datetime(obj): + if isinstance(obj, datetime.datetime): + return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} + return obj + + +packed_dict = msgpack.packb(useful_dict, default=encode_datetime) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) +``` + +`Unpacker`'s `object_hook` callback receives a dict; the +`object_pairs_hook` callback may instead be used to receive a list of +key-value pairs. + +NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. +See `datetime` option in `Packer` docstring. + + +### Extended types + +It is also possible to pack/unpack custom data types using the **ext** type. + +```pycon +>>> import msgpack +>>> import array +>>> def default(obj): +... if isinstance(obj, array.array) and obj.typecode == 'd': +... return msgpack.ExtType(42, obj.tostring()) +... raise TypeError("Unknown type: %r" % (obj,)) +... 
+>>> def ext_hook(code, data): +... if code == 42: +... a = array.array('d') +... a.fromstring(data) +... return a +... return ExtType(code, data) +... +>>> data = array.array('d', [1.2, 3.4]) +>>> packed = msgpack.packb(data, default=default) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) +>>> data == unpacked +True +``` + + +### Advanced unpacking control + +As an alternative to iteration, `Unpacker` objects provide `unpack`, +`skip`, `read_array_header`, and `read_map_header` methods. The former two +read an entire message from the stream, respectively deserializing and returning +the result, or ignoring it. The latter two methods return the number of elements +in the upcoming container, so that each element in an array, or key-value pair +in a map, can be unpacked or skipped individually. + + +## Notes + +### String and binary types in the old MessagePack spec + +Early versions of msgpack didn't distinguish string and binary types. +The type for representing both string and binary types was named **raw**. + +You can pack into and unpack from this old spec using `use_bin_type=False` +and `raw=True` options. + +```pycon +>>> import msgpack +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) +[b'spam', b'eggs'] +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) +[b'spam', 'eggs'] +``` + +### ext type + +To use the **ext** type, pass a `msgpack.ExtType` object to the packer. + +```pycon +>>> import msgpack +>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) +>>> msgpack.unpackb(packed) +ExtType(code=42, data='xyzzy') +``` + +You can use it with `default` and `ext_hook`. See below. + + +### Security + +When unpacking data received from an unreliable source, msgpack provides +two security options. + +`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. +It is also used to limit preallocated list sizes. 
+ +`strict_map_key` (default: `True`) limits the type of map keys to bytes and str. +While the MessagePack spec doesn't limit map key types, +there is a risk of a hash DoS. +If you need to support other types for map keys, use `strict_map_key=False`. + + +### Performance tips + +CPython's GC starts when the number of allocated objects grows. +This means unpacking may trigger unnecessary GC. +You can use `gc.disable()` when unpacking a large message. + +A list is the default sequence type in Python. +However, a tuple is lighter than a list. +You can use `use_list=False` while unpacking when performance is important. + + +## Major breaking changes in the history + +### msgpack 0.5 + +The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### msgpack 1.0 + +* Python 2 support + + * The extension module no longer supports Python 2. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + + * msgpack 1.0.6 drops official support of Python 2.7, as pip and + GitHub Action "setup-python" no longer supports Python 2.7. + +* Packer + + * Packer uses `use_bin_type=True` by default. + Bytes are encoded in the bin type in MessagePack. + * The `encoding` option is removed. UTF-8 is always used. + +* Unpacker + + * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings + and decodes them to Python str (Unicode) objects. + * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). + * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. + You need to pass `max_buffer_size=0` if you have large but safe data. + * The default value of `strict_map_key` is changed to True to avoid hash DoS. 
+ You need to pass `strict_map_key=False` if you have data that contain map keys + whose type is neither bytes nor str. diff --git a/venv/Lib/site-packages/msgpack-1.1.2.dist-info/RECORD b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/RECORD new file mode 100644 index 0000000..1d117b0 --- /dev/null +++ b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/RECORD @@ -0,0 +1,15 @@ +msgpack-1.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +msgpack-1.1.2.dist-info/METADATA,sha256=AzsCYs3CsW_Ihmrc0TKSERKnM0C4ZRHj0obd6hZ7EWQ,8356 +msgpack-1.1.2.dist-info/RECORD,, +msgpack-1.1.2.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101 +msgpack-1.1.2.dist-info/licenses/COPYING,sha256=T73_QuukWTwW96fqcHiIOytzSHCh4rY2KEzi3OYr9Pc,628 +msgpack-1.1.2.dist-info/top_level.txt,sha256=2tykSY1pXdiA2xYTDR6jPw0qI5ZGxRihyhf4S5hZyXk,8 +msgpack/__init__.py,sha256=VyYtXI_OKFlyox4xlPRWvwU74d5ll6L6Oj01CJcxdqg,1164 +msgpack/__pycache__/__init__.cpython-311.pyc,, +msgpack/__pycache__/exceptions.cpython-311.pyc,, +msgpack/__pycache__/ext.cpython-311.pyc,, +msgpack/__pycache__/fallback.cpython-311.pyc,, +msgpack/_cmsgpack.cp311-win_amd64.pyd,sha256=7zWzgU8v1k7_LDlx4HhhNaKcEa6b2p-HBXH988Wr-BY,128000 +msgpack/exceptions.py,sha256=2fCtczricqQgdT3NtW6cTqmZn3WA7GQtmlPuT-NhLyM,1129 +msgpack/ext.py,sha256=9gDKxuEHfYWdPRzcpFwFYyuBx0puprlQflDGOaccRhE,5896 +msgpack/fallback.py,sha256=EAP6g9N7tTWvTw01RtnXXdYg-zZn21FcGVfdBid8aUg,33319 diff --git a/venv/Lib/site-packages/msgpack-1.1.2.dist-info/WHEEL b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/WHEEL new file mode 100644 index 0000000..8f98e0a --- /dev/null +++ b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp311-cp311-win_amd64 + diff --git a/venv/Lib/site-packages/msgpack-1.1.2.dist-info/licenses/COPYING b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/licenses/COPYING new file mode 100644 index 
0000000..f067af3 --- /dev/null +++ b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/licenses/COPYING @@ -0,0 +1,14 @@ +Copyright (C) 2008-2011 INADA Naoki + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/venv/Lib/site-packages/msgpack-1.1.2.dist-info/top_level.txt b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/top_level.txt new file mode 100644 index 0000000..3aae276 --- /dev/null +++ b/venv/Lib/site-packages/msgpack-1.1.2.dist-info/top_level.txt @@ -0,0 +1 @@ +msgpack diff --git a/venv/Lib/site-packages/msgpack/__init__.py b/venv/Lib/site-packages/msgpack/__init__.py new file mode 100644 index 0000000..f3266b7 --- /dev/null +++ b/venv/Lib/site-packages/msgpack/__init__.py @@ -0,0 +1,55 @@ +# ruff: noqa: F401 +import os + +from .exceptions import * # noqa: F403 +from .ext import ExtType, Timestamp + +version = (1, 1, 2) +__version__ = "1.1.2" + + +if os.environ.get("MSGPACK_PUREPYTHON"): + from .fallback import Packer, Unpacker, unpackb +else: + try: + from ._cmsgpack import Packer, Unpacker, unpackb + except ImportError: + from .fallback import Packer, Unpacker, unpackb + + +def pack(o, stream, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. + """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + + +def packb(o, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. 
+ """ + return Packer(**kwargs).pack(o) + + +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `stream` contains extra bytes. + See :class:`Unpacker` for options. + """ + data = stream.read() + return unpackb(data, **kwargs) + + +# alias for compatibility to simplejson/marshal/pickle. +load = unpack +loads = unpackb + +dump = pack +dumps = packb diff --git a/venv/Lib/site-packages/msgpack/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/msgpack/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..30e4663 Binary files /dev/null and b/venv/Lib/site-packages/msgpack/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/msgpack/__pycache__/exceptions.cpython-311.pyc b/venv/Lib/site-packages/msgpack/__pycache__/exceptions.cpython-311.pyc new file mode 100644 index 0000000..5b8f091 Binary files /dev/null and b/venv/Lib/site-packages/msgpack/__pycache__/exceptions.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/msgpack/__pycache__/ext.cpython-311.pyc b/venv/Lib/site-packages/msgpack/__pycache__/ext.cpython-311.pyc new file mode 100644 index 0000000..5076de2 Binary files /dev/null and b/venv/Lib/site-packages/msgpack/__pycache__/ext.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/msgpack/__pycache__/fallback.cpython-311.pyc b/venv/Lib/site-packages/msgpack/__pycache__/fallback.cpython-311.pyc new file mode 100644 index 0000000..8c958e5 Binary files /dev/null and b/venv/Lib/site-packages/msgpack/__pycache__/fallback.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/msgpack/_cmsgpack.cp311-win_amd64.pyd b/venv/Lib/site-packages/msgpack/_cmsgpack.cp311-win_amd64.pyd new file mode 100644 index 0000000..38131a3 Binary files /dev/null and b/venv/Lib/site-packages/msgpack/_cmsgpack.cp311-win_amd64.pyd differ diff --git a/venv/Lib/site-packages/msgpack/exceptions.py b/venv/Lib/site-packages/msgpack/exceptions.py new file mode 100644 index 
0000000..d6d2615 --- /dev/null +++ b/venv/Lib/site-packages/msgpack/exceptions.py @@ -0,0 +1,48 @@ +class UnpackException(Exception): + """Base class for some exceptions raised while unpacking. + + NOTE: unpack may raise exception other than subclass of + UnpackException. If you want to catch all error, catch + Exception instead. + """ + + +class BufferFull(UnpackException): + pass + + +class OutOfData(UnpackException): + pass + + +class FormatError(ValueError, UnpackException): + """Invalid msgpack format""" + + +class StackError(ValueError, UnpackException): + """Too nested""" + + +# Deprecated. Use ValueError instead +UnpackValueError = ValueError + + +class ExtraData(UnpackValueError): + """ExtraData is raised when there is trailing data. + + This exception is raised while only one-shot (not streaming) + unpack. + """ + + def __init__(self, unpacked, extra): + self.unpacked = unpacked + self.extra = extra + + def __str__(self): + return "unpack(b) received extra data." + + +# Deprecated. Use Exception instead to catch all exception during packing. +PackException = Exception +PackValueError = ValueError +PackOverflowError = OverflowError diff --git a/venv/Lib/site-packages/msgpack/ext.py b/venv/Lib/site-packages/msgpack/ext.py new file mode 100644 index 0000000..9694819 --- /dev/null +++ b/venv/Lib/site-packages/msgpack/ext.py @@ -0,0 +1,170 @@ +import datetime +import struct +from collections import namedtuple + + +class ExtType(namedtuple("ExtType", "code data")): + """ExtType represents ext type in msgpack.""" + + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super().__new__(cls, code, data) + + +class Timestamp: + """Timestamp represents the Timestamp extension type in msgpack. 
+ + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. + When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and + unpack `Timestamp`. + + This class is immutable: Do not override seconds and nanoseconds. + """ + + __slots__ = ["seconds", "nanoseconds"] + + def __init__(self, seconds, nanoseconds=0): + """Initialize a Timestamp object. + + :param int seconds: + Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). + May be negative. + + :param int nanoseconds: + Number of nanoseconds to add to `seconds` to get fractional time. + Maximum is 999_999_999. Default is 0. + + Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. + """ + if not isinstance(seconds, int): + raise TypeError("seconds must be an integer") + if not isinstance(nanoseconds, int): + raise TypeError("nanoseconds must be an integer") + if not (0 <= nanoseconds < 10**9): + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") + self.seconds = seconds + self.nanoseconds = nanoseconds + + def __repr__(self): + """String representation of Timestamp.""" + return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" + + def __eq__(self, other): + """Check for equality with another Timestamp object""" + if type(other) is self.__class__: + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + return False + + def __ne__(self, other): + """not-equals method (see :func:`__eq__()`)""" + return not self.__eq__(other) + + def __hash__(self): + return hash((self.seconds, self.nanoseconds)) + + @staticmethod + def from_bytes(b): + """Unpack bytes into a `Timestamp` object. + + Used for pure-Python msgpack unpacking. 
+ + :param b: Payload from msgpack ext message with code -1 + :type b: bytes + + :returns: Timestamp object unpacked from msgpack ext payload + :rtype: Timestamp + """ + if len(b) == 4: + seconds = struct.unpack("!L", b)[0] + nanoseconds = 0 + elif len(b) == 8: + data64 = struct.unpack("!Q", b)[0] + seconds = data64 & 0x00000003FFFFFFFF + nanoseconds = data64 >> 34 + elif len(b) == 12: + nanoseconds, seconds = struct.unpack("!Iq", b) + else: + raise ValueError( + "Timestamp type can only be created from 32, 64, or 96-bit byte objects" + ) + return Timestamp(seconds, nanoseconds) + + def to_bytes(self): + """Pack this Timestamp object into bytes. + + Used for pure-Python msgpack packing. + + :returns data: Payload for EXT message with code -1 (timestamp type) + :rtype: bytes + """ + if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits + data64 = self.nanoseconds << 34 | self.seconds + if data64 & 0xFFFFFFFF00000000 == 0: + # nanoseconds is zero and seconds < 2**32, so timestamp 32 + data = struct.pack("!L", data64) + else: + # timestamp 64 + data = struct.pack("!Q", data64) + else: + # timestamp 96 + data = struct.pack("!Iq", self.nanoseconds, self.seconds) + return data + + @staticmethod + def from_unix(unix_sec): + """Create a Timestamp from posix timestamp in seconds. + + :param unix_float: Posix timestamp in seconds. + :type unix_float: int or float + """ + seconds = int(unix_sec // 1) + nanoseconds = int((unix_sec % 1) * 10**9) + return Timestamp(seconds, nanoseconds) + + def to_unix(self): + """Get the timestamp as a floating-point value. + + :returns: posix timestamp + :rtype: float + """ + return self.seconds + self.nanoseconds / 1e9 + + @staticmethod + def from_unix_nano(unix_ns): + """Create a Timestamp from posix timestamp in nanoseconds. + + :param int unix_ns: Posix timestamp in nanoseconds. 
+ :rtype: Timestamp + """ + return Timestamp(*divmod(unix_ns, 10**9)) + + def to_unix_nano(self): + """Get the timestamp as a unixtime in nanoseconds. + + :returns: posix timestamp in nanoseconds + :rtype: int + """ + return self.seconds * 10**9 + self.nanoseconds + + def to_datetime(self): + """Get the timestamp as a UTC datetime. + + :rtype: `datetime.datetime` + """ + utc = datetime.timezone.utc + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( + seconds=self.seconds, microseconds=self.nanoseconds // 1000 + ) + + @staticmethod + def from_datetime(dt): + """Create a Timestamp from datetime with tzinfo. + + :rtype: Timestamp + """ + return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/venv/Lib/site-packages/msgpack/fallback.py b/venv/Lib/site-packages/msgpack/fallback.py new file mode 100644 index 0000000..b02e47c --- /dev/null +++ b/venv/Lib/site-packages/msgpack/fallback.py @@ -0,0 +1,929 @@ +"""Fallback pure Python implementation of msgpack""" + +import struct +import sys +from datetime import datetime as _DateTime + +if hasattr(sys, "pypy_version_info"): + from __pypy__ import newlist_hint + from __pypy__.builders import BytesBuilder + + _USING_STRINGBUILDER = True + + class BytesIO: + def __init__(self, s=b""): + if s: + self.builder = BytesBuilder(len(s)) + self.builder.append(s) + else: + self.builder = BytesBuilder() + + def write(self, s): + if isinstance(s, memoryview): + s = s.tobytes() + elif isinstance(s, bytearray): + s = bytes(s) + self.builder.append(s) + + def getvalue(self): + return self.builder.build() + +else: + from io import BytesIO + + _USING_STRINGBUILDER = False + + def newlist_hint(size): + return [] + + +from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError +from .ext import ExtType, Timestamp + +EX_SKIP = 0 +EX_CONSTRUCT = 1 +EX_READ_ARRAY_HEADER = 2 +EX_READ_MAP_HEADER = 3 + +TYPE_IMMEDIATE = 0 +TYPE_ARRAY = 1 +TYPE_MAP = 2 +TYPE_RAW = 3 
+TYPE_BIN = 4 +TYPE_EXT = 5 + +DEFAULT_RECURSE_LIMIT = 511 + + +def _check_type_strict(obj, t, type=type, tuple=tuple): + if type(t) is tuple: + return type(obj) in t + else: + return type(obj) is t + + +def _get_data_from_buffer(obj): + view = memoryview(obj) + if view.itemsize != 1: + raise ValueError("cannot unpack from multi-byte object") + return view + + +def unpackb(packed, **kwargs): + """ + Unpack an object from `packed`. + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs) + unpacker.feed(packed) + try: + ret = unpacker._unpack() + except OutOfData: + raise ValueError("Unpack failed: incomplete input") + except RecursionError: + raise StackError + if unpacker._got_extradata(): + raise ExtraData(ret, unpacker._get_extradata()) + return ret + + +_NO_FORMAT_USED = "" +_MSGPACK_HEADERS = { + 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), + 0xC5: (2, ">H", TYPE_BIN), + 0xC6: (4, ">I", TYPE_BIN), + 0xC7: (2, "Bb", TYPE_EXT), + 0xC8: (3, ">Hb", TYPE_EXT), + 0xC9: (5, ">Ib", TYPE_EXT), + 0xCA: (4, ">f"), + 0xCB: (8, ">d"), + 0xCC: (1, _NO_FORMAT_USED), + 0xCD: (2, ">H"), + 0xCE: (4, ">I"), + 0xCF: (8, ">Q"), + 0xD0: (1, "b"), + 0xD1: (2, ">h"), + 0xD2: (4, ">i"), + 0xD3: (8, ">q"), + 0xD4: (1, "b1s", TYPE_EXT), + 0xD5: (2, "b2s", TYPE_EXT), + 0xD6: (4, "b4s", TYPE_EXT), + 0xD7: (8, "b8s", TYPE_EXT), + 0xD8: (16, "b16s", TYPE_EXT), + 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), + 0xDA: (2, ">H", TYPE_RAW), + 0xDB: (4, ">I", TYPE_RAW), + 0xDC: (2, ">H", TYPE_ARRAY), + 0xDD: (4, ">I", TYPE_ARRAY), + 0xDE: (2, ">H", TYPE_MAP), + 0xDF: (4, ">I", TYPE_MAP), +} + + +class Unpacker: + """Streaming unpacker. 
+ + Arguments: + + :param file_like: + File-like object having `.read(n)` method. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. + + :param int read_size: + Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + + :param bool use_list: + If true, unpack msgpack array to Python list. + Otherwise, unpack to Python tuple. (default: True) + + :param bool raw: + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). + + :param bool strict_map_key: + If true (default), only str or bytes are accepted for map (dict) keys. + + :param object_hook: + When specified, it should be callable. + Unpacker calls it with a dict argument after unpacking msgpack map. + (See also simplejson) + + :param object_pairs_hook: + When specified, it should be callable. + Unpacker calls it with a list of key-value pairs after unpacking msgpack map. + (See also simplejson) + + :param str unicode_errors: + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. + + :param int max_buffer_size: + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). + Raises `BufferFull` exception when it is insufficient. + You should set this parameter when unpacking data from untrusted source. + + :param int max_str_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size) + + :param int max_bin_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size) + + :param int max_array_len: + Limits max length of array. 
+ (default: max_buffer_size) + + :param int max_map_len: + Limits max length of map. + (default: max_buffer_size//2) + + :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. (default: max_buffer_size) + + Example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + Example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + """ + + def __init__( + self, + file_like=None, + *, + read_size=0, + use_list=True, + raw=False, + timestamp=0, + strict_map_key=True, + object_hook=None, + object_pairs_hook=None, + list_hook=None, + unicode_errors=None, + max_buffer_size=100 * 1024 * 1024, + ext_hook=ExtType, + max_str_len=-1, + max_bin_len=-1, + max_array_len=-1, + max_map_len=-1, + max_ext_len=-1, + ): + if unicode_errors is None: + unicode_errors = "strict" + + if file_like is None: + self._feeding = True + else: + if not callable(file_like.read): + raise TypeError("`file_like.read` must be callable") + self.file_like = file_like + self._feeding = False + + #: array of bytes fed. + self._buffer = bytearray() + #: Which position we currently reads + self._buff_i = 0 + + # When Unpacker is used as an iterable, between the calls to next(), + # the buffer is not "consumed" completely, for efficiency sake. + # Instead, it is done sloppily. To make sure we raise BufferFull at + # the correct moments, we have to keep track of how sloppy we were. 
+ # Furthermore, when the buffer is incomplete (that is: in the case + # we raise an OutOfData) we need to rollback the buffer to the correct + # state, which _buf_checkpoint records. + self._buf_checkpoint = 0 + + if not max_buffer_size: + max_buffer_size = 2**31 - 1 + if max_str_len == -1: + max_str_len = max_buffer_size + if max_bin_len == -1: + max_bin_len = max_buffer_size + if max_array_len == -1: + max_array_len = max_buffer_size + if max_map_len == -1: + max_map_len = max_buffer_size // 2 + if max_ext_len == -1: + max_ext_len = max_buffer_size + + self._max_buffer_size = max_buffer_size + if read_size > self._max_buffer_size: + raise ValueError("read_size must be smaller than max_buffer_size") + self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) + self._raw = bool(raw) + self._strict_map_key = bool(strict_map_key) + self._unicode_errors = unicode_errors + self._use_list = use_list + if not (0 <= timestamp <= 3): + raise ValueError("timestamp must be 0..3") + self._timestamp = timestamp + self._list_hook = list_hook + self._object_hook = object_hook + self._object_pairs_hook = object_pairs_hook + self._ext_hook = ext_hook + self._max_str_len = max_str_len + self._max_bin_len = max_bin_len + self._max_array_len = max_array_len + self._max_map_len = max_map_len + self._max_ext_len = max_ext_len + self._stream_offset = 0 + + if list_hook is not None and not callable(list_hook): + raise TypeError("`list_hook` is not callable") + if object_hook is not None and not callable(object_hook): + raise TypeError("`object_hook` is not callable") + if object_pairs_hook is not None and not callable(object_pairs_hook): + raise TypeError("`object_pairs_hook` is not callable") + if object_hook is not None and object_pairs_hook is not None: + raise TypeError("object_pairs_hook and object_hook are mutually exclusive") + if not callable(ext_hook): + raise TypeError("`ext_hook` is not callable") + + def feed(self, next_bytes): + assert self._feeding + view = 
_get_data_from_buffer(next_bytes) + if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size: + raise BufferFull + + # Strip buffer before checkpoint before reading file. + if self._buf_checkpoint > 0: + del self._buffer[: self._buf_checkpoint] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + + # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython + self._buffer.extend(view) + view.release() + + def _consume(self): + """Gets rid of the used parts of the buffer.""" + self._stream_offset += self._buff_i - self._buf_checkpoint + self._buf_checkpoint = self._buff_i + + def _got_extradata(self): + return self._buff_i < len(self._buffer) + + def _get_extradata(self): + return self._buffer[self._buff_i :] + + def read_bytes(self, n): + ret = self._read(n, raise_outofdata=False) + self._consume() + return ret + + def _read(self, n, raise_outofdata=True): + # (int) -> bytearray + self._reserve(n, raise_outofdata=raise_outofdata) + i = self._buff_i + ret = self._buffer[i : i + n] + self._buff_i = i + len(ret) + return ret + + def _reserve(self, n, raise_outofdata=True): + remain_bytes = len(self._buffer) - self._buff_i - n + + # Fast path: buffer has n bytes already + if remain_bytes >= 0: + return + + if self._feeding: + self._buff_i = self._buf_checkpoint + raise OutOfData + + # Strip buffer before checkpoint before reading file. 
+ if self._buf_checkpoint > 0: + del self._buffer[: self._buf_checkpoint] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + + # Read from file + remain_bytes = -remain_bytes + if remain_bytes + len(self._buffer) > self._max_buffer_size: + raise BufferFull + while remain_bytes > 0: + to_read_bytes = max(self._read_size, remain_bytes) + read_data = self.file_like.read(to_read_bytes) + if not read_data: + break + assert isinstance(read_data, bytes) + self._buffer += read_data + remain_bytes -= len(read_data) + + if len(self._buffer) < n + self._buff_i and raise_outofdata: + self._buff_i = 0 # rollback + raise OutOfData + + def _read_header(self): + typ = TYPE_IMMEDIATE + n = 0 + obj = None + self._reserve(1) + b = self._buffer[self._buff_i] + self._buff_i += 1 + if b & 0b10000000 == 0: + obj = b + elif b & 0b11100000 == 0b11100000: + obj = -1 - (b ^ 0xFF) + elif b & 0b11100000 == 0b10100000: + n = b & 0b00011111 + typ = TYPE_RAW + if n > self._max_str_len: + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") + obj = self._read(n) + elif b & 0b11110000 == 0b10010000: + n = b & 0b00001111 + typ = TYPE_ARRAY + if n > self._max_array_len: + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + elif b & 0b11110000 == 0b10000000: + n = b & 0b00001111 + typ = TYPE_MAP + if n > self._max_map_len: + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + elif b == 0xC0: + obj = None + elif b == 0xC2: + obj = False + elif b == 0xC3: + obj = True + elif 0xC4 <= b <= 0xC6: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + n = self._buffer[self._buff_i] + self._buff_i += size + if n > self._max_bin_len: + raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") + obj = self._read(n) + elif 0xC7 <= b <= 0xC9: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + L, n = struct.unpack_from(fmt, 
self._buffer, self._buff_i) + self._buff_i += size + if L > self._max_ext_len: + raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") + obj = self._read(L) + elif 0xCA <= b <= 0xD3: + size, fmt = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + obj = self._buffer[self._buff_i] + self._buff_i += size + elif 0xD4 <= b <= 0xD8: + size, fmt, typ = _MSGPACK_HEADERS[b] + if self._max_ext_len < size: + raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") + self._reserve(size + 1) + n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + 1 + elif 0xD9 <= b <= 0xDB: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + else: + n = self._buffer[self._buff_i] + self._buff_i += size + if n > self._max_str_len: + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") + obj = self._read(n) + elif 0xDC <= b <= 0xDD: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + if n > self._max_array_len: + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + elif 0xDE <= b <= 0xDF: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + if n > self._max_map_len: + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + else: + raise FormatError("Unknown header: 0x%x" % b) + return typ, n, obj + + def _unpack(self, execute=EX_CONSTRUCT): + typ, n, obj = self._read_header() + + if execute == EX_READ_ARRAY_HEADER: + if typ != TYPE_ARRAY: + raise ValueError("Expected array") + return n + if execute == EX_READ_MAP_HEADER: + if typ != TYPE_MAP: + raise ValueError("Expected map") + return n + # TODO should we eliminate the recursion? 
+ if typ == TYPE_ARRAY: + if execute == EX_SKIP: + for i in range(n): + # TODO check whether we need to call `list_hook` + self._unpack(EX_SKIP) + return + ret = newlist_hint(n) + for i in range(n): + ret.append(self._unpack(EX_CONSTRUCT)) + if self._list_hook is not None: + ret = self._list_hook(ret) + # TODO is the interaction between `list_hook` and `use_list` ok? + return ret if self._use_list else tuple(ret) + if typ == TYPE_MAP: + if execute == EX_SKIP: + for i in range(n): + # TODO check whether we need to call hooks + self._unpack(EX_SKIP) + self._unpack(EX_SKIP) + return + if self._object_pairs_hook is not None: + ret = self._object_pairs_hook( + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) + ) + else: + ret = {} + for _ in range(n): + key = self._unpack(EX_CONSTRUCT) + if self._strict_map_key and type(key) not in (str, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) + if isinstance(key, str): + key = sys.intern(key) + ret[key] = self._unpack(EX_CONSTRUCT) + if self._object_hook is not None: + ret = self._object_hook(ret) + return ret + if execute == EX_SKIP: + return + if typ == TYPE_RAW: + if self._raw: + obj = bytes(obj) + else: + obj = obj.decode("utf_8", self._unicode_errors) + return obj + if typ == TYPE_BIN: + return bytes(obj) + if typ == TYPE_EXT: + if n == -1: # timestamp + ts = Timestamp.from_bytes(bytes(obj)) + if self._timestamp == 1: + return ts.to_unix() + elif self._timestamp == 2: + return ts.to_unix_nano() + elif self._timestamp == 3: + return ts.to_datetime() + else: + return ts + else: + return self._ext_hook(n, bytes(obj)) + assert typ == TYPE_IMMEDIATE + return obj + + def __iter__(self): + return self + + def __next__(self): + try: + ret = self._unpack(EX_CONSTRUCT) + self._consume() + return ret + except OutOfData: + self._consume() + raise StopIteration + except RecursionError: + raise StackError + + next = __next__ + + def skip(self): + self._unpack(EX_SKIP) + 
self._consume() + + def unpack(self): + try: + ret = self._unpack(EX_CONSTRUCT) + except RecursionError: + raise StackError + self._consume() + return ret + + def read_array_header(self): + ret = self._unpack(EX_READ_ARRAY_HEADER) + self._consume() + return ret + + def read_map_header(self): + ret = self._unpack(EX_READ_MAP_HEADER) + self._consume() + return ret + + def tell(self): + return self._stream_offset + + +class Packer: + """ + MessagePack Packer + + Usage:: + + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) + + Packer's constructor has some keyword arguments: + + :param default: + When specified, it should be callable. + Convert user type to builtin type that Packer supports. + See also simplejson's document. + + :param bool use_single_float: + Use single precision float type for float. (default: False) + + :param bool autoreset: + Reset buffer after each pack and return its content as `bytes`. (default: True). + If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + + :param bool use_bin_type: + Use bin type introduced in msgpack spec 2.0 for bytes. + It also enables str8 type for unicode. (default: True) + + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. + + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + + :param str unicode_errors: + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + Internal buffer size. 
This option is used only for C implementation. + """ + + def __init__( + self, + *, + default=None, + use_single_float=False, + autoreset=True, + use_bin_type=True, + strict_types=False, + datetime=False, + unicode_errors=None, + buf_size=None, + ): + self._strict_types = strict_types + self._use_float = use_single_float + self._autoreset = autoreset + self._use_bin_type = use_bin_type + self._buffer = BytesIO() + self._datetime = bool(datetime) + self._unicode_errors = unicode_errors or "strict" + if default is not None and not callable(default): + raise TypeError("default must be callable") + self._default = default + + def _pack( + self, + obj, + nest_limit=DEFAULT_RECURSE_LIMIT, + check=isinstance, + check_type_strict=_check_type_strict, + ): + default_used = False + if self._strict_types: + check = check_type_strict + list_types = list + else: + list_types = (list, tuple) + while True: + if nest_limit < 0: + raise ValueError("recursion limit exceeded") + if obj is None: + return self._buffer.write(b"\xc0") + if check(obj, bool): + if obj: + return self._buffer.write(b"\xc3") + return self._buffer.write(b"\xc2") + if check(obj, int): + if 0 <= obj < 0x80: + return self._buffer.write(struct.pack("B", obj)) + if -0x20 <= obj < 0: + return self._buffer.write(struct.pack("b", obj)) + if 0x80 <= obj <= 0xFF: + return self._buffer.write(struct.pack("BB", 0xCC, obj)) + if -0x80 <= obj < 0: + return self._buffer.write(struct.pack(">Bb", 0xD0, obj)) + if 0xFF < obj <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xCD, obj)) + if -0x8000 <= obj < -0x80: + return self._buffer.write(struct.pack(">Bh", 0xD1, obj)) + if 0xFFFF < obj <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xCE, obj)) + if -0x80000000 <= obj < -0x8000: + return self._buffer.write(struct.pack(">Bi", 0xD2, obj)) + if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF: + return self._buffer.write(struct.pack(">BQ", 0xCF, obj)) + if -0x8000000000000000 <= obj < -0x80000000: + return 
self._buffer.write(struct.pack(">Bq", 0xD3, obj)) + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = True + continue + raise OverflowError("Integer value out of range") + if check(obj, (bytes, bytearray)): + n = len(obj) + if n >= 2**32: + raise ValueError("%s is too large" % type(obj).__name__) + self._pack_bin_header(n) + return self._buffer.write(obj) + if check(obj, str): + obj = obj.encode("utf-8", self._unicode_errors) + n = len(obj) + if n >= 2**32: + raise ValueError("String is too large") + self._pack_raw_header(n) + return self._buffer.write(obj) + if check(obj, memoryview): + n = obj.nbytes + if n >= 2**32: + raise ValueError("Memoryview is too large") + self._pack_bin_header(n) + return self._buffer.write(obj) + if check(obj, float): + if self._use_float: + return self._buffer.write(struct.pack(">Bf", 0xCA, obj)) + return self._buffer.write(struct.pack(">Bd", 0xCB, obj)) + if check(obj, (ExtType, Timestamp)): + if check(obj, Timestamp): + code = -1 + data = obj.to_bytes() + else: + code = obj.code + data = obj.data + assert isinstance(code, int) + assert isinstance(data, bytes) + L = len(data) + if L == 1: + self._buffer.write(b"\xd4") + elif L == 2: + self._buffer.write(b"\xd5") + elif L == 4: + self._buffer.write(b"\xd6") + elif L == 8: + self._buffer.write(b"\xd7") + elif L == 16: + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xC7, L)) + elif L <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xC8, L)) + else: + self._buffer.write(struct.pack(">BI", 0xC9, L)) + self._buffer.write(struct.pack("b", code)) + self._buffer.write(data) + return + if check(obj, list_types): + n = len(obj) + self._pack_array_header(n) + for i in range(n): + self._pack(obj[i], nest_limit - 1) + return + if check(obj, dict): + return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) + + if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: + obj = 
Timestamp.from_datetime(obj) + default_used = 1 + continue + + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = 1 + continue + + if self._datetime and check(obj, _DateTime): + raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") + + raise TypeError(f"Cannot serialize {obj!r}") + + def pack(self, obj): + try: + self._pack(obj) + except: + self._buffer = BytesIO() # force reset + raise + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_map_pairs(self, pairs): + self._pack_map_pairs(len(pairs), pairs) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_array_header(self, n): + if n >= 2**32: + raise ValueError + self._pack_array_header(n) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_map_header(self, n): + if n >= 2**32: + raise ValueError + self._pack_map_header(n) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_ext_type(self, typecode, data): + if not isinstance(typecode, int): + raise TypeError("typecode must have int type.") + if not 0 <= typecode <= 127: + raise ValueError("typecode should be 0-127") + if not isinstance(data, bytes): + raise TypeError("data must have bytes type") + L = len(data) + if L > 0xFFFFFFFF: + raise ValueError("Too large data") + if L == 1: + self._buffer.write(b"\xd4") + elif L == 2: + self._buffer.write(b"\xd5") + elif L == 4: + self._buffer.write(b"\xd6") + elif L == 8: + self._buffer.write(b"\xd7") + elif L == 16: + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(b"\xc7" + struct.pack("B", L)) + elif L <= 0xFFFF: + self._buffer.write(b"\xc8" + struct.pack(">H", L)) + else: + self._buffer.write(b"\xc9" + struct.pack(">I", L)) + self._buffer.write(struct.pack("B", typecode)) + self._buffer.write(data) + + def _pack_array_header(self, 
n): + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x90 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDC, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDD, n)) + raise ValueError("Array is too large") + + def _pack_map_header(self, n): + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x80 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDE, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDF, n)) + raise ValueError("Dict is too large") + + def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): + self._pack_map_header(n) + for k, v in pairs: + self._pack(k, nest_limit - 1) + self._pack(v, nest_limit - 1) + + def _pack_raw_header(self, n): + if n <= 0x1F: + self._buffer.write(struct.pack("B", 0xA0 + n)) + elif self._use_bin_type and n <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xD9, n)) + elif n <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xDA, n)) + elif n <= 0xFFFFFFFF: + self._buffer.write(struct.pack(">BI", 0xDB, n)) + else: + raise ValueError("Raw is too large") + + def _pack_bin_header(self, n): + if not self._use_bin_type: + return self._pack_raw_header(n) + elif n <= 0xFF: + return self._buffer.write(struct.pack(">BB", 0xC4, n)) + elif n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xC5, n)) + elif n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xC6, n)) + else: + raise ValueError("Bin is too large") + + def bytes(self): + """Return internal buffer contents as bytes object""" + return self._buffer.getvalue() + + def reset(self): + """Reset internal buffer. + + This method is useful only when autoreset=False. 
+ """ + self._buffer = BytesIO() + + def getbuffer(self): + """Return view of internal buffer.""" + if _USING_STRINGBUILDER: + return memoryview(self.bytes()) + else: + return self._buffer.getbuffer() diff --git a/venv/Lib/site-packages/ndindex-1.10.1.dist-info/INSTALLER b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/ndindex-1.10.1.dist-info/METADATA b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/METADATA new file mode 100644 index 0000000..3d18b6e --- /dev/null +++ b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/METADATA @@ -0,0 +1,118 @@ +Metadata-Version: 2.4 +Name: ndindex +Version: 1.10.1 +Summary: A Python library for manipulating indices of ndarrays. +Home-page: https://quansight-labs.github.io/ndindex/ +Author: Quansight Labs +License: MIT +Classifier: Programming Language :: Python :: 3 +Classifier: Operating System :: OS Independent +Requires-Python: >=3.9 +Description-Content-Type: text/markdown +License-File: LICENSE +Provides-Extra: arrays +Requires-Dist: numpy; extra == "arrays" +Dynamic: author +Dynamic: classifier +Dynamic: description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: license +Dynamic: license-file +Dynamic: provides-extra +Dynamic: requires-python +Dynamic: summary + +# ndindex + +![ndindex logo](docs/_static/ndindex_logo_white_bg.svg) + +A Python library for manipulating indices of ndarrays. + +The documentation for ndindex can be found at https://quansight-labs.github.io/ndindex/ + +ndindex is a library that allows representing and manipulating objects that +can be valid indices to numpy arrays, i.e., slices, integers, ellipses, +None, integer and boolean arrays, and tuples thereof. The goals of the library +are + +- Provide a uniform API to manipulate these objects. 
Unlike the standard index + objects themselves like `slice`, `int`, and `tuple`, which do not share any + methods in common related to being indices, ndindex classes can all be + manipulated uniformly. For example, `idx.args` always gives the arguments + used to construct `idx`. + +- Give 100% correct semantics as defined by numpy's ndarray. This means that + ndindex will not make a transformation on an index object unless it is + correct for all possible input array shapes. The only exception to this rule + is that ndindex assumes that any given index will not raise IndexError (for + instance, from an out of bounds integer index or from too few dimensions). + For those operations where the array shape is known, there is a `reduce()` + method to reduce an index to a simpler index that is equivalent for the + given shape. + +- Enable useful transformation and manipulation functions on index objects. + +## Examples + +**Canonicalize a slice (over a given shape, or independent of array shape)** + + +```py +>>> from ndindex import * +>>> Slice(-2, 10, 3).reduce() +Slice(-2, 10, 2) +>>> Slice(-2, 10, 3).reduce(5) +Slice(3, 4, 1) +``` + +**Compute the maximum length of a sliced axis** + + +```py +>>> import numpy as np +>>> len(Slice(2, 10, 3)) +3 +>>> len(np.arange(10)[2:10:3]) +3 +``` + +**Compute the shape of an array of shape `(10, 20)` indexed by `[0, 0:10]`** + +```py +>>> Tuple(0, slice(0, 10)).newshape((10, 20)) +(10,) +>>> np.ones((10, 20))[0, 0:10].shape +(10,) +``` + +**Check if an indexed array would be empty** + +```py +>>> Tuple(0, ..., Slice(10, 20)).isempty((3, 4, 5)) +True +>>> np.ones((3, 4, 5))[0,...,10:20] +array([], shape=(4, 0), dtype=float64) +``` + +See the [documentation](https://quansight-labs.github.io/ndindex/) for full details +on what ndindex can do. + +## License + +[MIT License](LICENSE) + +## Acknowledgments + +ndindex development is supported by [Quansight +Labs](https://labs.quansight.org/) and is sponsored in part by [the D. E. 
+Shaw group](https://www.deshaw.com/). The D. E. Shaw group collaborates with +Quansight on numerous open source projects, including Numba, Dask and Project +Jupyter. + +

+https://labs.quansight.org/ +https://www.deshaw.com +

diff --git a/venv/Lib/site-packages/ndindex-1.10.1.dist-info/RECORD b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/RECORD new file mode 100644 index 0000000..71d28a3 --- /dev/null +++ b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/RECORD @@ -0,0 +1,82 @@ +ndindex-1.10.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +ndindex-1.10.1.dist-info/METADATA,sha256=vU6ZwpdeRUpvssKvCRQe7dw-VOeLizxoLg9Vvr3N5N8,3711 +ndindex-1.10.1.dist-info/RECORD,, +ndindex-1.10.1.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101 +ndindex-1.10.1.dist-info/licenses/LICENSE,sha256=sUQkhYdiW4ukiLf9axzXOCimml6tZhskG1aqswgivOg,1092 +ndindex-1.10.1.dist-info/top_level.txt,sha256=qup-k5ro1-vnLHmVvs26Fa9DsfLa5agfgGvPfB2o2qQ,8 +ndindex/__init__.py,sha256=lxsWBxwWMQKtiawL9FK29WwASW1zhnJfhYtxhbw2tZE,826 +ndindex/__pycache__/__init__.cpython-311.pyc,, +ndindex/__pycache__/_crt.cpython-311.pyc,, +ndindex/__pycache__/_version.cpython-311.pyc,, +ndindex/__pycache__/array.cpython-311.pyc,, +ndindex/__pycache__/booleanarray.cpython-311.pyc,, +ndindex/__pycache__/chunking.cpython-311.pyc,, +ndindex/__pycache__/ellipsis.cpython-311.pyc,, +ndindex/__pycache__/integer.cpython-311.pyc,, +ndindex/__pycache__/integerarray.cpython-311.pyc,, +ndindex/__pycache__/ndindex.cpython-311.pyc,, +ndindex/__pycache__/newaxis.cpython-311.pyc,, +ndindex/__pycache__/shapetools.cpython-311.pyc,, +ndindex/__pycache__/slice.cpython-311.pyc,, +ndindex/__pycache__/subindex_helpers.cpython-311.pyc,, +ndindex/__pycache__/tuple.cpython-311.pyc,, +ndindex/_crt.py,sha256=A0oSXZpgaBaZ5D8QPE2Wpnpl3w7snlAFHEtRUou-IHE,7884 +ndindex/_slice.cp311-win_amd64.pyd,sha256=wZ3e8ufDDz9L_fncL4kldIwsha4Q-Z6fa3IfnHcyuEc,60416 +ndindex/_tuple.cp311-win_amd64.pyd,sha256=QM_vNNpV6SbK6XKvI0jlkxsBCowx6q66bn0kblnzTKI,78336 +ndindex/_version.py,sha256=hxCRB5SzWsG8kEsSsvTDulDzRo0iwfvDjKAcCYua0-U,519 +ndindex/array.py,sha256=atIWIm3CpLn337k09iJe3r83Kd-CnNwSO0KU2QfKwh8,6472 
+ndindex/booleanarray.py,sha256=frOj0NPElcOsfuQDVzTGUgIECMvjIru2MrxeW4Yc_KY,7730 +ndindex/chunking.py,sha256=boi5XuBmALNbhCltr2P0JxVyW6_nFGnGBN54kcyxG_E,17234 +ndindex/ellipsis.py,sha256=nvg-2ybr6eiLMFPHDx0aQ_lOpDPh6RsWZdYNArBNA0w,3225 +ndindex/integer.py,sha256=67ty4f2n-XQIj0nDR1mh4LZWc_me2jkLv94c-iHEFEY,6332 +ndindex/integerarray.py,sha256=diJqpwypT2b-s5mp0jZ_okmYdM60WV8d07WbH7F-_v4,7549 +ndindex/ndindex.py,sha256=lrEJuSGTnlvQ6ny4xnHT5-MAjcMPr5osAORkoAaZNwY,26588 +ndindex/newaxis.py,sha256=t26taZm7TPqrKQxVqyfnbXHY4-j-rdCIdUJeHMKulQs,2924 +ndindex/shapetools.py,sha256=fqOqSmNYYDcJssp0Vk1xS2mtto-kXgCao1rsgxH2iIc,19574 +ndindex/slice.py,sha256=C9orokxXpyLl_K_i1O1i5MJJK0xxJLq0B5yoe_plDl4,21938 +ndindex/subindex_helpers.py,sha256=qihAJHtfxxmQwv6ASERmqx-wPYvcjCeCm1c20IczhZY,2957 +ndindex/tests/__init__.py,sha256=ldqROJigrG69B9fjUF3o2GmZQMe_XxFTmU-zBDgaZdw,3142 +ndindex/tests/__pycache__/__init__.cpython-311.pyc,, +ndindex/tests/__pycache__/doctest.cpython-311.pyc,, +ndindex/tests/__pycache__/helpers.cpython-311.pyc,, +ndindex/tests/__pycache__/test_array.cpython-311.pyc,, +ndindex/tests/__pycache__/test_as_subindex.cpython-311.pyc,, +ndindex/tests/__pycache__/test_booleanarray.cpython-311.pyc,, +ndindex/tests/__pycache__/test_broadcast_arrays.cpython-311.pyc,, +ndindex/tests/__pycache__/test_chunking.cpython-311.pyc,, +ndindex/tests/__pycache__/test_crt.cpython-311.pyc,, +ndindex/tests/__pycache__/test_ellipsis.cpython-311.pyc,, +ndindex/tests/__pycache__/test_expand.cpython-311.pyc,, +ndindex/tests/__pycache__/test_integer.cpython-311.pyc,, +ndindex/tests/__pycache__/test_integerarray.cpython-311.pyc,, +ndindex/tests/__pycache__/test_isvalid.cpython-311.pyc,, +ndindex/tests/__pycache__/test_ndindex.cpython-311.pyc,, +ndindex/tests/__pycache__/test_newaxis.cpython-311.pyc,, +ndindex/tests/__pycache__/test_newshape.cpython-311.pyc,, +ndindex/tests/__pycache__/test_no_dependencies.cpython-311.pyc,, +ndindex/tests/__pycache__/test_selected_indices.cpython-311.pyc,, 
+ndindex/tests/__pycache__/test_shapetools.cpython-311.pyc,, +ndindex/tests/__pycache__/test_slice.cpython-311.pyc,, +ndindex/tests/__pycache__/test_tuple.cpython-311.pyc,, +ndindex/tests/doctest.py,sha256=j1MHdKUQOUQPra0ga1w3BC2uqSxQOHKihestxjf5628,3355 +ndindex/tests/helpers.py,sha256=_34pzr_pcj67uFWf6ek_pEnfiu4VopvIlRPu5ss61XQ,22160 +ndindex/tests/test_array.py,sha256=OLGlQ5dpaWA_gcbS_HPufviWVdKLwBzJg2Po1TMqLP8,1091 +ndindex/tests/test_as_subindex.py,sha256=akmvpvpH83niPFT0IaKTM7nr39HfTZqYFZxkVfFYg7M,4238 +ndindex/tests/test_booleanarray.py,sha256=8qesHI3XfXpI3AX9qQoFh8JbqKiBWmgYrVoC9ylooxQ,4192 +ndindex/tests/test_broadcast_arrays.py,sha256=9gKm2ok_Co4Zx0j0eu9b60jd3Bq-hVCcIoQMysxEP5U,3368 +ndindex/tests/test_chunking.py,sha256=CFQzJq5FjwRMDfwMMR-nYsNm0AFvrPz1QCHeD7W0xvg,9673 +ndindex/tests/test_crt.py,sha256=-3MRuIp5YxGl6k01VTNBHi8OBp8x630riHcSmo-EZuY,1318 +ndindex/tests/test_ellipsis.py,sha256=Z2Pvb8JT_CSiuSlj78bfckn9mO-OQEY-ntrLJg3kwKg,2559 +ndindex/tests/test_expand.py,sha256=kaunymRwMFVjP3EX1G6tZ-I7TWPjJ9IeSCBZ6zFfaJw,3135 +ndindex/tests/test_integer.py,sha256=u2QCMP3gQ1BoinlcZNKJqLw-yWEKo2PcDQYitmkI_K4,7994 +ndindex/tests/test_integerarray.py,sha256=1K3ktw-h3IwT_NK2zZ2CvaO_I0xQeAQAPcNtHOGK32s,4941 +ndindex/tests/test_isvalid.py,sha256=sz7cPTRFKe2xP7-Hv2vbgpR4_ikRc0z7QvbglIci-sM,1543 +ndindex/tests/test_ndindex.py,sha256=qp-hV9y_7WvHQywxRKmkO7nx4eJ5Qo_q24CxaEozxjQ,5821 +ndindex/tests/test_newaxis.py,sha256=ailqMZH4tI81-GehbZvFuVp23EaoemxVzBjYL01aMw4,2387 +ndindex/tests/test_newshape.py,sha256=sdgeyLkq2W7V43PqVvzaV49tjCed9-phKONihzy6GlI,1728 +ndindex/tests/test_no_dependencies.py,sha256=1bXHaT_BMnnKvbTOzEIdyYxw7tft2m3Wdm-K9a_FiIY,6289 +ndindex/tests/test_selected_indices.py,sha256=unh4j_cSi-ZAnateKJy0zfSyyFM0zxhSUOn2he-hLDo,1384 +ndindex/tests/test_shapetools.py,sha256=pUfTwPQDZfzsZSrJ3T_YAlCaX1cwHYDCE0dUGwry04g,24003 +ndindex/tests/test_slice.py,sha256=pl2v2XO54dTLO3AViSgetDAP5Ngu0hFFZ_QVPoBokRs,17215 
+ndindex/tests/test_tuple.py,sha256=z2x8PqyDNq2q91mlSgNi5KRP4pmtMV1BpfbqW4N3xmU,9980 +ndindex/tuple.py,sha256=exwTEScfbMQOpehqPvAk_QBcIPzOhn_Ri7qQIe4z7QY,30325 diff --git a/venv/Lib/site-packages/ndindex-1.10.1.dist-info/WHEEL b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/WHEEL new file mode 100644 index 0000000..8f98e0a --- /dev/null +++ b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp311-cp311-win_amd64 + diff --git a/venv/Lib/site-packages/ndindex-1.10.1.dist-info/licenses/LICENSE b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/licenses/LICENSE new file mode 100644 index 0000000..54e6076 --- /dev/null +++ b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/licenses/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Quansight Labs + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/venv/Lib/site-packages/ndindex-1.10.1.dist-info/top_level.txt b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/top_level.txt new file mode 100644 index 0000000..dec666d --- /dev/null +++ b/venv/Lib/site-packages/ndindex-1.10.1.dist-info/top_level.txt @@ -0,0 +1 @@ +ndindex diff --git a/venv/Lib/site-packages/ndindex/__init__.py b/venv/Lib/site-packages/ndindex/__init__.py new file mode 100644 index 0000000..debe401 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/__init__.py @@ -0,0 +1,45 @@ +__all__ = [] + +from .ndindex import ndindex + +__all__ += ['ndindex'] + +from .shapetools import broadcast_shapes, iter_indices, AxisError, BroadcastError + +__all__ += ['broadcast_shapes', 'iter_indices', 'AxisError', 'BroadcastError'] + +from .slice import Slice + +__all__ += ['Slice'] + +from .integer import Integer + +__all__ += ['Integer'] + +from .tuple import Tuple + +__all__ += ['Tuple'] + +from .ellipsis import ellipsis + +__all__ += ['ellipsis'] + +from .newaxis import Newaxis + +__all__ += ['Newaxis'] + +from .integerarray import IntegerArray + +__all__ += ['IntegerArray'] + +from .booleanarray import BooleanArray + +__all__ += ['BooleanArray'] + +from .chunking import ChunkSize + +__all__ += ['ChunkSize'] + +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions diff --git a/venv/Lib/site-packages/ndindex/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..ef392be Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/_crt.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/_crt.cpython-311.pyc new file mode 100644 index 0000000..de329a0 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/_crt.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/ndindex/__pycache__/_version.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/_version.cpython-311.pyc new file mode 100644 index 0000000..3b7364b Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/_version.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/array.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/array.cpython-311.pyc new file mode 100644 index 0000000..1968ed8 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/array.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/booleanarray.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/booleanarray.cpython-311.pyc new file mode 100644 index 0000000..16a4ff8 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/booleanarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/chunking.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/chunking.cpython-311.pyc new file mode 100644 index 0000000..90ee42a Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/chunking.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/ellipsis.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/ellipsis.cpython-311.pyc new file mode 100644 index 0000000..b3d1e9b Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/ellipsis.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/integer.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/integer.cpython-311.pyc new file mode 100644 index 0000000..22e0cae Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/integer.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/integerarray.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/integerarray.cpython-311.pyc new file mode 100644 index 0000000..906ede2 Binary files 
/dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/integerarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/ndindex.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/ndindex.cpython-311.pyc new file mode 100644 index 0000000..25ac339 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/ndindex.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/newaxis.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/newaxis.cpython-311.pyc new file mode 100644 index 0000000..1d85b3e Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/newaxis.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/shapetools.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/shapetools.cpython-311.pyc new file mode 100644 index 0000000..107dfda Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/shapetools.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/slice.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/slice.cpython-311.pyc new file mode 100644 index 0000000..51708a2 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/slice.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/subindex_helpers.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/subindex_helpers.cpython-311.pyc new file mode 100644 index 0000000..02ff26d Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/subindex_helpers.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/__pycache__/tuple.cpython-311.pyc b/venv/Lib/site-packages/ndindex/__pycache__/tuple.cpython-311.pyc new file mode 100644 index 0000000..a365066 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/__pycache__/tuple.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/_crt.py b/venv/Lib/site-packages/ndindex/_crt.py new file 
mode 100644 index 0000000..ef91420 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/_crt.py @@ -0,0 +1,274 @@ +""" +Implementation of the Chinese Remainder Theorem. + +The code in this file is taken from SymPy (see the license below). We could +(and used to) just import sympy.ntheory.modular.crt, but SymPy is a very heavy +dependency for a rather basic algorithm. + +Note: although many of the helper functions from SymPy are copied here, these +functions should not be used by code outside of ndindex. If you want to use +one of these algorithms, you should import them from SymPy (note that the +functions here have had their functionality stripped down from the SymPy +versions). + +License +------- + +Copyright (c) 2006-2021 SymPy Development Team + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of SymPy nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. +""" + +from math import gcd, prod + +def gcdex(a, b): + """Returns x, y, g such that g = x*a + y*b = gcd(a, b). + + Examples + ======== + + >>> from ndindex._crt import gcdex + >>> gcdex(2, 3) + (-1, 1, 1) + >>> gcdex(10, 12) + (-1, 1, 2) + + >>> x, y, g = gcdex(100, 2004) + >>> x, y, g + (-20, 1, 4) + >>> x*100 + y*2004 + 4 + + """ + if (not a) and (not b): + return (0, 1, 0) + + if not a: + return (0, b//abs(b), abs(b)) + if not b: + return (a//abs(a), 0, abs(a)) + + if a < 0: + a, x_sign = -a, -1 + else: + x_sign = 1 + + if b < 0: + b, y_sign = -b, -1 + else: + y_sign = 1 + + x, y, r, s = 1, 0, 0, 1 + + while b: + (c, q) = (a % b, a // b) + (a, b, r, s, x, y) = (b, c, x - q*r, y - q*s, r, s) + + return (x*x_sign, y*y_sign, a) + +def _crt(U, M): + """ + Chinese Remainder Theorem. + + Given a set of integer residues ``u_0,...,u_n`` and a set of + co-prime integer moduli ``m_0,...,m_n``, returns an integer + ``u``, such that ``u = u_i mod m_i`` for ``i = ``0,...,n``. + + Examples + ======== + + Consider a set of residues ``U = [49, 76, 65]`` + and a set of moduli ``M = [99, 97, 95]``. Then we have:: + + >>> from ndindex._crt import _crt + + >>> _crt([49, 76, 65], [99, 97, 95]) + 639985 + + This is the correct result because:: + + >>> [639985 % m for m in [99, 97, 95]] + [49, 76, 65] + + Note: this is a low-level routine with no error checking. 
+ """ + p = prod(M) + v = 0 + + for u, m in zip(U, M): + e = p // m + s, _, _ = gcdex(e, m) + v += e*(u*s % m) + + return v % p + + +def solve_congruence(*remainder_modulus_pairs): + """Compute the integer ``n`` that has the residual ``ai`` when it is + divided by ``mi`` where the ``ai`` and ``mi`` are given as pairs to + this function: ((a1, m1), (a2, m2), ...). If there is no solution, + return None. Otherwise return ``n`` and its modulus. + + The ``mi`` values need not be co-prime. + + Examples + ======== + + >>> from ndindex._crt import solve_congruence + + What number is 2 mod 3, 3 mod 5 and 2 mod 7? + + >>> solve_congruence((2, 3), (3, 5), (2, 7)) + 23 + >>> [23 % m for m in [3, 5, 7]] + [2, 3, 2] + + If you prefer to work with all remainder in one list and + all moduli in another, send the arguments like this: + + >>> solve_congruence(*zip((2, 3, 2), (3, 5, 7))) + 23 + + The moduli need not be co-prime; in this case there may or + may not be a solution: + + >>> solve_congruence((2, 3), (4, 6)) is None + True + + >>> solve_congruence((2, 3), (5, 6)) + 5 + + """ + def combine(c1, c2): + """Return the tuple (a, m) which satisfies the requirement + that n = a + i*m satisfy n = a1 + j*m1 and n = a2 = k*m2. + + References + ========== + + .. [1] https://en.wikipedia.org/wiki/Method_of_successive_substitution + """ + a1, m1 = c1 + a2, m2 = c2 + a, b, c = m1, a2 - a1, m2 + g = gcd(a, b, c) + a, b, c = [i//g for i in [a, b, c]] + if a != 1: + inv_a, _, g = gcdex(a, c) + if g != 1: + return None + b *= inv_a + a, m = a1 + m1*b, m1*c + return a, m + + rm = remainder_modulus_pairs + + rv = (0, 1) + for rmi in rm: + rv = combine(rv, rmi) + if rv is None: + break + n, m = rv + n = n % m + else: + return n + +def crt(m, v, check=True): + r"""Chinese Remainder Theorem. + + The moduli in m are assumed to be pairwise coprime. The output + is then an integer f, such that f = v_i mod m_i for each pair out + of v and m. 
+ + If the moduli are not co-prime the correct result will be returned + if/when the test of the result is found to be incorrect. This result + will be None if there is no solution. + + The keyword ``check`` can be set to False if it is known that the moduli + are coprime. + + Examples + ======== + + As an example consider a set of residues ``U = [49, 76, 65]`` + and a set of moduli ``M = [99, 97, 95]``. Then we have:: + + >>> from ndindex._crt import crt + + >>> crt([99, 97, 95], [49, 76, 65]) + 639985 + + This is the correct result because:: + + >>> [639985 % m for m in [99, 97, 95]] + [49, 76, 65] + + If the moduli are not co-prime, you may receive an incorrect result + if you use ``check=False``: + + >>> crt([12, 6, 17], [3, 4, 2], check=False) + 954 + >>> [954 % m for m in [12, 6, 17]] + [6, 0, 2] + >>> crt([12, 6, 17], [3, 4, 2]) is None + True + >>> crt([3, 6], [2, 5]) + 5 + + Note: the order of gf_crt's arguments is reversed relative to crt, + and that solve_congruence takes residue, modulus pairs. + + Programmer's note: rather than checking that all pairs of moduli share + no GCD (an O(n**2) test) and rather than factoring all moduli and seeing + that there is no factor in common, a check that the result gives the + indicated residuals is performed -- an O(n) operation. + """ + result = _crt(v, m) + + if check: + if not all(v % m == result % m for v, m in zip(v, m)): + result = solve_congruence(*list(zip(v, m))) + + return result + +def ilcm(a, b): + """Computes integer least common multiple. 
+ + Examples + ======== + + >>> from ndindex._crt import ilcm + >>> ilcm(5, 10) + 10 + >>> ilcm(7, 3) + 21 + + """ + if 0 in [a, b]: + return 0 + return a // gcd(a, b) * b # since gcd(a,b) | a diff --git a/venv/Lib/site-packages/ndindex/_slice.cp311-win_amd64.pyd b/venv/Lib/site-packages/ndindex/_slice.cp311-win_amd64.pyd new file mode 100644 index 0000000..aa8ebca Binary files /dev/null and b/venv/Lib/site-packages/ndindex/_slice.cp311-win_amd64.pyd differ diff --git a/venv/Lib/site-packages/ndindex/_tuple.cp311-win_amd64.pyd b/venv/Lib/site-packages/ndindex/_tuple.cp311-win_amd64.pyd new file mode 100644 index 0000000..f2fe971 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/_tuple.cp311-win_amd64.pyd differ diff --git a/venv/Lib/site-packages/ndindex/_version.py b/venv/Lib/site-packages/ndindex/_version.py new file mode 100644 index 0000000..5acfb5a --- /dev/null +++ b/venv/Lib/site-packages/ndindex/_version.py @@ -0,0 +1,21 @@ + +# This file was generated by 'versioneer.py' (0.18) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +{ + "date": "2025-11-19T11:27:34-0700", + "dirty": false, + "error": null, + "full-revisionid": "db26ea1658494de6a30d29df5c1fb638c0a982c2", + "version": "1.10.1" +} +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) diff --git a/venv/Lib/site-packages/ndindex/array.py b/venv/Lib/site-packages/ndindex/array.py new file mode 100644 index 0000000..358d313 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/array.py @@ -0,0 +1,180 @@ +import warnings + +from .ndindex import NDIndex +from .shapetools import asshape + +class ArrayIndex(NDIndex): + """ + Superclass for array indices + + This class should not be instantiated directly. Rather, use one of its + subclasses, :class:`~.IntegerArray` or :class:`~.BooleanArray`. 
+ + To subclass this, define the `dtype` attribute, as well as all the usual + ndindex methods. + """ + __slots__ = () + + # Subclasses should redefine this + dtype = None + + def _typecheck(self, idx, shape=None, _copy=True): + try: + from numpy import ndarray, asarray, integer, bool_, empty, intp + except ImportError: # pragma: no cover + raise ImportError("NumPy must be installed to create array indices") + try: + from numpy import VisibleDeprecationWarning + except ImportError: # pragma: no cover + from numpy.exceptions import VisibleDeprecationWarning + + if self.dtype is None: + raise TypeError("Do not instantiate the superclass ArrayIndex directly") + + if shape is not None: + if idx != []: + raise ValueError("The shape argument is only allowed for empty arrays (idx=[])") + shape = asshape(shape) + if 0 not in shape: + raise ValueError("The shape argument must be an empty shape") + idx = empty(shape, dtype=self.dtype) + + if isinstance(idx, (list, ndarray, bool, integer, int, bool_)): + # Ignore deprecation warnings for things like [1, []]. These will be + # filtered out anyway since they produce object arrays. 
+ with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + category=VisibleDeprecationWarning, + message='Creating an ndarray from ragged nested sequences') + a = asarray(idx) + if a is idx and _copy: + a = a.copy() + if isinstance(idx, list) and 0 in a.shape: + if not _copy: + raise ValueError("_copy=False is not allowed with list input") + a = a.astype(self.dtype) + if self.dtype == intp and issubclass(a.dtype.type, integer): + if a.dtype != self.dtype: + if not _copy: + raise ValueError("If _copy=False, the input array dtype must already be intp") + a = a.astype(self.dtype) + if a.dtype != self.dtype: + raise TypeError(f"The input array to {self.__class__.__name__} must have dtype {self.dtype.__name__}, not {a.dtype}") + a.flags.writeable = False + return (a,) + raise TypeError(f"{self.__class__.__name__} must be created with an array with dtype {self.dtype.__name__}") + + # These will allow array == ArrayIndex to give True or False instead of + # returning an array. + __array_ufunc__ = None + def __array_function__(self, func, types, args, kwargs): + return NotImplemented + + def __array__(self, **kwargs): + raise TypeError(f"Cannot convert {self.__class__.__name__} to an array. Use .array instead.") + + + @property + def raw(self): + return self.args[0] + + @property + def array(self): + """ + Return the NumPy array of self. + + This is the same as `self.args[0]`. + + >>> from ndindex import IntegerArray, BooleanArray + >>> IntegerArray([0, 1]).array + array([0, 1]) + >>> BooleanArray([False, True]).array + array([False, True]) + + """ + return self.args[0] + + @property + def shape(self): + """ + Return the shape of the array of self. + + This is the same as `self.array.shape`. Note that this is **not** the + same as the shape of an array that is indexed by `self`. Use + :meth:`~.NDIndex.newshape` to get that. 
+ + >>> from ndindex import IntegerArray, BooleanArray + >>> IntegerArray([[0], [1]]).shape + (2, 1) + >>> BooleanArray([[False], [True]]).shape + (2, 1) + + """ + return self.array.shape + + @property + def ndim(self): + """ + Return the number of dimensions of the array of self. + + This is the same as `self.array.ndim`. Note that this is **not** the + same as the number of dimensions of an array that is indexed by + `self`. Use `len` on :meth:`~.NDIndex.newshape` to get that. + + >>> from ndindex import IntegerArray, BooleanArray + >>> IntegerArray([[0], [1]]).ndim + 2 + >>> BooleanArray([[False], [True]]).ndim + 2 + + """ + return self.array.ndim + + @property + def size(self): + """ + Return the number of elements of the array of self. + + This is the same as `self.array.size`. Note that this is **not** the + same as the number of elements of an array that is indexed by `self`. + Use `np.prod` on :meth:`~.NDIndex.newshape` to get that. + + >>> from ndindex import IntegerArray, BooleanArray + >>> IntegerArray([[0], [1]]).size + 2 + >>> BooleanArray([[False], [True]]).size + 2 + + """ + return self.array.size + + # The repr form recreates the object. The str form gives the truncated + # array string and is explicitly non-valid Python (doesn't have commas). + def __repr__(self): + if 0 not in self.shape: + arg = repr(self.array.tolist()) + else: + arg = f"[], shape={self.shape}" + return f"{self.__class__.__name__}({arg})" + + def __str__(self): + from numpy import array2string + + return (self.__class__.__name__ + + "(" + + array2string(self.array).replace('\n', '') + + ")") + + def __hash__(self): + return hash(self.array.tobytes()) + + def isvalid(self, shape, _axis=0): + shape = asshape(shape) + try: + # The logic is in _raise_indexerror because the error message uses + # the additional information that is computed when checking if the + # array is valid. 
+ self._raise_indexerror(shape, _axis) + except IndexError: + return False + return True diff --git a/venv/Lib/site-packages/ndindex/booleanarray.py b/venv/Lib/site-packages/ndindex/booleanarray.py new file mode 100644 index 0000000..0cf27de --- /dev/null +++ b/venv/Lib/site-packages/ndindex/booleanarray.py @@ -0,0 +1,203 @@ +from .array import ArrayIndex +from .shapetools import asshape + +class BooleanArray(ArrayIndex): + """ + Represents a boolean array index (also known as a mask). + + If `idx` is an n-dimensional boolean array with shape `s = (s1, ..., sn)` + and `a` is an array of shape `s = (s1, ..., sn, ..., sm)`, `a[idx]` + replaces the first `n` dimensions of `a` with a single dimensions of size + `np.nonzero(idx)`, where each entry is included if the corresponding + element of `idx` is True. The axes in the index shape should match the + corresponding axes in the array shape or be 0, or the index produces + IndexError. + + The typical way of creating a mask is to use boolean operations on an + array, then index the array with that. For example, if `a` is an array of + integers, `a[a > 0]` will produces a flat array of the elements of `a` + that are positive. + + Some important things to note about boolean array index semantics: + + 1. A boolean array index will remove as many dimensions as the index has, + and replace them with a single flat dimension which is the size of the + number of `True` elements in the index. + + 2. A boolean array index `idx` works the same as the integer array index + `np.nonzero(idx)`. In particular, the elements of the index are always + iterated in row-major, C-style order. This does not apply to + 0-dimensional boolean indices. + + 3. A 0-dimensional boolean index (i.e., just the scalar `True` or `False`) + can still be thought of as removing 0 dimensions and adding a single + dimension of length 1 for True or 0 for False. 
Hence, if `a` has shape + `(s1, ..., sn)`, then `a[True]` has shape `(1, s1, ..., sn)`, and + `a[False]` has shape `(0, s1, ..., sn)`. + + 4. If a tuple index has multiple boolean arrays, they are broadcast + together and iterated as a single array, similar to + :class:`IntegerArray`. If a boolean array index `idx` is mixed with an + integer array index in a tuple index, it is treated like + `np.nonzero(idx)`. + + See :doc:`../indexing-guide/multidimensional-indices/boolean-arrays` for a + more complete description of the semantics of boolean array indices. + + A list (or list of lists) of booleans may also be used in place of an + array. + + >>> from ndindex import BooleanArray + >>> import numpy as np + >>> idx = BooleanArray([[ True, True], + ... [ True, False], + ... [False, False], + ... [False, True], + ... [False, False]]) + >>> a = np.arange(10).reshape((5, 2)) + >>> a[idx.raw] + array([0, 1, 2, 7]) + + .. note:: + + `BooleanArray` does *not* represent an array, but rather an *array + index*. It does not have most methods that `numpy.ndarray` has, and + should not be used in array contexts. See the document on + :any:`type-confusion` for more details. + + """ + __slots__ = () + + @property + def dtype(self): + """ + The dtype of `BooleanArray` is `np.bool_`. + """ + from numpy import bool_ + return bool_ + + def __hash__(self): + # Match the hash for scalar booleans. Otherwise, hash(True) won't + # equal hash(ndindex(True)). + if self.shape == (): + return hash(self.array.any()) + return super().__hash__() + + @property + def count_nonzero(self): + """ + Returns the number of elements indexed by self. + + In general, if shapes match, when indexed by `self`, the first *n* + dimensions of an array are replaced with a single dimension of size + `count_nonzero`, where *n* is `self.shape`. + + This is the same as `np.count_nonzero(self.array)`. Note, to get the + shape of an array indexed by self, use :meth:`newshape`, not this + method. 
+ + >>> from ndindex import BooleanArray + >>> BooleanArray([True, False, True]).count_nonzero + 2 + """ + from numpy import count_nonzero + return int(count_nonzero(self.array)) + + def _raise_indexerror(self, shape, axis=0): + if len(shape) < self.ndim + axis: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional, but {self.ndim + axis} were indexed") + + for i in range(axis, axis+self.ndim): + if self.shape[i-axis] != 0 and shape[i] != self.shape[i-axis]: + raise IndexError(f'boolean index did not match indexed array along axis {i}; size of axis is {shape[i]} but size of corresponding boolean axis is {self.shape[i-axis]}') + + def reduce(self, shape=None, *, axis=0, negative_int=False): + """ + Reduce a `BooleanArray` index on an array of shape `shape`. + + The result will either be `IndexError` if the index is invalid for the + given shape, or a `BooleanArray` index. Presently, no simplifications + are done for BooleanArray: if `reduce()` does not produce an + `IndexArray` the index returned will be the same as `self`. + + >>> from ndindex import BooleanArray + >>> idx = BooleanArray([True, False]) + >>> idx.reduce((3,)) + Traceback (most recent call last): + ... 
+ IndexError: boolean index did not match indexed array along axis 0; size of axis is 3 but size of corresponding boolean axis is 2 + >>> idx.reduce((2,)) + BooleanArray([True, False]) + + See Also + ======== + + .NDIndex.reduce + .Tuple.reduce + .Slice.reduce + .ellipsis.reduce + .Newaxis.reduce + .Integer.reduce + .IntegerArray.reduce + + """ + if shape is None: + return self + + shape = asshape(shape) + + self._raise_indexerror(shape, axis) + return self + + def newshape(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + self._raise_indexerror(shape) + return (self.count_nonzero,) + shape[self.ndim:] + + def isempty(self, shape=None): + if shape is not None: + return 0 in self.newshape(shape) + + return self.count_nonzero == 0 + + def as_subindex(self, index): + if self in [True, False]: + raise NotImplementedError("as_subindex is not supported for scalar boolean indices") + return Tuple(*self.array.nonzero()).as_subindex(index) + + def broadcast_arrays(self): + return Tuple(self).broadcast_arrays() + + def __eq__(self, other): + from numpy import bool_, ndarray + + if isinstance(other, (bool, bool_)): + return self.shape == () and self.array == other + if isinstance(other, BooleanArray): + b = other.array + elif isinstance(other, ndarray): + b = other + elif isinstance(other, list): + try: + b = BooleanArray(other) + except TypeError: + return False + else: + return False + a = self.array + return a.shape == b.shape and (a == b).all() + +def _is_boolean_scalar(idx): + """ + Determine if idx is a scalar boolean index. + + This is for internal usage only. Assumes idx is already an ndindex type. + This is more performant than `idx in [True, False]`. + """ + # TODO: Instead of this function, make BooleanScalar a separate class. 
+ return isinstance(idx, BooleanArray) and idx.shape == () + +# Imports at the bottom to avoid circular import issues +from .tuple import Tuple diff --git a/venv/Lib/site-packages/ndindex/chunking.py b/venv/Lib/site-packages/ndindex/chunking.py new file mode 100644 index 0000000..634d0b1 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/chunking.py @@ -0,0 +1,461 @@ +from collections.abc import Sequence +from itertools import chain, product + +from .ndindex import ImmutableObject, operator_index, ndindex +from .tuple import Tuple +from .slice import Slice +from .integer import Integer +from .integerarray import IntegerArray +from .newaxis import Newaxis +from .shapetools import asshape +from .subindex_helpers import ceiling +from ._crt import prod + +class ChunkSize(ImmutableObject, Sequence): + """ + Represents a chunk size tuple. + + A chunk size is a tuple of length n where each element is either a + positive integer or `None`. It represents a chunking of an array with n + dimensions, where each corresponding dimension is chunked by the + corresponding chunk size, or not chunked for `None` (note, `None` chunks + are currently not yet implemented). + + For example, given a 3 dimensional chunk size of `(20, 20, None)` and an + array of shape `(40, 30, 10)`, the array would be split into four chunks, + corresponding to the indices `0:20,0:20,:`, `0:20,20:30,:`, + `20:40,0:20,:`, and `20:40,20:30,:`. Note that the size of a chunk may be + less than the total chunk size if the array shape is not a multiple of the + chunk size in a given dimension. + + `ChunkSize` behaves like a `tuple`. For example, `chunk_size[0]` gives the + first chunk dimension, and `len(chunk_size)` gives the number of + dimensions of a chunk. Also, the input to ChunkSize should be a tuple, + just as with the `tuple` constructor, even for single dimensional chunk + sizes. 
+ + >>> from ndindex import ChunkSize + >>> ChunkSize((20, 30, 40)) + ChunkSize((20, 30, 40)) + >>> ChunkSize((2**12,)) + ChunkSize((4096,)) + + """ + def _typecheck(self, chunk_size): + # TODO: Also accept ChunkSize(1, 2, 3)? + if isinstance(chunk_size, Tuple): + raise TypeError("Tuple is not a valid input to ChunkSize. Use tuple instead.") + args = [] + for i in chunk_size: + if i is None: + raise NotImplementedError("None in chunks is not supported yet") + # args.append(i) + else: + try: + i = operator_index(i) + except TypeError: + raise TypeError("Chunks must be positive integers or None") + if i <= 0: + raise ValueError("Chunks must be positive integers") + args.append(i) + return (tuple(args),) + + def __hash__(self): + return hash(self.args[0]) + + # Methods for collections.abc.Sequence to make ChunkSize act like a tuple + def __getitem__(self, *args): + return self.args[0].__getitem__(*args) + + def __len__(self): + return len(self.args[0]) + + def num_chunks(self, shape): + """ + Give the number of chunks for the given `shape`. + + This is the same as `len(list(self.indices(shape)))`, but much faster. + `shape` must have the same number of dimensions as `self`. + + >>> from ndindex import ChunkSize + >>> chunk_size = ChunkSize((10, 10, 10)) + >>> shape = (10000, 10000, 10000) + >>> # len(list(chunk_size.indices(shape))) would be very slow, as + >>> # would have to iterate all 1 billion chunks + >>> chunk_size.num_chunks(shape) + 1000000000 + + See Also + ======== + + ChunkSize.indices + + """ + shape = asshape(shape) + d = [ceiling(i, c) for i, c in zip(shape, self)] + if 0 in d: + return 1 + return prod(d) + + def indices(self, shape): + """ + Yield a set of ndindex indices for the chunks on an array of shape `shape`. + + `shape` should have the same number of dimensions as `self`. If the + shape is not a multiple of the chunk size, some chunks will be + truncated, so that :any:`len(idx.args[i]) ` can + be used to get the size of an indexed axis. 
+ + For example, if `a` has shape `(10, 19)` and is chunked into chunks + of shape `(5, 5)`: + + >>> from ndindex import ChunkSize + >>> chunk_size = ChunkSize((5, 5)) + >>> for idx in chunk_size.indices((10, 19)): + ... print(idx) + Tuple(slice(0, 5, 1), slice(0, 5, 1)) + Tuple(slice(0, 5, 1), slice(5, 10, 1)) + Tuple(slice(0, 5, 1), slice(10, 15, 1)) + Tuple(slice(0, 5, 1), slice(15, 19, 1)) + Tuple(slice(5, 10, 1), slice(0, 5, 1)) + Tuple(slice(5, 10, 1), slice(5, 10, 1)) + Tuple(slice(5, 10, 1), slice(10, 15, 1)) + Tuple(slice(5, 10, 1), slice(15, 19, 1)) + + See Also + ======== + + ChunkSize.num_chunks + + """ + shape = asshape(shape) + + if len(shape) != len(self): + raise ValueError("chunks dimensions must equal the array dimensions") + d = [ceiling(i, c) for i, c in zip(shape, self)] + if 0 in d: + yield Tuple(*[Slice(0, bool(i)*chunk_size, 1) for i, chunk_size in zip(d, self)]).expand(shape) + for p in product(*[range(i) for i in d]): + # p = (0, 0, 0), (0, 0, 1), ... + yield Tuple(*[Slice(chunk_size*i, min(chunk_size*(i + 1), n), 1) + for n, chunk_size, i in zip(shape, self, p)]) + + def as_subchunks(self, idx, shape, *, _force_slow=None): + """ + Split an index `idx` on an array of shape `shape` into subchunk indices. + + Yields indices `c`, where `c` is an index for the chunk that + should be sliced. Only those `c` for which `idx` includes at least one + element are yielded. + + That is to say, for each `c` index yielded, `a[c][idx.as_subindex(c)]` + will give those elements of `a[idx]` that are part of the `c` chunk, + and together they give all the elements of `a[idx]`. See also the + docstring of :meth:`~ndindex.NDIndex.as_subindex`. + + This method is roughly equivalent to + + .. 
code:: python + + def as_subchunks(self, idx, shape): + for c in self.indices(shape): + try: + index = idx.as_subindex(c) + except ValueError: + # as_subindex raises ValueError in some cases when the + # indices do not intersect (see the docstring of + # as_subindex()) + continue + + if not index.isempty(self): + # Yield those c for which idx.as_subindex(c) is nonempty + yield c + + except it is more efficient. + + >>> from ndindex import ChunkSize, Tuple + >>> idx = Tuple(slice(5, 15), 0) + >>> shape = (20, 20) + >>> chunk_size = ChunkSize((10, 10)) + >>> for c in chunk_size.as_subchunks(idx, shape): + ... print(c) + ... print(' ', idx.as_subindex(c)) + Tuple(slice(0, 10, 1), slice(0, 10, 1)) + Tuple(slice(5, 10, 1), 0) + Tuple(slice(10, 20, 1), slice(0, 10, 1)) + Tuple(slice(0, 5, 1), 0) + + See Also + ======== + + ndindex.NDIndex.as_subindex + num_subchunks + + """ + shape = asshape(shape) + if len(shape) != len(self): + raise ValueError("chunks dimensions must equal the array dimensions") + + if 0 in shape: + return + idx = ndindex(idx).expand(shape) + + # The slow naive fallback is kept here for testing purposes and to support + # indices that aren't supported in the fast way yet below. 
+ def _fallback(): + if _force_slow is False: # pragma: no cover + raise RuntimeError("as_subchunks() attempted fallback with _force_slow=False") + for c in self.indices(shape): + try: + index = idx.as_subindex(c) + except ValueError: + continue + + if not index.isempty(self): + yield c + return + + if _force_slow: + yield from _fallback() + return + + if idx.isempty(shape): + return + + if self == (): + yield Tuple() + return + + iters = [] + idx_args = iter(idx.args) + self_ = iter(self) + while True: + try: + i = next(idx_args) + if isinstance(i, Newaxis) or i == True: + continue + n = next(self_) + except StopIteration: + break + if isinstance(i, Integer): + iters.append([i.raw//n]) + elif isinstance(i, IntegerArray): + # All arrays will be together after calling expand() (Tuple does not support arrays + # separated by non-integer indices). Collect them all together + # at once. + arrs = [] + while True: + try: + if isinstance(i, IntegerArray): + arrs.append(i.raw.flatten()//n) + else: + idx_args = chain([i], idx_args) + self_ = chain([n], self_) + break + i = next(idx_args) + n = next(self_) + except StopIteration: + break + + import numpy as np + a = np.unique(np.stack(arrs), axis=-1) + def _array_iter(a): + for i in range(a.shape[-1]): + yield tuple(a[..., i].flat) + iters.append(_array_iter(a)) + elif isinstance(i, Slice) and i.step > 0: + def _slice_iter(s, n): + a, N, m = s.args + if m > n: + yield from ((a + k*m)//n for k in range(ceiling(N-a, m))) + else: + yield from range(a//n, ceiling(N, n)) + iters.append(_slice_iter(i, n)) + else: + # Fallback to the naive algorithm. This should currently only + # happen in cases where the naive as_subindex algorithm will + # raise NotImplementedError. 
+ yield from _fallback() + return # pragma: no cover + + def _flatten(l): + for element in l: + if isinstance(element, tuple): + yield from element + else: + yield element + + def _indices(iters): + for _p in product(*iters): + p = _flatten(_p) + # p = (0, 0, 0), (0, 0, 1), ... + yield Tuple(*[Slice(chunk_size*i, min(chunk_size*(i + 1), n), 1) + for n, chunk_size, i in zip(shape, self, p)]) + + for c in _indices(iters): + # Empty indices should be impossible by the construction of the + # iterators above. + yield c + + def num_subchunks(self, idx, shape): + """ + Give the number of chunks indexed by `idx` on an array of shape + `shape`. + + This is equivalent to `len(list(self.as_subindex(idx, shape)))`, but + more efficient. + + >>> from ndindex import ChunkSize, Tuple + >>> idx = Tuple(slice(5, 15), 0) + >>> shape = (20, 20) + >>> chunk_size = ChunkSize((10, 10)) + >>> chunk_size.num_subchunks(idx, shape) + 2 + + See Also + ======== + + ndindex.NDIndex.as_subindex + num_subchunks + + """ + shape = asshape(shape) + if len(shape) != len(self): + raise ValueError("chunks dimensions must equal the array dimensions") + + idx = ndindex(idx).expand(shape) + + if 0 in shape: + return 0 + + if idx.isempty(shape): + return 0 + + if self == (): + return 1 + + idx_args = iter(idx.args) + self_ = iter(self) + res = 1 + while True: + try: + i = next(idx_args) + if isinstance(i, Newaxis) or i == True: + continue + n = next(self_) + except StopIteration: + break + if isinstance(i, Integer): + continue + elif isinstance(i, IntegerArray): + arrs = [] + # see as_subchunks + while True: + try: + if isinstance(i, IntegerArray): + arrs.append(i.raw.flatten()//n) + else: + idx_args = chain([i], idx_args) + self_ = chain([n], self_) + break + i = next(idx_args) + n = next(self_) + except StopIteration: + break + + import numpy as np + res *= np.unique(np.stack(arrs), axis=-1).shape[-1] + elif isinstance(i, Slice): + if i.step < 0: + raise NotImplementedError("num_subchunks() is not 
implemented for slices with negative step") + a, N, m = i.args + if m > n: + res *= ceiling(N-a, m) + else: + res *= ceiling(N, n) - a//n + else: + raise NotImplementedError(f"num_subchunks() is not implemented for {type(i).__name__}") + + return res + + def containing_block(self, idx, shape): + """ + Compute the index for the smallest contiguous block of chunks that + contains `idx` on an array of shape `shape`. + + A block is a subset of an array that is contiguous in all dimensions + and is aligned along the chunk size. A block index is always of the + form `(Slice(k1, m1), Slice(k2, m2), …, Slice(kn, mn))` where `n` is + the number of dimensions in the chunk size, and the `ki` and `mi` are + multiples of the corresponding chunk dimension (the `mi` may be + truncated to the shape). + + For example, given a chunk size of `(10, 15)`, an example block might + be `(Slice(0, 20), Slice(30, 45))`. Such a block would be the smallest + block that contains the index `(Slice(0, 12), 40)`, for example. + + >>> from ndindex import ChunkSize + >>> chunk_size = ChunkSize((10, 15)) + >>> idx = (slice(0, 12), 40) + >>> shape = (100, 100) + >>> block = chunk_size.containing_block(idx, shape) + >>> block + Tuple(slice(0, 20, 1), slice(30, 45, 1)) + + The method :meth:`as_subchunks` can be used on the block to determine + which chunks are contained in it, and :meth:`num_subchunks` to + determine how many: + + >>> chunk_size.num_subchunks(block, shape) + 2 + >>> for c in chunk_size.as_subchunks(block, shape): + ... print(c) + Tuple(slice(0, 10, 1), slice(30, 45, 1)) + Tuple(slice(10, 20, 1), slice(30, 45, 1)) + + In this example, `chunk_size.as_subchunk(block, shape)` and + `chunk_size.as_subchunks(idx, shape)` are the same, but in general, a + block may overlap with more chunks than the original index because the + block is contiguous. 
+ + """ + shape = asshape(shape) + if len(shape) != len(self): + raise ValueError("chunks dimensions must equal the array dimensions") + idx = ndindex(idx).expand(shape) + + idx_args = iter(idx.args) + self_ = iter(self) + res = [] + + if False in idx.args: + return Tuple(*[slice(0, 0, 1) for i in range(len(shape))]) + + while True: + try: + i = next(idx_args) + if isinstance(i, Newaxis) or i == True: + continue + n = next(self_) + except StopIteration: + break + if isinstance(i, Integer): + chunk_n = i.raw//n + res.append(Slice(chunk_n*n, (chunk_n + 1)*n)) + elif isinstance(i, IntegerArray): + import numpy as np + + if i.size == 0: + res.append(Slice(0, 0)) + continue + m = np.min(i.array) + M = np.max(i.array) + res.append(Slice(m//n*n, (M//n + 1)*n)) + elif isinstance(i, Slice): + if i.step < 0: + raise NotImplementedError("containing_block() is not implemented for slices with negative step") + res.append(Slice(i.start - (i.start % n), ceiling(i.stop, n)*n)) + else: + raise NotImplementedError(f"containing_block() is not implemented for {type(i).__name__}") + + return Tuple(*res).expand(shape) diff --git a/venv/Lib/site-packages/ndindex/ellipsis.py b/venv/Lib/site-packages/ndindex/ellipsis.py new file mode 100644 index 0000000..5951ac9 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/ellipsis.py @@ -0,0 +1,105 @@ +from .ndindex import NDIndex +from .tuple import Tuple +from .shapetools import asshape + +class ellipsis(NDIndex): + """ + Represents an ellipsis index, i.e., `...` (or `Ellipsis`). + + Ellipsis indices by themselves return the full array. Inside of a tuple + index, an ellipsis skips 0 or more axes of the array so that everything + after the ellipsis indexes the last axes of the array. A tuple index can + have at most one ellipsis. + + See :doc:`../indexing-guide/multidimensional-indices/ellipses` for more + details on the semantics of ellipsis indices. 
+ + For example `a[(0, ..., -2)]` would index the first element on the first + axis, the second-to-last element in the last axis, and include all the + axes in between. + + >>> from numpy import arange + >>> a = arange(2*3*4).reshape((2, 3, 4)) + >>> a + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + >>> a[0, ..., -2] + array([ 2, 6, 10]) + + An ellipsis can go at the beginning of end of a tuple index, and is + allowed to match 0 axes. + + .. note:: + + Unlike the standard Python `Ellipsis`, `ellipsis` is the type, not the + object (the name is lowercase to avoid conflicting with the built-in). + Use `ellipsis()` or `ndindex(...)` to create the object. In most + ndindex contexts, `...` can be used instead of `ellipsis()`, for + instance, when creating a `Tuple` object. Also unlike `Ellipsis`, + `ellipsis()` is not singletonized, so you should not use `is` to + compare it. See the document on :any:`type confusion + ` for more details. + + """ + __slots__ = () + + def _typecheck(self): + return () + + def reduce(self, shape=None, *, negative_int=False): + """ + Reduce an ellipsis index + + Since an ellipsis by itself always returns the full array unchanged, + `ellipsis().reduce()` returns `Tuple()` as a canonical form (the index + `()` also always returns an array unchanged). + + >>> from ndindex import ellipsis + >>> ellipsis().reduce() + Tuple() + + See Also + ======== + + .NDIndex.reduce + .Tuple.reduce + .Slice.reduce + .Newaxis.reduce + .Integer.reduce + .IntegerArray.reduce + .BooleanArray.reduce + + """ + if shape is not None: + shape = asshape(shape) + return Tuple() + + @property + def raw(self): + return ... 
+ + def isvalid(self, shape): + shape = asshape(shape) + return True + + def newshape(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + return shape + + def as_subindex(self, index): + return Tuple().as_subindex(index) + + def isempty(self, shape=None): + return Tuple().isempty(shape=shape) + + def __eq__(self, other): + return other is ... or isinstance(other, ellipsis) + + def __hash__(self): + return super().__hash__() diff --git a/venv/Lib/site-packages/ndindex/integer.py b/venv/Lib/site-packages/ndindex/integer.py new file mode 100644 index 0000000..f71ca51 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/integer.py @@ -0,0 +1,190 @@ +from .ndindex import NDIndex, operator_index +from .shapetools import AxisError, asshape + +class Integer(NDIndex): + """ + Represents an integer index on an axis of an nd-array. + + Any object that implements `__index__` can be used as an integer index. + + >>> from ndindex import Integer + >>> idx = Integer(1) + >>> [0, 1, 2][idx.raw] + 1 + >>> idx = Integer(-3) + >>> [0, 1, 2][idx.raw] + 0 + + Note that `Integer` itself implements `__index__`, so it can be used as an + index directly. However, it is still recommended to use `raw` for + consistency, as this only works for `Integer`. + + See :doc:`../indexing-guide/integer-indices` for a description of the + semantics of integers as indices. + + .. note:: + + `Integer` does *not* represent an integer, but rather an + *integer index*. It does not have most methods that `int` has, and + should not be used in non-indexing contexts. See the document on + :any:`type-confusion` for more details. + + """ + __slots__ = () + + def _typecheck(self, idx): + idx = operator_index(idx) + return (idx,) + + def __index__(self): + return self.raw + + @property + def raw(self): + return self.args[0] + + def __len__(self): + """ + Returns the number of elements indexed by `self` + + Since `self` is an integer index, this always returns 1. 
Note that + integer indices always remove an axis. + """ + return 1 + + def isvalid(self, shape, _axis=0): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + if not shape: + return False + size = shape[_axis] + return -size <= self.raw < size + + def _raise_indexerror(self, shape, axis=0): + if not self.isvalid(shape, axis): + size = shape[axis] + raise IndexError(f"index {self.raw} is out of bounds for axis {axis} with size {size}") + + def reduce(self, shape=None, *, axis=0, negative_int=False, axiserror=False): + """ + Reduce an Integer index on an array of shape `shape`. + + The result will either be `IndexError` if the index is invalid for the + given shape, or an Integer index where the value is nonnegative. + + If `negative_int` is `True` and a `shape` is provided, then the result + will be an Integer index where the value is negative. + + >>> from ndindex import Integer + >>> idx = Integer(-5) + >>> idx.reduce((3,)) + Traceback (most recent call last): + ... 
+ IndexError: index -5 is out of bounds for axis 0 with size 3 + >>> idx.reduce((9,)) + Integer(4) + + See Also + ======== + + .NDIndex.reduce + .Tuple.reduce + .Slice.reduce + .ellipsis.reduce + .Newaxis.reduce + .IntegerArray.reduce + .BooleanArray.reduce + + """ + if shape is None: + return self + + if axiserror: + if not isinstance(shape, int): # pragma: no cover + raise TypeError("axiserror=True requires shape to be an integer") + if not self.isvalid(shape): + raise AxisError(self.raw, shape) + + shape = asshape(shape, axis=axis) + + self._raise_indexerror(shape, axis) + + if self.raw < 0 and not negative_int: + size = shape[axis] + return self.__class__(size + self.raw) + elif self.raw >= 0 and negative_int: + size = shape[axis] + return self.__class__(self.raw - size) + + return self + + def newshape(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + self._raise_indexerror(shape) + return shape[1:] + + def as_subindex(self, index): + index = ndindex(index) + + if isinstance(index, Tuple): + return Tuple(self).as_subindex(index) + + if not isinstance(index, Slice): + raise NotImplementedError("Integer.as_subindex is only implemented for slices") + + if self == -1: + s = Slice(self.args[0], None).as_subindex(index) + else: + s = Slice(self.args[0], self.args[0] + 1).as_subindex(index) + if s == Slice(0, 0, 1): + # The intersection is empty. There is no valid index we can return + # here. We want an index that produces an empty array, but the + # shape should be one less, to match a[self]. Since a[index] has + # as many dimensions as a, there is no way to index a[index] so + # that it gives one fewer dimension but is also empty. The best we + # could do is to return a boolean array index array([False]), + # which would replace the first dimension with a length 0 + # dimension. But + # + # 1. this isn't implemented yet, + # 2. 
class IntegerArray(ArrayIndex):
    """
    Represents an integer array index.

    If `idx` is an n-dimensional integer array with shape `s = (s1, ..., sn)`
    and `a` is any array, `a[idx]` replaces the first dimension of `a` with
    dimensions of size `s1, ..., sn`, where each entry is indexed according to
    the entry in `idx` as an integer index.

    Integer arrays can also appear as part of tuple indices. In that case,
    they replace the axis being indexed. If more than one integer array
    appears inside of a tuple index, they are broadcast together and iterated
    as one. Furthermore, if an integer array appears in a tuple index, all
    integer indices in the tuple are treated as scalar integer arrays and are
    also broadcast. In general, an :any:`Integer` index semantically behaves
    the same as a scalar (`shape=()`) `IntegerArray`.

    A list (or list of lists) of integers may also be used in place of an
    array.

    See :doc:`../indexing-guide/multidimensional-indices/integer-arrays` for a
    description of the semantics of integer array indices.

    >>> from ndindex import IntegerArray
    >>> import numpy as np
    >>> idx = IntegerArray([[0, 1], [1, 2]])
    >>> a = np.arange(10)
    >>> a[idx.raw]
    array([[0, 1],
           [1, 2]])

    .. note::

       `IntegerArray` does *not* represent an array, but rather an *array
       index*. It does not have most methods that `numpy.ndarray` has, and
       should not be used in array contexts. See the document on
       :any:`type-confusion` for more details.

    """
    __slots__ = ()

    @property
    def dtype(self):
        """
        The dtype of `IntegerArray` is `np.intp`, which is typically either
        `np.int32` or `np.int64` depending on the platform.
        """
        # Imported lazily so that importing this module does not import NumPy.
        from numpy import intp
        return intp

    def _raise_indexerror(self, shape, axis=0):
        """
        Raise `IndexError` if any entry is out of bounds for `shape[axis]`.

        `shape` must already be a tuple (i.e., passed through `asshape`).
        The first offending entry (in flat order) is reported in the message.
        """
        size = shape[axis]
        out_of_bounds = (self.array >= size) | ((-size > self.array) & (self.array < 0))
        if out_of_bounds.any():
            raise IndexError(f"index {self.array[out_of_bounds].flat[0]} is out of bounds for axis {axis} with size {size}")

    def reduce(self, shape=None, *, axis=0, negative_int=False):
        """
        Reduce an `IntegerArray` index on an array of shape `shape`.

        The result will either be `IndexError` if the index is invalid for the
        given shape, an `IntegerArray` index where the values are all
        nonnegative, or, if `self` is a scalar array index (`self.shape ==
        ()`), an `Integer` whose value is nonnegative.

        If `negative_int` is `True` and a `shape` is provided, the result will
        be an `IntegerArray` with negative entries instead of positive
        entries.

        >>> from ndindex import IntegerArray
        >>> idx = IntegerArray([-5, 2])
        >>> idx.reduce((3,))
        Traceback (most recent call last):
        ...
        IndexError: index -5 is out of bounds for axis 0 with size 3
        >>> idx.reduce((9,))
        IntegerArray([4, 2])
        >>> idx.reduce((9,), negative_int=True)
        IntegerArray([-5, -7])

        See Also
        ========

        .NDIndex.reduce
        .Tuple.reduce
        .Slice.reduce
        .ellipsis.reduce
        .Newaxis.reduce
        .Integer.reduce
        .BooleanArray.reduce

        """
        # A 0-d integer array is handled by the equivalent Integer index.
        if self.shape == ():
            return Integer(self.array).reduce(shape, axis=axis, negative_int=negative_int)

        if shape is None:
            return self

        shape = asshape(shape, axis=axis)

        self._raise_indexerror(shape, axis)

        size = shape[axis]
        # Work on a copy; the array stored on the index is never modified.
        new_array = self.array.copy()
        if negative_int:
            new_array[new_array >= 0] -= size
        else:
            new_array[new_array < 0] += size
        return IntegerArray(new_array)

    def newshape(self, shape):
        # The docstring for this method is on the NDIndex base class
        shape = asshape(shape)

        self._raise_indexerror(shape)
        # The indexed (first) axis is replaced by the shape of the index array.
        return self.shape + shape[1:]

    def isempty(self, shape=None):
        if shape is not None:
            return 0 in self.newshape(shape)

        return 0 in self.shape

    def as_subindex(self, index):
        index = ndindex(index)

        if isinstance(index, Tuple):
            return Tuple(self).as_subindex(index)

        if isinstance(index, Slice):
            index = index.reduce()
            if (self.array < 0).any():
                raise NotImplementedError("IntegerArray.as_subindex() is only implemented for arrays with all nonnegative entries. Try calling reduce() with a shape first.")
            if index.step < 0:
                raise NotImplementedError("IntegerArray.as_subindex(Slice) is only implemented for slices with positive steps")

            # After reducing, start is not None when step > 0
            if index.stop is None or index.start < 0 or index.stop < 0:
                raise NotImplementedError("IntegerArray.as_subindex(Slice) is only implemented for slices with nonnegative start and stop. Try calling reduce() with a shape first.")

            # Equivalent to

            # res = []
            # for i in self.array.flat:
            #     s = Slice(i, i+1).as_subindex(index)
            #     if s == Slice(0, 0, 1):
            #         continue
            #     res.append(s.start)
            # return IntegerArray(res)

            # See also Integer.as_subindex().

            s = self.array
            start, stop, step = subindex_slice(s, s+1, 1,
                                               index.start, index.stop, index.step)
            if (stop <= 0).all():
                raise ValueError("Indices do not intersect")

            if start.shape == ():
                if start < stop:
                    return IntegerArray(start)
                else:
                    raise ValueError("Indices do not intersect")

            # Keep only the entries whose one-element slice intersects `index`.
            mask = start < stop
            if not mask.all():
                start = start[mask]

            if 0 in start.shape:
                raise ValueError("Indices do not intersect")
            return IntegerArray(start)

        raise NotImplementedError("IntegerArray.as_subindex is only implemented for slices")

    def selected_indices(self, shape, axis=None):
        if axis is None:
            yield from self.expand(shape).selected_indices(shape)
        else:
            # Called only for its validation of `shape`/`axis`; the result is
            # not needed here.
            shape = asshape(shape, axis=axis)
            for i in self.array.flat:
                yield Integer(i)

    def __eq__(self, other):
        """
        Compare with another `IntegerArray`, a NumPy array, or a list.

        Two indices are equal when their arrays have the same shape and the
        same entries. Any other type, or a list that cannot be converted
        to an `IntegerArray` (`TypeError`), compares unequal.
        """
        from numpy import ndarray

        if isinstance(other, IntegerArray):
            b = other.array
        elif isinstance(other, ndarray):
            b = other
        elif isinstance(other, list):
            try:
                # Unwrap to the underlying array so that the elementwise
                # comparison below is array-vs-array, exactly as in the
                # other branches, rather than array-vs-index-object.
                b = IntegerArray(other).array
            except TypeError:
                return False
        else:
            return False
        a = self.array
        return a.shape == b.shape and (a == b).all()

    def __hash__(self):
        return super().__hash__()
type. + + Invalid indices will raise `IndexError`, `TypeError`, or `ValueError` + (generally, the same error NumPy would raise if the index were used on an + array). + + Indices are created by calling the `ndindex` with raw index objects: + + >>> from ndindex import ndindex + >>> ndindex(slice(0, 10)) + Slice(0, 10, None) + >>> ndindex((slice(0, 10), 0)) + Tuple(slice(0, 10, None), 0) + + Indices can also be created by calling `ndindex` with getitem syntax. + + >>> ndindex[1] + Integer(1) + >>> ndindex[0:10] + Slice(0, 10, None) + >>> ndindex(0:10) + Traceback (most recent call last): + ... + ndindex(0:10) + ^ + SyntaxError: invalid syntax + + The `ndindex[idx]` form should generally be preferred when creating an + index from a tuple or slice literal, since `ndindex(a:b)` is not + syntactically valid and must be typed as `ndindex(slice(a, b))`. + Additionally, the `ndindex[idx]` syntax does not require parentheses when + creating a tuple index: + + >>> ndindex[0, 1] + Tuple(0, 1) + >>> ndindex(0, 1) # doctest:+IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError: NDIndexConstructor.__call__() takes 2 positional arguments but 3 were given + >>> ndindex((0, 1)) + Tuple(0, 1) + + """ + def __getitem__(self, obj): + if isinstance(obj, NDIndexCommon): + return obj + + if 'numpy' in sys.modules: + from numpy import ndarray, bool_ + else: # pragma: no cover + bool_ = bool + ndarray = () + + if isinstance(obj, (bool, bool_)): + from . import BooleanArray + return BooleanArray(obj) + + if isinstance(obj, (list, ndarray)): + from . 
class classproperty:
    """
    Descriptor that evaluates the wrapped function on the owning class.

    Works for access through the class as well as through an instance; in
    both cases the function receives the class, never the instance.
    """
    def __init__(self, f):
        self.f = f

    def __get__(self, obj, owner):
        getter = self.f
        return getter(owner)


class ImmutableObject:
    """
    Base class for immutable objects.

    Every subclass exposes an `.args` attribute holding the full data needed
    to recreate the object, so that

    .. code:: python

        type(obj)(*obj.args) == obj

    always holds.

    Note: subclasses that specifically represent indices should subclass
    :class:`NDIndex` instead.

    Subclasses must define `_typecheck(self, *args)`. It performs type
    checking and basic type canonicalization, and either returns the tuple
    of new arguments for the class or raises an exception. Type checking
    means rejecting input types that are never semantically meaningful for
    numpy arrays (for example, floating point indices), using the same
    exceptions as numpy where possible. Basic type canonicalization means,
    for instance, converting integers into `int` using `operator.index()`.
    All other canonicalization belongs in the `reduce()` method. The
    constructor defined here stores whatever `_typecheck` returns as
    `.args`.

    See Also
    ========

    NDIndex

    """
    __slots__ = ('args',)

    def __init__(self, *args, **kwargs):
        """
        This method should be called by subclasses (via super()) after type-checking
        """
        self.args = self._typecheck(*args, **kwargs)
        """
        `idx.args` contains the arguments needed to create `idx`.

        For an ndindex object `idx`, `idx.args` is always a tuple such that

        .. code:: python

            type(idx)(*idx.args) == idx

        For :class:`Tuple` indices, the elements of `.args` are themselves
        ndindex types. For other types, `.args` contains raw Python types.
        Note that `.args` contains NumPy arrays for :class:`~.IntegerArray`
        and :class:`~.BooleanArray` types, so one should always do equality
        testing or hashing on the ndindex type itself, not its `.args`.

        """

    @classproperty
    def __signature__(self):
        """
        Allow Python 3's inspect.signature to give a useful signature for
        NDIndex subclasses.
        """
        # Here `self` is the class (see classproperty). The signature
        # reported is that of `_typecheck` with its `self` parameter removed.
        params = dict(inspect.signature(self._typecheck).parameters)
        del params['self']
        return inspect.Signature(params.values())

    def __repr__(self):
        rendered = ', '.join(repr(arg) for arg in self.args)
        return f"{self.__class__.__name__}({rendered})"

    def __str__(self):
        rendered = ', '.join(str(arg) for arg in self.args)
        return f"{self.__class__.__name__}({rendered})"

    def __eq__(self, other):
        if isinstance(other, ImmutableObject):
            return self.args == other.args

        # Anything else is first converted with this object's own class;
        # an object that cannot be converted (TypeError) is simply unequal.
        try:
            converted = self.__class__(other)
        except TypeError:
            return False
        return self.args == converted.args

    def __hash__(self): # pragma: no cover
        # Note: subclasses where .args is not hashable should redefine
        # __hash__
        return hash(self.args)
def isvalid(self, shape):
    """
    Check whether a given index is valid on an array of a given shape.

    Returns `True` if an array of shape `shape` can be indexed by `self`
    and `False` if it would raise `IndexError`.

    >>> from ndindex import ndindex
    >>> ndindex(3).isvalid((4,))
    True
    >>> ndindex(3).isvalid((2,))
    False

    Note that some indices can never be valid and will raise a
    `IndexError` or `TypeError` if you attempt to construct them.

    >>> ndindex((..., 0, ...))
    Traceback (most recent call last):
    ...
    IndexError: an index can only have a single ellipsis ('...')
    >>> ndindex(slice(True))
    Traceback (most recent call last):
    ...
    TypeError: 'bool' object cannot be interpreted as an integer

    See Also
    ========
    .NDIndex.newshape

    """
    # Generic fallback: the index is valid exactly when reduce() accepts
    # the shape. Only IndexError means "invalid"; any other exception
    # raised by reduce() propagates to the caller.
    try:
        self.reduce(shape)
    except IndexError:
        return False
    else:
        return True
+ + - The length of the `.args` is equal to the length of the shape plus + the number of :class:`~.Newaxis` indices in `self` plus 1 if there + is a scalar :class:`~.BooleanArray` (`True` or `False`). + + - The resulting :class:`Tuple` has no :class:`ellipses + `. If there are axes that would be matched by an + ellipsis or an implicit ellipsis at the end of the tuple, `Slice(0, + n, 1)` indices are inserted, where `n` is the corresponding axis of + the `shape`. + + - Any array indices in `self` are broadcast together. If `self` + contains array indices (:class:`~.IntegerArray` or + :class:`~.BooleanArray`), then any :class:`~.Integer` indices are + converted into :class:`~.IntegerArray` indices of shape `()` and + broadcast. Note that broadcasting is done in a memory efficient way + so that even if the broadcasted shape is large it will not take up + more memory than the original. + + - Scalar :class:`~.BooleanArray` arguments (`True` or `False`) are + combined into a single term (the same as with :meth:`.Tuple.reduce`). + + - Non-scalar :class:`~.BooleanArray`\ s are all converted into + equivalent :class:`~.IntegerArray`\ s via `nonzero()` and + broadcasted. + + >>> from ndindex import Tuple, Slice + >>> Slice(None).expand((2, 3)) + Tuple(slice(0, 2, 1), slice(0, 3, 1)) + + >>> idx = Tuple(slice(0, 10), ..., None, -3) + >>> idx.expand((5, 3)) + Tuple(slice(0, 5, 1), None, 0) + >>> idx.expand((1, 2, 3)) + Tuple(slice(0, 1, 1), slice(0, 2, 1), None, 0) + >>> idx.expand((5,)) + Traceback (most recent call last): + ... + IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed + >>> idx.expand((5, 2)) + Traceback (most recent call last): + ... 
+ IndexError: index -3 is out of bounds for axis 1 with size 2 + + >>> idx = Tuple(..., [0, 1], -1) + >>> idx.expand((1, 2, 3)) + Tuple(slice(0, 1, 1), [0, 1], [2, 2]) + + See Also + ======== + + .Tuple.reduce + broadcast_arrays + + """ + from .tuple import Tuple + + return Tuple(self).expand(shape) + + def newshape(self, shape): + """ + Returns the shape of `a[idx.raw]`, assuming `a` has shape `shape`. + + `shape` should be a tuple of ints, or an int, which is equivalent to a + 1-D shape. + + Raises `IndexError` if `self` would be invalid for an array of shape + `shape`. + + >>> from ndindex import Slice, Integer, Tuple + >>> shape = (6, 7, 8) + >>> Integer(1).newshape(shape) + (7, 8) + >>> Integer(10).newshape(shape) + Traceback (most recent call last): + ... + IndexError: index 10 is out of bounds for axis 0 with size 6 + >>> Slice(2, 5).newshape(shape) + (3, 7, 8) + >>> Tuple(0, ..., Slice(1, 3)).newshape(shape) + (7, 2) + + See Also + ======== + .NDIndex.isvalid + + """ + raise NotImplementedError + + def as_subindex(self, index): + """ + `i.as_subindex(j)` produces an index `k` such that `a[j][k]` gives all of + the elements of `a[j]` that are also in `a[i]`. + + If `a[j]` is a subset of `a[i]`, then `a[j][k] == a[i]`. Otherwise, + `a[j][k] == a[i & j]`, where `i & j` is the intersection of `i` and + `j`, that is, the elements of `a` that are indexed by both `i` and + `j`. + + For example, in the below diagram, `i` and `j` index a subset of the + array `a`. `k = i.as_subindex(j)` is an index on `a[j]` that gives the + subset of `a[j]` also included in `a[i]`:: + + +------------ self ------------+ + | | + ------------------- a ----------------------- + | | + +------------- index -------------+ + | | + +- self.as_subindex(index) -+ + + `i.as_subindex(j)` is currently only implemented when `j` is a slice + with positive steps and nonnegative start and stop, or a Tuple of the + same. 
To use it with slices with negative start or stop, call + :meth:`reduce` with a shape first. + + `as_subindex` can be seen as the left-inverse of composition, that is, + if `a[i] = a[j][k]`, then `k = i.as_subindex(j)`, so that `k "=" + (j^-1)[i]` (this only works as a true inverse if `j` is a subset of + `i`). + + Note that due to symmetry, `a[j][i.as_subindex(j)]` and + `a[i][j.as_subindex(i)]` will give the same subarrays of `a`, which + will be the array of elements indexed by both `a[i]` and `a[j]`. + + `i.as_subindex(j)` may raise `ValueError` in the case that the indices + `i` and `j` do not intersect at all. + + Examples + ======== + + An example usage of `as_subindex` is to split an index up into + subindices of chunks of an array. For example, say a 1-D array + `a` is chunked up into chunks of size `N`, so that `a[0:N]`, + `a[N:2*N]`, `[2*N:3*N]`, etc. are stored separately. Then an index + `a[i]` can be reindexed onto the chunks via `i.as_subindex(Slice(0, + N))`, `i.as_subindex(Slice(N, 2*N))`, etc. 
+ + >>> from ndindex import Slice + >>> i = Slice(5, 15) + >>> j1 = Slice(0, 10) + >>> j2 = Slice(10, 20) + >>> a = list(range(20)) + >>> a[i.raw] + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + >>> a[j1.raw] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> a[j2.raw] + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + + >>> k1 = i.as_subindex(j1) + >>> k1 + Slice(5, 10, 1) + >>> k2 = i.as_subindex(j2) + >>> k2 + Slice(0, 5, 1) + >>> a[j1.raw][k1.raw] + [5, 6, 7, 8, 9] + >>> a[j2.raw][k2.raw] + [10, 11, 12, 13, 14] + + See Also + ======== + + ndindex.ChunkSize.as_subchunks: + a high-level iterator that efficiently gives only those chunks + that intersect with a given index + ndindex.ChunkSize.num_subchunks + + """ + index = ndindex(index) # pragma: no cover + raise NotImplementedError(f"{type(self).__name__}.as_subindex({type(index).__name__}) isn't implemented yet") + + def isempty(self, shape=None): + """ + Returns whether self always indexes an empty array + + An empty array is an array whose shape contains at least one 0. Note + that scalars (arrays with shape `()`) are not considered empty. + + `shape` can be `None` (the default), or an array shape. If it is + `None`, isempty() will return `True` when `self` is always empty for + any array shape. However, if it gives `False`, it could still give an + empty array for some array shapes, but not all. If you know the shape + of the array that will be indexed, use `idx.isempty(shape)` and the + result will be correct for arrays of shape `shape`. If `shape` is + given and `self` would raise an `IndexError` on an array of shape + `shape`, `isempty()` also raises `IndexError`. 
+ + >>> from ndindex import Tuple, Slice + >>> Tuple(0, slice(0, 1)).isempty() + False + >>> Tuple(0, slice(0, 0)).isempty() + True + >>> Slice(5, 10).isempty() + False + >>> Slice(5, 10).isempty(4) + True + + See Also + ======== + ndindex.Slice.__len__ + + """ + raise NotImplementedError + + def broadcast_arrays(self): + """ + Broadcast all the array indices in self to a common shape and convert + boolean array indices into integer array indices. + + The resulting index is equivalent in all contexts where the original + index is allowed. However, it is possible for the original index to + give an IndexError but for the new index to not, since integer array + indices have less stringent shape requirements than boolean array + indices. There are also some instances for empty indices + (:any:`isempty` is True) where bounds would be checked before + broadcasting but not after. + + Any :class:`~.BooleanArray` indices are converted to + :class:`~.IntegerArray` indices. Furthermore, if there are + :class:`~.BooleanArray` or :class:`~.IntegerArray` indices, then any + :class:`~.Integer` indices are also converted into scalar + :class:`~.IntegerArray` indices and broadcast. Furthermore, if there are + multiple boolean scalar indices (`True` or `False`), they are combined + into a single one. + + Note that array broadcastability is checked in the :class:`Tuple` + constructor, so this method will not raise any exceptions. + + This is part of what is performed by :any:`expand`, but unlike + :any:`expand`, this method does not do any other manipulations, and it + does not require a shape. 
+ + >>> from ndindex import Tuple + >>> idx = Tuple([[False], [True], [True]], [[4], [5], [5]], -1) + >>> print(idx.broadcast_arrays()) + Tuple(IntegerArray([[1 2] [1 2] [1 2]]), + IntegerArray([[0 0] [0 0] [0 0]]), + IntegerArray([[4 4] [5 5] [5 5]]), + IntegerArray([[-1 -1] [-1 -1] [-1 -1]])) + + See Also + ======== + + expand + + """ + return self + + def selected_indices(self, shape, axis=0): + """ + Return an iterator over all indices that are selected by `self` on an + array of shape `shape`. + + The result is a set of indices `i` such that `[a[i] for i in + idx.selected_indices(a.shape)]` is all the elements of `a[idx]`. The + indices are all iterated over in C (i.e., row major) order. + + >>> from ndindex import Slice, Tuple + >>> idx = Slice(5, 10) + >>> list(idx.selected_indices(20)) + [Integer(5), Integer(6), Integer(7), Integer(8), Integer(9)] + >>> idx = Tuple(Slice(5, 10), Slice(0, 2)) + >>> list(idx.selected_indices((20, 3))) + [Tuple(5, 0), Tuple(5, 1), + Tuple(6, 0), Tuple(6, 1), + Tuple(7, 0), Tuple(7, 1), + Tuple(8, 0), Tuple(8, 1), + Tuple(9, 0), Tuple(9, 1)] + + To correspond these indices to the elements of `a[idx]`, you can use + `iter_indices(idx.newshape(shape))`, since both iterators iterate the + indices in C order. + + >>> from ndindex import iter_indices + >>> idx = Tuple(Slice(3, 5), Slice(0, 2)) + >>> shape = (5, 5) + >>> import numpy as np + >>> a = np.arange(25).reshape(shape) + >>> for a_idx, (new_idx,) in zip( + ... idx.selected_indices(shape), + ... iter_indices(idx.newshape(shape))): + ... print(a_idx, new_idx, a[a_idx.raw], a[idx.raw][new_idx.raw]) + Tuple(3, 0) Tuple(0, 0) 15 15 + Tuple(3, 1) Tuple(0, 1) 16 16 + Tuple(4, 0) Tuple(1, 0) 20 20 + Tuple(4, 1) Tuple(1, 1) 21 21 + + See Also + ======== + + ndindex.iter_indices: + An iterator of indices to select every element for arrays of a given shape. 
+ ndindex.ChunkSize.as_subchunks: + A high-level iterator that efficiently gives only those chunks + that intersect with a given index + """ + return self.expand(shape).selected_indices(shape) + +class NDIndex(NDIndexCommon, ImmutableObject): + """ + Represents an index into an nd-array (i.e., a numpy array). + + This is a base class for all ndindex types. All types that subclass this + class should redefine the following methods + + - `_typecheck(self, *args)`. See the docstring of + :class:`ImmutableObject`. + + - `raw` (a **@property** method) should return the raw index that can be + passed as an index to a numpy array. + + In addition other methods on this should be re-defined as necessary. Some + methods have a default implementation on this class, which is sufficient + for some subclasses. + + The methods `__init__` and `__eq__` should *not* be overridden. Equality + (and hashability) on `NDIndex` subclasses is determined by equality of + types and `.args`. Equivalent indices should not attempt to redefine + equality. Rather they should define canonicalization via `reduce()`. + `__hash__` is defined so that the hash matches the hash of `.raw`. If + `.raw` is unhashable, `__hash__` should be overridden to use + `hash(self.args)`. + + See Also + ======== + + ImmutableObject + + """ + __slots__ = () + + # TODO: Make NDIndex and ImmutableObject abstract base classes + @property + def raw(self): + """ + Return the equivalent of `self` that can be used as an index + + NumPy does not allow custom objects to be used as indices, with the + exception of integer indices, so to use an ndindex object as an + index, it is necessary to use `raw`. + + >>> from ndindex import Slice + >>> import numpy as np + >>> a = np.arange(5) + >>> s = Slice(2, 4) + >>> a[s] + Traceback (most recent call last): + ... 
def operator_index(idx):
    """
    Convert `idx` into an integer index using `__index__()` or raise
    `TypeError`.

    This is the same as `operator.index()` except it disallows boolean types.

    This is a slight break in NumPy compatibility, as NumPy allows bools in
    some contexts where `__index__()` is used, for instance, in slices. It
    does disallow it in others, such as in shapes. The main motivation for
    disallowing bools entirely is 1) `numpy.bool_.__index__()` is deprecated
    (currently it matches the built-in `bool.__index__()` and returns the
    object unchanged, but prints a deprecation warning), and 2) for raw
    indices, booleans and `0`/`1` are completely different, i.e., `a[True]` is
    *not* the same as `a[1]`.

    >>> from ndindex.ndindex import operator_index
    >>> operator_index(1)
    1
    >>> operator_index(1.0)
    Traceback (most recent call last):
    ...
    TypeError: 'float' object cannot be interpreted as an integer
    >>> operator_index(True)
    Traceback (most recent call last):
    ...
    TypeError: 'bool' object cannot be interpreted as an integer

    """
    if isinstance(idx, bool):
        raise TypeError("'bool' object cannot be interpreted as an integer")

    # NumPy is never imported here; its bool type is only checked when NumPy
    # has already been loaded. `sys.modules` may also map a name to None
    # (the way an import is blocked), so test the entry itself rather than
    # mere key membership before touching `.bool_`.
    numpy = sys.modules.get('numpy')
    if numpy is not None and isinstance(idx, numpy.bool_):
        raise TypeError("'np.bool_' object cannot be interpreted as an integer")

    return operator.index(idx)
+ + """ + __slots__ = () + + def _typecheck(self): + return () + + @property + def raw(self): + return None + + def reduce(self, shape=None, *, axis=0, negative_int=False): + """ + Reduce a `Newaxis` index + + There is no other index that is equivalent to a newaxis index by + itself, so `Newaxis().reduce()` always returns `Newaxis()` unchanged. + + >>> from ndindex import Newaxis + >>> Newaxis().reduce() + Newaxis() + + See Also + ======== + + .NDIndex.reduce + .Tuple.reduce + .Slice.reduce + .Integer.reduce + .ellipsis.reduce + .IntegerArray.reduce + .BooleanArray.reduce + + """ + if shape is not None: + shape = asshape(shape) + return self + + def isvalid(self, shape): + shape = asshape(shape) + return True + + def newshape(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + # reduce will raise IndexError if it should be raised + self.reduce(shape) + + return (1,) + shape + + def isempty(self, shape=None): + if shape is not None: + return 0 in self.newshape(shape) + + return False + + def __eq__(self, other): + return other is None or isinstance(other, Newaxis) + + def __hash__(self): + return super().__hash__() diff --git a/venv/Lib/site-packages/ndindex/shapetools.py b/venv/Lib/site-packages/ndindex/shapetools.py new file mode 100644 index 0000000..4da386a --- /dev/null +++ b/venv/Lib/site-packages/ndindex/shapetools.py @@ -0,0 +1,496 @@ +import numbers +import itertools +from collections.abc import Sequence +from ._crt import prod + +from .ndindex import ndindex, operator_index + +class BroadcastError(ValueError): + """ + Exception raised by :func:`iter_indices()` and + :func:`broadcast_shapes()` when the input shapes are not broadcast + compatible. 
+ + """ + def __init__(self, arg1, shape1, arg2, shape2): + self.arg1 = arg1 + self.shape1 = shape1 + self.arg2 = arg2 + self.shape2 = shape2 + + def __str__(self): + arg1, shape1, arg2, shape2 = self.args + return f"shape mismatch: objects cannot be broadcast to a single shape. Mismatch is between arg {arg1} with shape {shape1} and arg {arg2} with shape {shape2}." + +class AxisError(ValueError, IndexError): + """ + Exception raised by :func:`iter_indices()` and + :func:`broadcast_shapes()` when the `skip_axes` argument is out of bounds. + + This is used instead of the NumPy exception of the same name so that + `iter_indices` does not need to depend on NumPy. + + """ + def __init__(self, axis, ndim): + # NumPy allows axis=-1 for 0-d arrays + if (ndim < 0 or -ndim <= axis < ndim) and not (ndim == 0 and axis == -1): + raise ValueError(f"Invalid AxisError ({axis}, {ndim})") + self.axis = axis + self.ndim = ndim + + def __str__(self): + return f"axis {self.axis} is out of bounds for array of dimension {self.ndim}" + +def broadcast_shapes(*shapes, skip_axes=()): + """ + Broadcast the input shapes `shapes` to a single shape. + + This is the same as :external+numpy:py:func:`np.broadcast_shapes() + `, except is also supports skipping axes in the + shape with `skip_axes`. + + `skip_axes` can be a tuple of integers which apply to all shapes, or a + list of tuples of integers, one for each shape, which apply to each + respective shape. The `skip_axes` argument works the same as in + :func:`iter_indices`. See its docstring for more details. + + If the shapes are not broadcast compatible (excluding `skip_axes`), + :class:`BroadcastError` is raised. + + >>> from ndindex import broadcast_shapes + >>> broadcast_shapes((2, 3), (3,), (4, 2, 1)) + (4, 2, 3) + >>> broadcast_shapes((2, 3), (5,), (4, 2, 1)) + Traceback (most recent call last): + ... + ndindex.shapetools.BroadcastError: shape mismatch: objects cannot be broadcast to a single shape. 
Mismatch is between arg 0 with shape (2, 3) and arg 1 with shape (5,). + + Axes in `skip_axes` apply to each shape *before* being broadcasted. Each + shape will be broadcasted together with these axes removed. The dimensions + in `skip_axes` do not need to be equal or broadcast compatible with one + another. The final broadcasted shape be the result of broadcasting all the + non-skip axes. + + >>> broadcast_shapes((10, 3, 2), (2, 20), skip_axes=[(0,), (1,)]) + (3, 2) + + """ + shapes = [asshape(shape, allow_int=False) for shape in shapes] + skip_axes = normalize_skip_axes(shapes, skip_axes) + + if not shapes: + return () + + non_skip_shapes = [remove_indices(shape, skip_axis) for shape, skip_axis in zip(shapes, skip_axes)] + dims = [len(shape) for shape in non_skip_shapes] + N = max(dims) + + broadcasted_shape = [1]*N + + arg = None + for i in range(-1, -N-1, -1): + for j in range(len(shapes)): + if dims[j] < -i: + continue + shape = non_skip_shapes[j] + broadcasted_side = broadcasted_shape[i] + shape_side = shape[i] + if shape_side == 1: + continue + elif broadcasted_side == 1: + broadcasted_side = shape_side + arg = j + elif shape_side != broadcasted_side: + raise BroadcastError(arg, shapes[arg], j, shapes[j]) + broadcasted_shape[i] = broadcasted_side + + return tuple(broadcasted_shape) + +def iter_indices(*shapes, skip_axes=(), _debug=False): + """ + Iterate indices for every element of an arrays of shape `shapes`. + + Each shape in `shapes` should be a shape tuple, which are broadcast + compatible along the non-skipped axes. Each iteration step will produce a + tuple of indices, one for each shape, which would correspond to the same + elements if the arrays of the given shapes were first broadcast together. + + This is a generalization of the NumPy :external+numpy:py:class:`np.ndindex() + ` function (which otherwise has no relation). 
+ `np.ndindex()` only iterates indices for a single shape, whereas + `iter_indices()` supports generating indices for multiple broadcast + compatible shapes at once. This is equivalent to first broadcasting the + arrays then generating indices for the single broadcasted shape. + + Additionally, this function supports the ability to skip axes of the + shapes using `skip_axes`. These axes will be fully sliced in each index. + The remaining axes will be indexed one element at a time with integer + indices. + + `skip_axes` should be a tuple of axes to skip or a list of tuples of axes + to skip. If it is a single tuple, it applies to all shapes. Otherwise, + each tuple applies to each shape respectively. It can use negative + integers, e.g., `skip_axes=(-1,)` will skip the last axis. The order of + the axes in `skip_axes` does not matter. Mixing negative and nonnegative + skip axes is supported, but the skip axes must refer to unique dimensions + for each shape. + + The axes in `skip_axes` refer to the shapes *before* broadcasting (if you + want to refer to the axes after broadcasting, either broadcast the shapes + and arrays first, or refer to the axes using negative integers). For + example, `iter_indices((10, 2), (20, 1, 2), skip_axes=(0,))` will skip the + size `10` axis of `(10, 2)` and the size `20` axis of `(20, 1, 2)`. The + result is two sets of indices, one for each element of the non-skipped + dimensions: + + >>> from ndindex import iter_indices + >>> for idx1, idx2 in iter_indices((10, 2), (20, 1, 2), skip_axes=(0,)): + ... print(idx1, idx2) + Tuple(slice(None, None, None), 0) Tuple(slice(None, None, None), 0, 0) + Tuple(slice(None, None, None), 1) Tuple(slice(None, None, None), 0, 1) + + The skipped axes do not themselves need to be broadcast compatible, but + the shapes with all the skipped axes removed should be broadcast + compatible. 
+ + For example, suppose `a` is an array with shape `(3, 2, 4, 4)`, which we + wish to think of as a `(3, 2)` stack of 4 x 4 matrices. We can generate an + iterator for each matrix in the "stack" with `iter_indices((3, 2, 4, 4), + skip_axes=(-1, -2))`: + + >>> for idx in iter_indices((3, 2, 4, 4), skip_axes=(-1, -2)): + ... print(idx) + (Tuple(0, 0, slice(None, None, None), slice(None, None, None)),) + (Tuple(0, 1, slice(None, None, None), slice(None, None, None)),) + (Tuple(1, 0, slice(None, None, None), slice(None, None, None)),) + (Tuple(1, 1, slice(None, None, None), slice(None, None, None)),) + (Tuple(2, 0, slice(None, None, None), slice(None, None, None)),) + (Tuple(2, 1, slice(None, None, None), slice(None, None, None)),) + + .. note:: + + The iterates of `iter_indices` are always a tuple, even if only a + single shape is provided (one could instead use `for idx, in + iter_indices(...)` above). + + As another example, say `a` is shape `(1, 3)` and `b` is shape `(2, 1)`, + and we want to generate indices for every value of the broadcasted + operation `a + b`. We can do this by using `a[idx1.raw] + b[idx2.raw]` for every + `idx1` and `idx2` as below: + + >>> import numpy as np + >>> a = np.arange(3).reshape((1, 3)) + >>> b = np.arange(100, 111, 10).reshape((2, 1)) + >>> a + array([[0, 1, 2]]) + >>> b + array([[100], + [110]]) + >>> for idx1, idx2 in iter_indices((1, 3), (2, 1)): + ... 
print(f"{idx1 = }; {idx2 = }; {(a[idx1.raw], b[idx2.raw]) = }") # doctest: +SKIPNP1 + idx1 = Tuple(0, 0); idx2 = Tuple(0, 0); (a[idx1.raw], b[idx2.raw]) = (np.int64(0), np.int64(100)) + idx1 = Tuple(0, 1); idx2 = Tuple(0, 0); (a[idx1.raw], b[idx2.raw]) = (np.int64(1), np.int64(100)) + idx1 = Tuple(0, 2); idx2 = Tuple(0, 0); (a[idx1.raw], b[idx2.raw]) = (np.int64(2), np.int64(100)) + idx1 = Tuple(0, 0); idx2 = Tuple(1, 0); (a[idx1.raw], b[idx2.raw]) = (np.int64(0), np.int64(110)) + idx1 = Tuple(0, 1); idx2 = Tuple(1, 0); (a[idx1.raw], b[idx2.raw]) = (np.int64(1), np.int64(110)) + idx1 = Tuple(0, 2); idx2 = Tuple(1, 0); (a[idx1.raw], b[idx2.raw]) = (np.int64(2), np.int64(110)) + >>> a + b + array([[100, 101, 102], + [110, 111, 112]]) + + To include an index into the final broadcasted array, you can simply + include the final broadcasted shape as one of the shapes (the function + :func:`broadcast_shapes` is useful here). + + >>> np.broadcast_shapes((1, 3), (2, 1)) + (2, 3) + >>> for idx1, idx2, broadcasted_idx in iter_indices((1, 3), (2, 1), (2, 3)): + ... print(broadcasted_idx) + Tuple(0, 0) + Tuple(0, 1) + Tuple(0, 2) + Tuple(1, 0) + Tuple(1, 1) + Tuple(1, 2) + + """ + skip_axes = normalize_skip_axes(shapes, skip_axes) + shapes = [asshape(shape, allow_int=False) for shape in shapes] + + if not shapes: + yield () + return + + shapes = [asshape(shape) for shape in shapes] + S = len(shapes) + + iters = [[] for i in range(S)] + broadcasted_shape = broadcast_shapes(*shapes, skip_axes=skip_axes) + + idxes = [-1]*S + + while any(i is not None for i in idxes): + for s, it, shape, sk in zip(range(S), iters, shapes, skip_axes): + i = idxes[s] + if i is None: + continue + if -i > len(shape): + if not shape: + pass + elif len(shape) == len(sk): + # The whole shape is skipped. 
Just repeat the most recent slice + it[0] = ncycles(it[0], prod(broadcasted_shape)) + else: + # Find the first non-skipped axis and repeat by however + # many implicit axes are left in the broadcasted shape + for j in range(-len(shape), 0): + if j not in sk: + break + it[j] = ncycles(it[j], prod(broadcasted_shape[:len(sk)-len(shape)+len(broadcasted_shape)])) + + idxes[s] = None + continue + + val = associated_axis(broadcasted_shape, i, sk) + if i in sk: + it.insert(0, [slice(None)]) + else: + if val == 0: + return + elif val != 1 and shape[i] == 1: + it.insert(0, ncycles(range(shape[i]), val)) + else: + it.insert(0, range(shape[i])) + idxes[s] -= 1 + + if _debug: # pragma: no cover + print(f"{iters = }") + for idxes in itertools.zip_longest(*[itertools.product(*i) for i in + iters], fillvalue=()): + yield tuple(ndindex(idx) for idx in idxes) + +#### Internal helpers + + +def asshape(shape, axis=None, *, allow_int=True, allow_negative=False): + """ + Cast `shape` as a valid NumPy shape. + + The input can be an integer `n` (if `allow_int=True`), which is equivalent + to `(n,)`, or a tuple of integers. + + If the `axis` argument is provided, an `IndexError` is raised if it is out + of bounds for the shape. + + The resulting shape is always a tuple of nonnegative integers. If + `allow_negative=True`, negative integers are also allowed. 
+ + All ndindex functions that take a shape input should use:: + + shape = asshape(shape) + + or:: + + shape = asshape(shape, axis=axis) + + """ + from .integer import Integer + from .tuple import Tuple + if isinstance(shape, (Tuple, Integer)): + raise TypeError("ndindex types are not meant to be used as a shape - " + "did you mean to use the built-in tuple type?") + + if isinstance(shape, numbers.Number): + if allow_int: + shape = (operator_index(shape),) + else: + raise TypeError(f"expected sequence of integers, not {type(shape).__name__}") + + if not isinstance(shape, Sequence) or isinstance(shape, str): + raise TypeError("expected sequence of integers" + allow_int*" or a single integer" + ", not " + type(shape).__name__) + l = len(shape) + + newshape = [] + # numpy uses __getitem__ rather than __iter__ to index into shape, so we + # match that + for i in range(l): + # Raise TypeError if invalid + val = shape[i] + if val is None: + raise ValueError("unknonwn (None) dimensions are not supported") + + newshape.append(operator_index(shape[i])) + + if not allow_negative and val < 0: + raise ValueError("unknown (negative) dimensions are not supported") + + if axis is not None: + if len(newshape) <= axis: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional, but {axis + 1} were indexed") + + return tuple(newshape) + +def associated_axis(broadcasted_shape, i, skip_axes): + """ + Return the associated element of `broadcasted_shape` corresponding to + `shape[i]` given `skip_axes`. If there is not such element (i.e., it's out + of bounds), returns None. + + This function makes implicit assumptions about its input and is only + designed for internal use. 
+ + """ + skip_axes = sorted(skip_axes, reverse=True) + if i >= 0: + raise NotImplementedError + if i in skip_axes: + return None + # We assume skip_axes are all negative and sorted + j = i + for sk in skip_axes: + if sk >= i: + j += 1 + else: + break + if ndindex(j).isvalid(len(broadcasted_shape)): + return broadcasted_shape[j] + return None + +def remove_indices(x, idxes): + """ + Return `x` with the indices `idxes` removed. + + This function is only intended for internal usage. + """ + if isinstance(idxes, int): + idxes = (idxes,) + dim = len(x) + _idxes = sorted({i if i >= 0 else i + dim for i in idxes}) + _idxes = [i - a for i, a in zip(_idxes, range(len(_idxes)))] + _x = list(x) + for i in _idxes: + _x.pop(i) + return tuple(_x) + +def unremove_indices(x, idxes, *, val=None): + """ + Insert `val` in `x` so that it appears at `idxes`. + + Note that idxes must be either all negative or all nonnegative. + + This function is only intended for internal usage. + """ + if any(i >= 0 for i in idxes) and any(i < 0 for i in idxes): + # A mix of positive and negative indices presents a fundamental + # problem: sometimes the result is not unique. For example, x = [0]; + # idxes = [1, -1] could be satisfied by both [0, None] or [0, None, + # None], depending on whether each index refers to a separate None or + # not (note that both cases are supported by remove_indices(), because + # there it is unambiguous). But even worse, in some cases, there may + # be no way to satisfy the given requirement. For example, given x = + # [0, 1, 2, 3]; idxes = [3, -3], there is no way to insert None into x + # so that remove_indices(res, idxes) == x. 
To see this, simply observe + # that there is no size list x such that remove_indices(x, [3, -3]) + # returns a tuple of size 4: + # + # >>> [len(remove_indices(list(range(n)), [3, -3])) for n in range(4, 10)] + # [2, 3, 5, 5, 6, 7] + raise NotImplementedError("Mixing both negative and nonnegative idxes is not yet supported") + x = list(x) + n = len(idxes) + len(x) + _idxes = sorted({i if i >= 0 else i + n for i in idxes}) + for i in _idxes: + x.insert(i, val) + return tuple(x) + +class ncycles: + """ + Iterate `iterable` repeated `n` times. + + This is based on a recipe from the `Python itertools docs + `_, + but improved to give a repr, and to denest when it can. This makes + debugging :func:`~.iter_indices` easier. + + This is only intended for internal usage. + + >>> from ndindex.shapetools import ncycles + >>> ncycles(range(3), 2) + ncycles(range(0, 3), 2) + >>> list(_) + [0, 1, 2, 0, 1, 2] + >>> ncycles(ncycles(range(3), 3), 2) + ncycles(range(0, 3), 6) + + """ + def __new__(cls, iterable, n): + if n == 1: + return iterable + return object.__new__(cls) + + def __init__(self, iterable, n): + if isinstance(iterable, ncycles): + self.iterable = iterable.iterable + self.n = iterable.n*n + else: + self.iterable = iterable + self.n = n + + def __repr__(self): + return f"ncycles({self.iterable!r}, {self.n!r})" + + def __iter__(self): + return itertools.chain.from_iterable(itertools.repeat(tuple(self.iterable), self.n)) + +def normalize_skip_axes(shapes, skip_axes): + """ + Return a canonical form of `skip_axes` corresponding to `shapes`. + + A canonical form of `skip_axes` is a list of tuples of integers, one for + each shape in `shapes`, which are a unique set of axes for each + corresponding shape. + + If `skip_axes` is an integer, this is basically `[(skip_axes,) for s + in shapes]`. If `skip_axes` is a tuple, it is like `[skip_axes for s in + shapes]`. + + The `skip_axes` must always refer to unique axes in each shape. 
+ + The returned `skip_axes` will always be negative integers and will be + sorted. + + This function is only intended for internal usage. + + """ + # Note: we assume asshape has already been called on the shapes in shapes + if isinstance(skip_axes, Sequence): + if skip_axes and all(isinstance(i, Sequence) for i in skip_axes): + if len(skip_axes) != len(shapes): + raise ValueError(f"Expected {len(shapes)} skip_axes") + return [normalize_skip_axes([shape], skip_axis)[0] for shape, skip_axis in zip(shapes, skip_axes)] + else: + try: + [operator_index(i) for i in skip_axes] + except TypeError: + raise TypeError("skip_axes must be an integer, a tuple of integers, or a list of tuples of integers") + + skip_axes = asshape(skip_axes, allow_negative=True) + + # From here, skip_axes is a single tuple of integers + + if not shapes and skip_axes: + raise ValueError("skip_axes must be empty if there are no shapes") + + new_skip_axes = [] + err = None + for shape in shapes: + s = tuple(sorted(ndindex(i).reduce(len(shape), negative_int=True, axiserror=True).raw for i in skip_axes)) + if len(s) != len(set(s)): + err = ValueError(f"skip_axes {skip_axes} are not unique for shape {shape}") + # For testing + err.skip_axes = skip_axes + err.shape = shape + new_skip_axes.append(s) + if err: + raise err + return new_skip_axes diff --git a/venv/Lib/site-packages/ndindex/slice.py b/venv/Lib/site-packages/ndindex/slice.py new file mode 100644 index 0000000..9c14542 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/slice.py @@ -0,0 +1,546 @@ +from .ndindex import NDIndexCommon +from .subindex_helpers import subindex_slice +from .shapetools import asshape +from ._slice import _Slice + +class default: + """ + A default keyword argument value. + + Used as the default value for keyword arguments where `None` is also a + meaningful value but not the default. + + """ + pass + +class Slice(_Slice, NDIndexCommon): + """ + Represents a slice on an axis of an nd-array. 
+ + `Slice(x)` with one argument is equivalent to `Slice(None, x)`. `Slice(x, + y)` with two arguments is equivalent to `Slice(x, y, None)`. + + `start` and `stop` can be any integer, or `None`. `step` can be any + nonzero integer or `None`. + + `Slice(a, b)` is the same as the syntax `a:b` in an index and `Slice(a, b, + c)` is the same as `a:b:c`. An argument being `None` is equivalent to the + syntax where the item is omitted, for example, `Slice(None, None, k)` is + the same as the syntax `::k`. + + `Slice.args` always has three arguments, and does not make any distinction + between, for instance, `Slice(x, y)` and `Slice(x, y, None)`. This is + because Python itself does not make the distinction between `x:y` and + `x:y:` syntactically. + + See :doc:`../indexing-guide/slices` for a complete description of the + semantics of slices. + + Slice has attributes `start`, `stop`, and `step` to access the + corresponding attributes. + + >>> from ndindex import Slice + >>> s = Slice(10) + >>> s + Slice(None, 10, None) + >>> print(s.start) + None + >>> s.args + (None, 10, None) + >>> s.raw + slice(None, 10, None) + + For most use cases, it's more convenient to create Slice objects using + `ndindex[slice]`, which allows using `a:b` slicing syntax: + + >>> from ndindex import ndindex + >>> ndindex[0:10] + Slice(0, 10, None) + + """ + __slots__ = () + + def __repr__(self): + return f"{self.__class__.__name__}({', '.join(map(repr, self.args))})" + + def __hash__(self): + # Slices are only hashable in Python 3.12+ + try: + return hash(self.raw) + except TypeError: # pragma: no cover + return hash(self.args) + + def __len__(self): + """ + `len()` gives the maximum size of an axis sliced with `self`. + + An actual array may produce a smaller size if it is smaller than the + bounds of the slice. For instance, `[0, 1, 2][2:4]` only has 1 element + but the maximum length of the slice `2:4` is 2. 
+ + >>> from ndindex import Slice + >>> [0, 1, 2][2:4] + [2] + >>> len(Slice(2, 4)) + 2 + >>> [0, 1, 2, 3][2:4] + [2, 3] + + If there is no such maximum, it raises `ValueError`. + + >>> # From the second element to the end, which could have any size + >>> len(Slice(1, None)) + Traceback (most recent call last): + ... + ValueError: Cannot determine max length of slice + + The :meth:`Slice.reduce` method with a `shape` argument returns a + `Slice` that always has a correct `len` which doesn't raise + `ValueError`. + + >>> Slice(2, 4).reduce(3) + Slice(2, 3, 1) + >>> len(_) + 1 + + Be aware that `len(Slice)` only gives the size of the axis being + sliced. It does not say anything about the total shape of the array. + In particular, the array may be empty after slicing if one of its + dimensions is 0, but the other dimensions may be nonzero. To check if + an array will empty after indexing, use :meth:`isempty`. + + See Also + ======== + isempty + + """ + s = self + if None in self.args or self.start < 0 or self.stop < 0: + s = s.reduce() + start, stop, step = s.args + error = ValueError("Cannot determine max length of slice") + # We reuse the logic in range.__len__. However, it is only correct if + # start and stop are nonnegative. + if step > 0: + # start cannot be None + if stop is None: + if start >= 0: + # a[n:]. Extends to the end of the array. + raise error + else: + # a[-n:]. From n from the end to the end. Same as + # range(-n, 0). + stop = 0 + elif start < 0 and stop >= 0: + # a[-n:m] indexes from nth element from the end to the + # m-1th element from the beginning. + start, stop = 0, min(-start, stop) + elif start >=0 and stop < 0: + # a[n:-m]. The max length depends on the size of the array. + raise error + else: + if stop is None: + if start >= 0: + # a[n::-1] (start != None by above). Same as range(n, -1, -1) + stop = -1 + else: + # a[-n::-1]. From n from the end to the beginning of the + # array backwards. 
The max length depends on the size of + # the array. + raise error + elif start < 0 and stop >= 0: + # a[-n:m:-1]. The max length depends on the size of the array + raise error + elif start >=0 and stop < 0: + # a[n:-m:-1] indexes from the nth element backwards to the mth + # element from the end. + start, stop = 0, min(start+1, -stop - 1) + step = -step + + return len(range(start, stop, step)) + + def reduce(self, shape=None, *, axis=0, negative_int=False): + """ + `Slice.reduce` returns a slice that is canonicalized for an array of the + given shape, or for any shape if `shape` is `None` (the default). + + `Slice.reduce` is a perfect canonicalization, meaning that two slices + are equal---for all array shapes if `shape=None` or for arrays of + shape `shape` otherwise---if and only if they `reduce` to the same + `Slice` object. Note that ndindex objects do not simplify + automatically, and `==` only does exact equality comparison, so to + test that two slices are equal, use `slice1.reduce(shape) == + slice2.reduce(shape)`. + + - If `shape` is `None`, the following properties hold after calling + `reduce()`: + + - `start` is not `None`. + + - `stop` is not `None`, when possible. The reduced `stop` can only + be `None` if the original `stop` is. + + - `step` is not `None`. + + - `step` is as close to 0 as possible. + + - If the slice is always empty, the resulting slice will be + `Slice(0, 0, 1)`. However, one should prefer the :any:`isempty` + method to test if a slice is always empty. + + In particular, `stop` may be `None`, even after canonicalization + with `reduce()` with no `shape`. This is because some slices are + impossible to represent without `None` without making assumptions + about the array shape. For example, `Slice(0, None)` cannot be + equivalent to a slice with `stop != None` for all array shapes. To + get a slice where the `start`, `stop`, and `step` are always + integers, use `reduce(shape)` with an explicit array shape. 
+ + Note that `Slice` objects that index a single element are not + canonicalized to `Integer`, because integer indices always remove an + axis whereas slices keep the axis. Furthermore, slices cannot raise + `IndexError` except on arrays with shape equal to `()`. + + >>> from ndindex import Slice + >>> Slice(10).reduce() + Slice(0, 10, 1) + >>> Slice(1, 3, 3).reduce() + Slice(1, 2, 1) + + - If an explicit shape is given, the following properties are true + after calling `Slice.reduce(shape)`: + + - `start`, `stop`, and `step` are not `None`, + + - `start` is nonnegative. + + - `stop` is nonnegative whenever possible. In particular, `stop` is + only negative when it has to be to represent the given slice, + i.e., a slice with negative `step` that indexes more than 1 + element and indexes the first (index `0`) element (in this case, + it will be `-n - 1` where `n` is the size of the axis being + sliced). + + - `stop` is as small as possible for positive `step` or large as + possible for negative `step`. + + - `step` is as close to 0 as possible. + + - If the slice is empty for the given shape, the resulting slice + will be `Slice(0, 0, 1)`. However, one should prefer the + :any:`isempty` method to test if a slice is always empty. + + - If the slice indexes a single element, the resulting slice will be + of the form `Slice(i, i+1, 1)`. However, one should prefer using + `len(s.reduce(shape)) == 1` to test if a slice indexes exactly 1 + element. + + - :any:`len() ` gives the true size of the axis for a + sliced array of the given shape, and never raises `ValueError`. + + The `axis` argument can be used to specify an axis of the shape (by + default, `axis=0`). For convenience, `shape` can be passed as an integer + for a single dimension. 
+ + + >>> from ndindex import Slice + >>> Slice(1, 10).reduce(3) + Slice(1, 3, 1) + >>> Slice(-1, 1, -2).reduce(4) + Slice(3, 4, 1) + >>> Slice(1, 10, 3).reduce((4, 5), axis=0) + Slice(1, 2, 1) + >>> Slice(1, 10, 3).reduce((4, 5), axis=1) + Slice(1, 5, 3) + + >>> s = Slice(2, None) + >>> len(s) + Traceback (most recent call last): + ... + ValueError: Cannot determine max length of slice + >>> s.reduce((5,)) + Slice(2, 5, 1) + >>> len(_) + 3 + + See Also + ======== + + .NDIndex.reduce + .Tuple.reduce + .Integer.reduce + .ellipsis.reduce + .Newaxis.reduce + .IntegerArray.reduce + .BooleanArray.reduce + + """ + if self._reduced and shape is None: + return self + + start, stop, step = self.args + + # Canonicalize with no shape + + if step is None: + step = 1 + if start is None: + if step > 0: + start = 0 + else: # step < 0 + start = -1 + + if start is not None and stop is not None: + if start >= 0 and stop >= 0 or start < 0 and stop < 0: + if step > 0: + if stop <= start: + start, stop, step = 0, 0, 1 + elif start >= 0 and start + step >= stop: + # Indexes 1 element. Start has to be >= 0 because a + # negative start could be less than the size of the + # axis, in which case it will clip and the single + # element will be element 0. We can only do that + # reduction if we know the shape. + + # Note that returning Integer here is wrong, because + # slices keep the axis and integers remove it. + stop, step = start + 1, 1 + elif start < 0 and start + step > stop: + # The exception is this case where stop is already + # start + 1. + step = stop - start + if start >= 0: + stop -= (stop - start - 1) % step + else: # step < 0 + if stop >= start: + start, stop, step = 0, 0, 1 + elif start < 0 and start + step <= stop: + if start < -1: + stop, step = start + 1, 1 + else: # start == -1 + stop, step = start - 1, -1 + elif stop == start - 1: + stop, step = start + 1, 1 + elif start >= 0 and start + step <= stop: + # Indexes 0 or 1 elements. 
We can't change stop + # because start might clip to a smaller true start if + # the axis is smaller than it, and increasing stop + # would prevent it from indexing an element in that + # case. The exception is the case right before this + # one (stop == start - 1). In that case start cannot + # clip past the stop (it always indexes the same one + # element in the cases where it indexes anything at + # all). + step = stop - start + if start < 0: + stop -= (stop - start + 1) % step + elif start >= 0 and stop < 0 and step < 0 and (start < -step or + -stop - 1 < -step): + if stop == -1: + start, stop, step = 0, 0, 1 + else: + step = max(-start - 1, stop + 1) + elif start < 0 and stop == 0 and step > 0: + start, stop, step = 0, 0, 1 + elif start < 0 and stop >= 0 and step >= min(-start, stop): + step = min(-start, stop) + if start == -1 or stop == 1: + # Can only index 0 or 1 elements. We can either pick a + # version with positive start and negative step, or + # negative start and positive step. We prefer the former + # as it matches what is done for reduce() with a shape + # (start is always nonnegative). 
+ assert step == 1 + start, stop, step = stop - 1, start - 1, -1 + elif start is not None and stop is None: + if start == -1 and step > 0: + start, stop, step = (-1, -2, -1) + elif start < 0 and step >= -start: + step = -start + elif step < 0: + if start == 0: + start, stop, step = 0, 1, 1 + elif 0 <= start < -step: + step = -start - 1 + if shape is None: + return type(self)(start, stop, step, _reduced=True) + + # Further canonicalize with an explicit array shape + + shape = asshape(shape, axis=axis) + size = shape[axis] + + if stop is None: + if step > 0: + stop = size + else: + stop = -size - 1 + + if stop < -size: + stop = -size - 1 + + if size == 0: + start, stop, step = 0, 0, 1 + elif step > 0: + # start cannot be None + if start < 0: + start = size + start + if start < 0: + start = 0 + if start >= size: + start, stop, step = 0, 0, 1 + + if stop < 0: + stop = size + stop + if stop < 0: + stop = 0 + else: + stop = min(stop, size) + stop -= (stop - start - 1) % step + + if stop - start == 1: + # Indexes 1 element. + step = 1 + elif stop - start <= 0: + start, stop, step = 0, 0, 1 + else: + if start < 0: + if start >= -size: + start = size + start + else: + start, stop = 0, 0 + if start >= 0: + start = min(size - 1, start) + + if -size <= stop < 0: + stop += size + + if stop >= 0: + if start - stop == 1: + stop, step = start + 1, 1 + elif start - stop <= 0: + start, stop, step = 0, 0, 1 + else: + stop += (start - stop - 1) % -step + + # start >= 0 + if (stop < 0 and start - size - stop <= -step + or stop >= 0 and start - stop <= -step): + stop, step = start + 1, 1 + if stop < 0 and start % step != 0: + # At this point, negative stop is only necessary to index the + # first element. If that element isn't actually indexed, we + # prefer a nonnegative stop. Otherwise, stop will be -size - 1. 
+ stop = start % -step - 1 + return self.__class__(start, stop, step, _reduced=True) + + def isvalid(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + # All slices are valid as long as there is at least one dimension + return bool(shape) + + def newshape(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + idx = self.reduce(shape) + + # len() won't raise an error after reducing with a shape + return (len(idx),) + shape[1:] + + # TODO: Better name? + def as_subindex(self, index): + # The docstring of this method is currently on NDindex.as_subindex, as + # this is the only method that is actually implemented so far. + index = ndindex(index) + index_orig = index + + s = self.reduce() + index = index.reduce() + + if isinstance(index, Tuple): + return Tuple(self).as_subindex(index) + + if isinstance(index, Integer): + if index == -1: + s = self.as_subindex(Slice(index.args[0], None)) + else: + s = self.as_subindex(Slice(index.args[0], index.args[0] + 1)) + if s == Slice(0, 0, 1): + # There is no index that we can return here. The intersection + # of `self` and `index` is empty. Ideally we want to give an + # index that gives an empty array, but we cannot make the + # shape match. If a is dimension 1, then a[index] is dimension + # 0, so a[index][slice(0, 0)] will not work. A possibility + # would be to return False, which would add a length-0 + # dimension to the array. But + # + # 1. this isn't implemented yet, and + # 2. a False can only add a length-0 dimension once, so it + # still wouldn't work in every case. For example, + # Tuple(slice(0), slice(0)).as_subindex((0, 0)) would need + # to return an index that replaces the first two + # dimensions with length-0 dimensions. 
+ raise ValueError(f"{self} and {index_orig} do not intersect") + assert len(s) == 1 + return Tuple() + + if s.step < 0: + raise NotImplementedError("Slice.as_subindex() is only implemented for slices with positive steps") + + # After reducing, start is not None when step > 0 + if s.stop is None or s.start < 0 or s.stop < 0: + raise NotImplementedError("Slice.as_subindex() is only implemented for slices with nonnegative start and stop. Try calling reduce() with a shape first.") + + if isinstance(index, IntegerArray): + idx = index.array + if (idx < 0).any(): + raise NotImplementedError("Slice.as_subindex(IntegerArray) is not yet implemented for arrays with negative values. Try calling reduce with a shape first.") + start, stop, step = subindex_slice(s.start, s.stop, s.step, + idx, idx+1, 1) + res = BooleanArray(start < stop) + + if not res.count_nonzero: + raise ValueError("Indices do not intersect") + + return res + + if not isinstance(index, Slice): + raise NotImplementedError("Slice.as_subindex() is only implemented for tuples, integers, arrays and slices") + + if index.step < 0: + raise NotImplementedError("Slice.as_subindex() is only implemented for slices with positive steps") + + # After reducing, start is not None when step > 0 + if index.stop is None or index.start < 0 or index.stop < 0: + raise NotImplementedError("Slice.as_subindex() is only implemented for slices with nonnegative start and stop. 
Try calling reduce() with a shape first.") + + return Slice(*subindex_slice(s.start, s.stop, s.step, index.start, + index.stop, index.step)).reduce() + + def isempty(self, shape=None): + if shape is not None: + return 0 in self.newshape(shape) + + try: + l = len(self) + except (TypeError, ValueError): + return False + return l == 0 + + def selected_indices(self, shape, axis=None): + if axis is None: + yield from self.expand(shape).selected_indices(shape) + else: + shape = asshape(shape, axis=axis) + for i in range(shape[axis])[self.raw]: + yield Integer(i) + +# Imports at the bottom to avoid circular import issues +from .ndindex import ndindex +from .tuple import Tuple +from .integer import Integer +from .integerarray import IntegerArray +from .booleanarray import BooleanArray diff --git a/venv/Lib/site-packages/ndindex/subindex_helpers.py b/venv/Lib/site-packages/ndindex/subindex_helpers.py new file mode 100644 index 0000000..9784464 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/subindex_helpers.py @@ -0,0 +1,115 @@ +""" +This file has the main algorithm for Slice.as_subindex(Slice) + +Since Integer can use the same algorithm via Slice(i, i+1), and IntegerArray +needs to do this but in a way that only uses array friendly operations, we +need to have this factored out into a separately callable function. +""" + +import sys + +from ._crt import crt, ilcm + +def _crt(m1, m2, v1, v2): + """ + Chinese Remainder Theorem + + Returns x such that x = v1 (mod m1) and x = v2 (mod m2), or None if no + such solution exists. + + """ + # Avoid calling crt in the cases where the inputs would be arrays. + if m1 == 1: + return v2 % m2 + if m2 == 1: + return v1 % m1 + + assert m1 > 0 + assert m2 > 0 + res = crt([m1, m2], [v1, v2]) + if res is None: + return res + return res + +def _ilcm(a, b): + # Avoid calling ilcm in the cases where the inputs would be arrays. 
+ if a == 1: + return b + if b == 1: + return a + + assert a > 0 + assert b > 0 + + return ilcm(a, b) + +def where(cond, x, y): + if 'numpy' in sys.modules: + from numpy import where + return where(cond, x, y) + return x if cond else y # pragma: no cover + +def ceiling(a, b): + """ + Returns ceil(a/b) + """ + return -(-a//b) + +def _max(a, b): + if isinstance(a, int) and isinstance(b, int): + return max(a, b) + + from numpy import broadcast_arrays, amax + + return amax(broadcast_arrays(a, b), axis=0) + +def _min(a, b): + if isinstance(a, int) and isinstance(b, int): + return min(a, b) + + from numpy import broadcast_arrays, amin + + return amin(broadcast_arrays(a, b), axis=0) + +def _smallest(x, a, m): + """ + Gives the smallest integer >= x that equals a (mod m) + + Assumes x >= 0, m >= 1, and 0 <= a < m. + """ + n = ceiling(x - a, m) + return a + n*m + +def subindex_slice(s_start, s_stop, s_step, i_start, i_stop, i_step): + """ + Computes s.as_subindex(i) for slices s and i in a way that is (mostly) + compatible with NumPy arrays. + + Returns (start, stop, step). + + """ + # Chinese Remainder Theorem. We are looking for a solution to + # + # x = s.start (mod s.step) + # x = index.start (mod index.step) + # + # If crt() returns None, then there are no solutions (the slices do + # not overlap). 
+ common = _crt(s_step, i_step, s_start, i_start) + + if common is None: + return (0, 0, 1) + lcm = _ilcm(s_step, i_step) + start = _max(s_start, i_start) + + # Get the smallest lcm multiple of common that is >= start + start = _smallest(start, common, lcm) + # Finally, we need to shift start so that it is relative to index + start = (start - i_start)//i_step + + stop = ceiling((_min(s_stop, i_stop) - i_start), i_step) + stop = where(stop < 0, 0, stop) + + step = lcm//i_step # = s_step//igcd(s_step, i_step) + + return (start, stop, step) diff --git a/venv/Lib/site-packages/ndindex/tests/__init__.py b/venv/Lib/site-packages/ndindex/tests/__init__.py new file mode 100644 index 0000000..e08d011 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/__init__.py @@ -0,0 +1,60 @@ +""" +Tests are extremely important for ndindex. All operations should produce +correct results. We test this by checking against numpy arange (the array +values do not matter, so long as they are distinct). + +There are three primary types of tests that we employ to verify this: + +- Exhaustive tests. These test every possible value in some range. See for + example test_slice. This is the best type of test, but unfortunately, it is + often impossible to do due to combinatorial explosion. + +- Hypothesis tests. Hypothesis is a library that can intelligently check a + combinatorial search space. This requires writing hypothesis strategies that + can generate all the relevant types of indices (see helpers.py). For more + information on hypothesis, see + https://hypothesis.readthedocs.io/en/latest/index.html. + +- Explicit tests. These are hand crafted tests that test that the output of a + function is some exact value. These are used when a property-based test is + difficult to write, and it is simpler to test the exact output value. These + tests still include a correctness check (check_same()) to make sure the + hard-coded output is actually correct. 
+ +The basic idea in all cases is the same. Take the pure index and the +ndindex(index).raw, or in the case of a transformation, the before and after +raw index, and index an arange with them. If they do not give the same output +array, or do not both produce the same error, the code is not correct. + +Why bother with hypothesis if the same thing is already tested exhaustively? +The main reason is that hypothesis is much better at producing human-readable +failure examples. When an exhaustive test fails, the failure will always be +from the first set of inputs in the loop that produces a failure. Hypothesis +on the other hand attempts to "shrink" the failure input to the smallest input +that still fails. For example, a failing exhaustive slice test might give +Slice(-10, -9, -10) as the failing example, but hypothesis would shrink it +to Slice(-2, -1, -1). Another reason for the duplication is that hypothesis +can sometimes test a slightly expanded test space without any additional +consequences. For example, test_slice_reduce_hypothesis() tests all types of +array shapes, whereas test_slice_reduce_exhaustive() tests only 1-dimensional +shapes. This doesn't affect things because hypothesis will always shrink large +shapes to a 1-dimensional shape in the case of a failure. Consequently every +exhaustive test should have a corresponding hypothesis test. + +For things that can only be tested with hypothesis, you can use @example to +force certain combinations to be tested. This is useful because we require +100% test coverage, and hypothesis's randomness can cause this to be flaky +otherwise. + +""" + +# Variable naming conventions in the tests: + +# a: numpy arange. 
May be reshaped to be multidimensional +# shape: a tuple of integers +# i: integer used as an integer index +# idx: generic index (Python type) +# index: generic index (ndindex type) +# s: slice (Python type) +# S: Slice (ndindex type) +# size: integer passed to arange diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..4e96ad3 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/doctest.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/doctest.cpython-311.pyc new file mode 100644 index 0000000..1e9599c Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/doctest.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/helpers.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/helpers.cpython-311.pyc new file mode 100644 index 0000000..014c35b Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/helpers.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_array.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_array.cpython-311.pyc new file mode 100644 index 0000000..21d0a08 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_array.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_as_subindex.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_as_subindex.cpython-311.pyc new file mode 100644 index 0000000..66b2b90 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_as_subindex.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_booleanarray.cpython-311.pyc 
b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_booleanarray.cpython-311.pyc new file mode 100644 index 0000000..01f3502 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_booleanarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_broadcast_arrays.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_broadcast_arrays.cpython-311.pyc new file mode 100644 index 0000000..53bd003 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_broadcast_arrays.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_chunking.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_chunking.cpython-311.pyc new file mode 100644 index 0000000..e996d60 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_chunking.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_crt.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_crt.cpython-311.pyc new file mode 100644 index 0000000..cfac790 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_crt.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_ellipsis.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_ellipsis.cpython-311.pyc new file mode 100644 index 0000000..f8792b7 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_ellipsis.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_expand.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_expand.cpython-311.pyc new file mode 100644 index 0000000..7131614 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_expand.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_integer.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_integer.cpython-311.pyc new file mode 100644 index 0000000..5d26e47 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_integer.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_integerarray.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_integerarray.cpython-311.pyc new file mode 100644 index 0000000..5a32821 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_integerarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_isvalid.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_isvalid.cpython-311.pyc new file mode 100644 index 0000000..3ad3d87 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_isvalid.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_ndindex.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_ndindex.cpython-311.pyc new file mode 100644 index 0000000..6cad7d8 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_ndindex.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_newaxis.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_newaxis.cpython-311.pyc new file mode 100644 index 0000000..0878900 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_newaxis.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_newshape.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_newshape.cpython-311.pyc new file mode 100644 index 0000000..10c4d3a Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_newshape.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_no_dependencies.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_no_dependencies.cpython-311.pyc new file mode 100644 index 0000000..2b5f313 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_no_dependencies.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_selected_indices.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_selected_indices.cpython-311.pyc new file mode 100644 index 0000000..dcb6dfa Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_selected_indices.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_shapetools.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_shapetools.cpython-311.pyc new file mode 100644 index 0000000..72d1eb1 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_shapetools.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_slice.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_slice.cpython-311.pyc new file mode 100644 index 0000000..f5edd0f Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_slice.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/__pycache__/test_tuple.cpython-311.pyc b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_tuple.cpython-311.pyc new file mode 100644 index 0000000..e3fd4d0 Binary files /dev/null and b/venv/Lib/site-packages/ndindex/tests/__pycache__/test_tuple.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/ndindex/tests/doctest.py b/venv/Lib/site-packages/ndindex/tests/doctest.py new file mode 100644 index 0000000..f16ba9f --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/doctest.py @@ -0,0 +1,98 @@ +""" +Custom script to run the doctests + +This runs the doctests but ignores 
trailing ``` in Markdown documents. + +Running this separately from pytest also allows us to not include the doctests +in the coverage. It also allows us to force a separate namespace for each +docstring's doctest, which the pytest doctest integration does not allow. + +TODO: Make these tests also run with pytest, but still keeping them out of the +coverage. + +WARNING: This file registers these flags globally in the standard library +doctest module and monkeypatches the DoctestRunner object. It should only be +run as a standalone script. Do not attempt to import it. + +""" + +import numpy + +import sys +import unittest +import glob +import os +from contextlib import contextmanager +from doctest import (DocTestRunner, DocFileSuite, DocTestSuite, + NORMALIZE_WHITESPACE, ELLIPSIS, register_optionflag) +import doctest + +SKIPNP1 = register_optionflag("SKIPNP1") +NP1 = numpy.__version__.startswith('1') +if NP1: + SKIP_THIS_VERSION = SKIPNP1 +else: + SKIP_THIS_VERSION = 0 + +@contextmanager +def patch_doctest(): + """ + Context manager to patch the doctester + + The doctester must be patched + """ + orig_run = DocTestRunner.run + orig_indent = doctest._indent + + def run(self, test, **kwargs): + filtered_examples = [] + + for example in test.examples: + if SKIP_THIS_VERSION not in example.options: + filtered_examples.append(example) + + # Remove ``` + example.want = example.want.replace('```\n', '') + example.exc_msg = example.exc_msg and example.exc_msg.replace('```\n', '') + + test.examples = filtered_examples + return orig_run(self, test, **kwargs) + + # Doctest indents the output, which is annoying for copy-paste, so disable + # it. 
+ def _indent(s, **kwargs): + return s + + try: + DocTestRunner.run = run + doctest._indent = _indent + yield + finally: + DocTestRunner.run = orig_run + doctest._indent = orig_indent + +DOCS = os.path.realpath(os.path.join(__file__, os.path.pardir, os.path.pardir, + os.pardir, 'docs')) +MARKDOWN = glob.glob(os.path.join(DOCS, '**', '*.md'), recursive=True) +RST = glob.glob(os.path.join(DOCS, '**', '*.rst'), recursive=True) +README = os.path.realpath(os.path.join(__file__, os.path.pardir, os.path.pardir, + os.pardir, 'README.md')) +def load_tests(loader, tests, ignore): + for mod in sys.modules: + if mod.startswith('ndindex'): + # globs={} makes the doctests not include module names + tests.addTests(DocTestSuite(sys.modules[mod], globs={}, + optionflags=NORMALIZE_WHITESPACE)) + tests.addTests(DocFileSuite(*MARKDOWN, *RST, README, + optionflags=NORMALIZE_WHITESPACE | ELLIPSIS, + module_relative=False)) + return tests + +def run_doctests(): + numpy.seterr(all='ignore') + with patch_doctest(): + return unittest.main(module='ndindex.tests.doctest', exit=False).result + +if __name__ == '__main__': + # TODO: Allow specifying which doctests to run at the command line + run_doctests() diff --git a/venv/Lib/site-packages/ndindex/tests/helpers.py b/venv/Lib/site-packages/ndindex/tests/helpers.py new file mode 100644 index 0000000..b8be7e4 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/helpers.py @@ -0,0 +1,531 @@ +import sys +from itertools import chain +import warnings +from functools import wraps, partial + +from numpy import ndarray, generic, intp, bool_, asarray, broadcast_shapes +import numpy.testing + +from pytest import fail + +from hypothesis import assume, note +from hypothesis.strategies import (integers, none, one_of, lists, just, + builds, shared, composite, sampled_from, + nothing, tuples as hypothesis_tuples) +from hypothesis.extra.numpy import (arrays, mutually_broadcastable_shapes as + mbs, BroadcastableShapes, valid_tuple_axes) +from 
hypothesis.errors import UnsatisfiedAssumption + +from ..ndindex import ndindex +from ..shapetools import remove_indices, unremove_indices +from .._crt import prod + +# Hypothesis strategies for generating indices. Note that some of these +# strategies are nominally already defined in hypothesis, but we redefine them +# here because the hypothesis definitions are too restrictive. For example, +# hypothesis's slices strategy does not generate slices with negative indices. +# Similarly, hypothesis.extra.numpy.basic_indices only generates tuples. + +nonnegative_ints = integers(0, 10) +negative_ints = integers(-10, -1) +ints = lambda: one_of(nonnegative_ints, negative_ints) + +def slices(start=one_of(none(), ints()), stop=one_of(none(), ints()), + step=one_of(none(), ints())): + return builds(slice, start, stop, step) + +ellipses = lambda: just(...) +newaxes = lambda: just(None) + +# hypotheses.strategies.tuples only generates tuples of a fixed size +def tuples(elements, *, min_size=0, max_size=None, unique_by=None, unique=False): + return lists(elements, min_size=min_size, max_size=max_size, + unique_by=unique_by, unique=unique).map(tuple) + +MAX_ARRAY_SIZE = 100000 +SHORT_MAX_ARRAY_SIZE = 1000 +shapes = tuples(integers(0, 10)).filter( + # numpy gives errors with empty arrays with large shapes. + # See https://github.com/numpy/numpy/issues/15753 + lambda shape: prod([i for i in shape if i]) < MAX_ARRAY_SIZE) + +_short_shapes = lambda n: tuples(integers(0, 10), min_size=n, max_size=32).filter( + # numpy gives errors with empty arrays with large shapes. 
+ # See https://github.com/numpy/numpy/issues/15753 + lambda shape: prod([i for i in shape if i]) < SHORT_MAX_ARRAY_SIZE) + +# short_shapes should be used in place of shapes in any test function that +# uses ndindices, boolean_arrays, or tuples +short_shapes = shared(_short_shapes(0)) + +_integer_arrays = arrays(intp, short_shapes) +integer_scalars = arrays(intp, ()).map(lambda x: x[()]) +integer_arrays = one_of(integer_scalars, _integer_arrays.flatmap(lambda x: one_of(just(x), just(x.tolist())))) + +# We need to make sure shapes for boolean arrays are generated in a way that +# makes them related to the test array shape. Otherwise, it will be very +# difficult for the boolean array index to match along the test array, which +# means we won't test any behavior other than IndexError. + +@composite +def subsequences(draw, sequence): + seq = draw(sequence) + start = draw(integers(0, max(0, len(seq)-1))) + stop = draw(integers(start, len(seq))) + return seq[start:stop] + +_boolean_arrays = arrays(bool_, one_of(subsequences(short_shapes), short_shapes)) +boolean_scalars = arrays(bool_, ()).map(lambda x: x[()]) +boolean_arrays = one_of(boolean_scalars, _boolean_arrays.flatmap(lambda x: one_of(just(x), just(x.tolist())))) + +def _doesnt_raise(idx): + try: + ndindex(idx) + except (IndexError, ValueError, NotImplementedError): + return False + return True + +Tuples = tuples(one_of(ellipses(), ints(), slices(), newaxes(), + integer_arrays, boolean_arrays)).filter(_doesnt_raise) + +ndindices = one_of( + ints(), + slices(), + ellipses(), + newaxes(), + Tuples, + integer_arrays, + boolean_arrays, +).filter(_doesnt_raise) + +# Note: We could use something like this: + +# mutually_broadcastable_shapes = shared(integers(1, 32).flatmap(lambda i: mbs(num_shapes=i).filter( +# lambda broadcastable_shapes: prod([i for i in broadcastable_shapes.result_shape if i]) < MAX_ARRAY_SIZE))) + +@composite +def _mutually_broadcastable_shapes(draw, *, shapes=short_shapes, min_shapes=0, 
max_shapes=32, min_side=0): + # mutually_broadcastable_shapes() with the default inputs doesn't generate + # very interesting examples (see + # https://github.com/HypothesisWorks/hypothesis/issues/3170). It's very + # difficult to get it to do so by tweaking the max_* parameters, because + # making them too big leads to generating too large shapes and filtering + # too much. So instead, we trick it into generating more interesting + # examples by telling it to create shapes that broadcast against some base + # shape. + + # Unfortunately, this, along with the filtering below, has a downside that + # it tends to generate a result shape of () more often than you might + # like. But it generates enough "real" interesting shapes that both of + # these workarounds are worth doing (plus I don't know if any other better + # way of handling the situation). + base_shape = draw(shapes) + + input_shapes, result_shape = draw( + mbs( + num_shapes=max_shapes, + base_shape=base_shape, + min_side=min_side, + )) + + # The hypothesis mutually_broadcastable_shapes doesn't allow num_shapes to + # be a strategy. It's tempting to do something like num_shapes = + # draw(integers(min_shapes, max_shapes)), but this shrinks poorly. See + # https://github.com/HypothesisWorks/hypothesis/issues/3151. So instead of + # using a strategy to draw the number of shapes, we just generate max_shapes + # shapes and pick a subset of them. + final_input_shapes = draw(lists(sampled_from(input_shapes), + min_size=min_shapes, max_size=max_shapes)) + + + # Note: result_shape is input_shapes broadcasted with base_shape, but + # base_shape itself is not part of input_shapes. We "really" want our base + # shape to be (). We are only using it here to trick + # mutually_broadcastable_shapes into giving more interesting examples. + final_result_shape = broadcast_shapes(*final_input_shapes) + + # The broadcast compatible shapes can be bigger than the base shape. 
This + # is already somewhat limited by the mutually_broadcastable_shapes + # defaults, and pretty unlikely, but we filter again here just to be safe. + if not prod([i for i in final_result_shape if i]) < SHORT_MAX_ARRAY_SIZE: # pragma: no cover + note(f"Filtering the shape {result_shape} (too many elements)") + assume(False) + + return BroadcastableShapes(final_input_shapes, final_result_shape) + +mutually_broadcastable_shapes = shared(_mutually_broadcastable_shapes()) + +def _fill_shape(draw, + *, + result_shape, + skip_axes, + skip_axes_values): + max_n = max([i + 1 if i >= 0 else -i for i in skip_axes], default=0) + assume(max_n <= len(skip_axes) + len(result_shape)) + dim = draw(integers(min_value=max_n, max_value=len(skip_axes) + len(result_shape))) + new_shape = ['placeholder']*dim + for i in skip_axes: + assume(new_shape[i] is not None) # skip_axes must be unique + new_shape[i] = None + j = -1 + for i in range(-1, -dim - 1, -1): + if new_shape[i] is None: + new_shape[i] = draw(skip_axes_values) + else: + new_shape[i] = draw(sampled_from([result_shape[j], 1])) + j -= 1 + while new_shape and new_shape[0] == 'placeholder': # pragma: no cover + # Can happen if positive and negative skip_axes refer to the same + # entry + new_shape.pop(0) + + # This will happen if the skip axes are too large + assume('placeholder' not in new_shape) + + if prod([i for i in new_shape if i]) >= SHORT_MAX_ARRAY_SIZE: + note(f"Filtering the shape {new_shape} (too many elements)") + assume(False) + + return tuple(new_shape) + +skip_axes_with_broadcasted_shape_type = shared(sampled_from([int, tuple, list])) + +@composite +def _mbs_and_skip_axes( + draw, + shapes=short_shapes, + min_shapes=0, + max_shapes=32, + skip_axes_type_st=skip_axes_with_broadcasted_shape_type, + skip_axes_values=integers(0, 20), + num_skip_axes=None, +): + """ + mutually_broadcastable_shapes except skip_axes() axes might not be + broadcastable + + The result_shape will be None in the position of skip_axes. 
+ """ + skip_axes_type = draw(skip_axes_type_st) + _result_shape = draw(shapes) + if _result_shape == (): + assume(num_skip_axes is None) + + ndim = len(_result_shape) + num_shapes = draw(integers(min_value=min_shapes, max_value=max_shapes)) + if not num_shapes: + assume(num_skip_axes is None) + num_skip_axes = 0 + if not ndim: + return BroadcastableShapes([()]*num_shapes, ()), () + + if num_skip_axes is not None: + min_skip_axes = max_skip_axes = num_skip_axes + else: + min_skip_axes = 0 + max_skip_axes = None + + # int and single tuple cases must be limited to N to ensure that they are + # correct for all shapes + if skip_axes_type == int: + assume(num_skip_axes in [None, 1]) + skip_axes = draw(valid_tuple_axes(ndim, min_size=1, max_size=1))[0] + _skip_axes = [(skip_axes,)]*num_shapes + elif skip_axes_type == tuple: + skip_axes = draw(tuples(integers(-ndim, ndim-1), min_size=min_skip_axes, + max_size=max_skip_axes, unique=True)) + _skip_axes = [skip_axes]*num_shapes + elif skip_axes_type == list: + skip_axes = [] + for i in range(num_shapes): + skip_axes.append(draw(tuples(integers(-ndim, ndim+1), min_size=min_skip_axes, + max_size=max_skip_axes, unique=True))) + _skip_axes = skip_axes + + shapes = [] + for i in range(num_shapes): + shapes.append(_fill_shape(draw, result_shape=_result_shape, skip_axes=_skip_axes[i], + skip_axes_values=skip_axes_values)) + + non_skip_shapes = [remove_indices(shape, sk) for shape, sk in + zip(shapes, _skip_axes)] + # Broadcasting the result _fill_shape may produce a shape different from + # _result_shape because it might not have filled all dimensions, or it + # might have chosen 1 for a dimension every time. Ideally we would just be + # using shapes from mutually_broadcastable_shapes, but I don't know how to + # reverse inject skip axes into shapes in general (see the comment in + # unremove_indices). So for now, we just use the actual broadcast of the + # non-skip shapes. 
Note that we use np.broadcast_shapes here instead of + # ndindex.broadcast_shapes because test_broadcast_shapes itself uses this + # strategy. + broadcasted_shape = broadcast_shapes(*non_skip_shapes) + + return BroadcastableShapes(shapes, broadcasted_shape), skip_axes + +mbs_and_skip_axes = shared(_mbs_and_skip_axes()) + +mutually_broadcastable_shapes_with_skipped_axes = mbs_and_skip_axes.map( + lambda i: i[0]) +skip_axes_st = mbs_and_skip_axes.map(lambda i: i[1]) + +@composite +def _cross_shapes_and_skip_axes(draw): + (shapes, _broadcasted_shape), skip_axes = draw(_mbs_and_skip_axes( + shapes=_short_shapes(2), + min_shapes=2, + max_shapes=2, + num_skip_axes=1, + # TODO: Test other skip axes types + skip_axes_type_st=just(list), + skip_axes_values=just(3), + )) + + broadcasted_skip_axis = draw(integers(-len(_broadcasted_shape)-1, len(_broadcasted_shape))) + broadcasted_shape = unremove_indices(_broadcasted_shape, + [broadcasted_skip_axis], val=3) + skip_axes.append((broadcasted_skip_axis,)) + + return BroadcastableShapes(shapes, broadcasted_shape), skip_axes + +cross_shapes_and_skip_axes = shared(_cross_shapes_and_skip_axes()) +cross_shapes = cross_shapes_and_skip_axes.map(lambda i: i[0]) +cross_skip_axes = cross_shapes_and_skip_axes.map(lambda i: i[1]) + +@composite +def cross_arrays_st(draw): + broadcastable_shapes = draw(cross_shapes) + shapes, broadcasted_shape = broadcastable_shapes + + # Sanity check + assert len(shapes) == 2 + # We need to generate fairly random arrays. Otherwise, if they are too + # similar to each other, like two arange arrays would be, the cross + # product will be 0. We also disable the fill feature in arrays() for the + # same reason, as it would otherwise generate too many vectors that are + # colinear. 
+ a = draw(arrays(dtype=int, shape=shapes[0], elements=integers(-100, 100), fill=nothing())) + b = draw(arrays(dtype=int, shape=shapes[1], elements=integers(-100, 100), fill=nothing())) + + return a, b + +@composite +def _matmul_shapes_and_skip_axes(draw): + (shapes, _broadcasted_shape), skip_axes = draw(_mbs_and_skip_axes( + shapes=_short_shapes(2), + min_shapes=2, + max_shapes=2, + num_skip_axes=2, + # TODO: Test other skip axes types + skip_axes_type_st=just(list), + skip_axes_values=just(None), + )) + + broadcasted_skip_axes = draw(hypothesis_tuples(*[ + integers(-len(_broadcasted_shape)-1, len(_broadcasted_shape)) + ]*2)) + + try: + broadcasted_shape = unremove_indices(_broadcasted_shape, + broadcasted_skip_axes) + except NotImplementedError: + # TODO: unremove_indices only works with both positive or both negative + assume(False) + # Make sure the indices are unique + assume(len(set(broadcasted_skip_axes)) == len(broadcasted_skip_axes)) + + skip_axes.append(broadcasted_skip_axes) + + # (n, m) @ (m, k) -> (n, k) + n, m, k = draw(hypothesis_tuples(integers(0, 10), integers(0, 10), + integers(0, 10))) + shape1, shape2 = map(list, shapes) + ax1, ax2 = skip_axes[0] + shape1[ax1] = n + shape1[ax2] = m + ax1, ax2 = skip_axes[1] + shape2[ax1] = m + shape2[ax2] = k + broadcasted_shape = list(broadcasted_shape) + ax1, ax2 = skip_axes[2] + broadcasted_shape[ax1] = n + broadcasted_shape[ax2] = k + + shapes = (tuple(shape1), tuple(shape2)) + broadcasted_shape = tuple(broadcasted_shape) + + return BroadcastableShapes(shapes, broadcasted_shape), skip_axes + +matmul_shapes_and_skip_axes = shared(_matmul_shapes_and_skip_axes()) +matmul_shapes = matmul_shapes_and_skip_axes.map(lambda i: i[0]) +matmul_skip_axes = matmul_shapes_and_skip_axes.map(lambda i: i[1]) + +@composite +def matmul_arrays_st(draw): + broadcastable_shapes = draw(matmul_shapes) + shapes, broadcasted_shape = broadcastable_shapes + + # Sanity check + assert len(shapes) == 2 + a = draw(arrays(dtype=int, 
shape=shapes[0], elements=integers(-100, 100))) + b = draw(arrays(dtype=int, shape=shapes[1], elements=integers(-100, 100))) + + return a, b + +reduce_kwargs = sampled_from([{}, {'negative_int': False}, {'negative_int': True}]) + +def assert_equal(actual, desired, allow_scalar_0d=False, err_msg='', verbose=True): + """ + Assert that two objects are equal. + + - If the objects are ndarrays, this is the same as + numpy.testing.assert_equal except it also requires the shapes and dtypes + to be equal + + - If the objects are tuples, recursively call assert_equal to support + tuples of arrays. + + - If allow_scalar_0d=True, scalars will be considered equal to equivalent + 0-D arrays. + + - Require the types of actual and desired to be exactly the same + (excepting for scalars when allow_scalar_0d=True). + + """ + if not (allow_scalar_0d and (isinstance(actual, generic) + or isinstance(desired, generic))): + assert type(actual) is type(desired), err_msg or f"{type(actual)} != {type(desired)}" + + if isinstance(actual, (ndarray, generic)): + numpy.testing.assert_equal(actual, desired, err_msg=err_msg, + verbose=verbose) + assert actual.shape == desired.shape, err_msg or f"{actual.shape} != {desired.shape}" + assert actual.dtype == desired.dtype, err_msg or f"{actual.dtype} != {desired.dtype}" + elif isinstance(actual, tuple): + assert len(actual) == len(desired), err_msg + for i, j in zip(actual, desired): + assert_equal(i, j, err_msg=err_msg, verbose=verbose) + else: + assert actual == desired, err_msg + +assert_equal_allow_scalar_0d = partial(assert_equal, allow_scalar_0d=True) + +def warnings_are_errors(f): + @wraps(f) + def inner(*args, **kwargs): + with warnings.catch_warnings(): + warnings.simplefilter("error") + return f(*args, **kwargs) + return inner + +@warnings_are_errors +def check_same(a, idx, *, raw_func=lambda a, idx: a[idx], + ndindex_func=lambda a, index: a[index.raw], + conversion_func=ndindex, + same_exception=True, assert_equal=assert_equal): + """ + 
Check that a raw index idx produces the same result on an array a before + and after being transformed by ndindex. + + Tests that raw_func(a, idx) == ndindex_func(a, ndindex(idx)) or that they + raise the same exception. If same_exception=False, it will still check + that they both raise an exception, but will not require the exception type + and message to be the same. + + By default, raw_func(a, idx) is a[idx] and ndindex_func(a, index) is + a[index.raw]. + + The assert_equal argument changes the function used to test equality. By + default it is the custom assert_equal() function in this file that extends + numpy.testing.assert_equal. If the func functions return something other + than arrays, assert_equal should be set to something else, like + + def assert_equal(x, y): + assert x == y + + """ + exception = None + try: + # Handle list indices that NumPy treats as tuple indices with a + # deprecation warning. We want to test against the post-deprecation + # behavior. + e_inner = None + try: + try: + a_raw = raw_func(a, idx) + except IndexError as e: + # It's not straightforward to avoid indexing too many + # dimensions in the strategy generation, because the total + # number of dimensions in the result array is not a trivial + # thing. Furthermore, some versions of NumPy limit this to 32 + # and some limit it to 64. + if "number of dimensions must be within" in str(e): # pragma: no cover + raise UnsatisfiedAssumption + raise + except Warning as w: + # In NumPy < 1.23, this is a FutureWarning. In 1.23 the + # deprecation was removed and lists are always interpreted as + # array indices. + if ("Using a non-tuple sequence for multidimensional indexing is deprecated" in w.args[0]): # pragma: no cover + idx = asarray(idx, dtype=intp) + a_raw = raw_func(a, idx) + elif "Out of bound index found. This was previously ignored when the indexing result contained no elements. In the future the index error will be raised. 
This error occurs either due to an empty slice, or if an array has zero elements even before indexing." in w.args[0]: # pragma: no cover + same_exception = False + raise IndexError + else: # pragma: no cover + fail(f"Unexpected warning raised: {w}") + except Exception: + _, e_inner, _ = sys.exc_info() + if e_inner: + raise e_inner + except UnsatisfiedAssumption: # pragma: no cover + raise + except Exception as e: + exception = e + + index = '' + try: + index = conversion_func(idx) + a_ndindex = ndindex_func(a, index) + except Exception as e: + if not exception: + fail(f"Raw form does not raise but ndindex form does ({e!r}): {index})") # pragma: no cover + if same_exception: + assert type(e) == type(exception), (e, exception) + assert e.args == exception.args, (e.args, exception.args) + else: + if exception: + fail(f"ndindex form did not raise but raw form does ({exception!r}): {index})") # pragma: no cover + + if not exception: + assert_equal(a_raw, a_ndindex) + + +def iterslice(start_range=(-10, 10), + stop_range=(-10, 10), + step_range=(-10, 10), + one_two_args=True +): + # one_two_args is unnecessary if the args are being passed to slice(), + # since slice() already canonicalizes missing arguments to None. We do it + # for Slice to test that behavior. 
+ if one_two_args: + for start in chain(range(*start_range), [None]): + yield (start,) + + for start in chain(range(*start_range), [None]): + for stop in chain(range(*stop_range), [None]): + yield (start, stop) + + for start in chain(range(*start_range), [None]): + for stop in chain(range(*stop_range), [None]): + for step in chain(range(*step_range), [None]): + yield (start, stop, step) + + +chunk_shapes = short_shapes + +@composite +def chunk_sizes(draw, shapes=chunk_shapes): + shape = draw(shapes) + return draw(tuples(integers(1, 10), min_size=len(shape), + max_size=len(shape)).filter(lambda shape: prod(shape) < MAX_ARRAY_SIZE)) diff --git a/venv/Lib/site-packages/ndindex/tests/test_array.py b/venv/Lib/site-packages/ndindex/tests/test_array.py new file mode 100644 index 0000000..2f750e2 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_array.py @@ -0,0 +1,38 @@ +from numpy import intp, array, int16 + +from pytest import raises + +from ..array import ArrayIndex +from ..integerarray import IntegerArray + +from .helpers import assert_equal + +# Everything else is tested in the subclasses + +def test_ArrayIndex(): + raises(TypeError, lambda: ArrayIndex([])) + +def test_attributes(): + a = array([[0, 1], [1, 0]]) + + idx = IntegerArray(a) + assert_equal(idx.array, array(a, dtype=intp)) + assert idx.dtype == intp + assert idx.ndim == a.ndim == 2 + assert idx.shape == a.shape == (2, 2) + assert idx.size == a.size == 4 + +def test_cast_raises(): + with raises(TypeError): + a = array([[0, 1], [1, 0]]) + idx = IntegerArray(a) + assert array(idx) == idx + +def test_copy(): + idx = IntegerArray([1, 2]) + idx2 = IntegerArray(idx.raw) + assert idx.raw is not idx2.raw + idx3 = IntegerArray(idx.raw, _copy=False) + assert idx.raw is idx3.raw + raises(ValueError, lambda: IntegerArray([], _copy=False)) + raises(ValueError, lambda: IntegerArray(array([1], dtype=int16), _copy=False)) diff --git a/venv/Lib/site-packages/ndindex/tests/test_as_subindex.py 
b/venv/Lib/site-packages/ndindex/tests/test_as_subindex.py new file mode 100644 index 0000000..c600c65 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_as_subindex.py @@ -0,0 +1,111 @@ +from pytest import raises + +from numpy import array, arange, isin, unique, intp + +from hypothesis import given, assume, example +from hypothesis.strategies import integers, one_of + +from ..ndindex import ndindex +from ..integerarray import IntegerArray +from ..tuple import Tuple +from .helpers import ndindices, prod, short_shapes, assert_equal, warnings_are_errors + +@example((slice(0, 8), slice(0, 9), slice(0, 10)), + ([2, 5, 6, 7], slice(1, 9, 1), slice(5, 10, 1)), + (20, 20, 20)) +@example((), (None, array([], dtype=intp)), 0) +@example((), array([], dtype=bool), 0) +@example((), IntegerArray(0), 1) +@example((), (None, True), 0) +@example(..., False, 0) +@example(..., (False,), 0) +@example((IntegerArray(2),), (slice(0, 1),), 3) +@example(([0],), (slice(1, 2),), 3) +@example(array([True]), slice(0, 1), 2) +@example((slice(0, 2), slice(0, 2)), (array([0, 2]), array([0, 2])), 3) +@example(IntegerArray([0, 1]), (slice(0, 1),), 2) +@example(array([1]), slice(0, 1), 1) +@example(IntegerArray(0), slice(0, 1), 3) +@example(IntegerArray(0), slice(1, 2), 3) +@example(IntegerArray(2), slice(0, 1), 3) +@example(..., (-1, array([0])), (1, 1)) +@example(..., (-1, array([0, 0])), (1, 1)) +@example((slice(None, 1, None), slice(None, 1, None)), + (array(0), array([0, 0])), + (1, 1)) +@example([[0, 11], [0, 0]], slice(0, 10), 20) +@example(slice(0, 0), 9007199254741193, 1) +@example((0,), (slice(1, 2),), 3) +@example(slice(0, 10), slice(5, 15), 20) +@example((), (slice(None, None, -1),), (2,)) +@example((), (..., slice(None, None, -1),), (2,)) +@example((slice(0, 1),), (2,), (3,)) +@example((slice(0, 5), slice(0, 5)), (slice(3, 10), slice(3, 10)), (20, 20)) +@example((slice(0, 5), slice(0, 5)), (1, 1), (10, 10)) +@example(0, slice(0, 0), 1) +@example([0], slice(0, 0), 1) 
+@example(0, slice(0, 1), 1) +@example([0], slice(0, 1), 1) +@example(slice(0, 5), 2, 10) +@example(0, (slice(None, 0, None), Ellipsis), 1) +@example(0, (slice(1, 2),), 1) +@given(ndindices, ndindices, one_of(integers(0, 100), short_shapes)) +@warnings_are_errors +def test_as_subindex_hypothesis(idx1, idx2, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + try: + index1 = ndindex(idx1) + index2 = ndindex(idx2) + except ValueError: # pragma: no cover + assume(False) + + empty = False + try: + Subindex = index1.as_subindex(index2) + except NotImplementedError: + return + except ValueError as e: + assert "do not intersect" in e.args[0] + empty = True + + try: + a1 = a[index1.raw] + a2 = a[index2.raw] + except IndexError: # pragma: no cover + assume(False) + except DeprecationWarning as w: # pragma: no cover + if "Out of bound index found. This was previously ignored when the indexing result contained no elements. In the future the index error will be raised. This error occurs either due to an empty slice, or if an array has zero elements even before indexing." in w.args[0]: + assume(False) + else: # pragma: no cover + raise + + if empty: + assert not isin(a1, a2).any() + assert not isin(a2, a1).any() + with raises(ValueError, match="do not intersect"): + try: + index2.as_subindex(index1) + except NotImplementedError: + raise ValueError('do not intersect') + else: + asubindex = a2[Subindex.raw] + + if (isinstance(index2, IntegerArray) + or (isinstance(index2, Tuple) + and any(isinstance(i, IntegerArray) for i in index2.args))): + # isin(x, y) has the same shape as x. If idx2 has an integer array + # it may index the same element more than once, but idx1 will not. 
+ assert_equal(unique(asubindex.flatten()), unique(a1[isin(a1, a2)])) + else: + assert_equal(asubindex.flatten(), a1[isin(a1, a2)]) + + try: + subindex2 = index2.as_subindex(index1) + except NotImplementedError: + return + asubindex2 = a1[subindex2.raw] + assert_equal(asubindex2, asubindex) diff --git a/venv/Lib/site-packages/ndindex/tests/test_booleanarray.py b/venv/Lib/site-packages/ndindex/tests/test_booleanarray.py new file mode 100644 index 0000000..75a1b8e --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_booleanarray.py @@ -0,0 +1,119 @@ +from numpy import prod, arange, array, bool_, empty, full, __version__ as np_version + +NP1 = np_version.startswith('1') + +from hypothesis import given, example +from hypothesis.strategies import one_of, integers + +from pytest import raises + +from .helpers import boolean_arrays, short_shapes, check_same, assert_equal, reduce_kwargs + +from ..booleanarray import BooleanArray + +def test_booleanarray_constructor(): + raises(ValueError, lambda: BooleanArray([False], shape=(1,))) + raises(ValueError, lambda: BooleanArray([], shape=(1,))) + raises(TypeError, lambda: BooleanArray([0])) + raises(TypeError, lambda: BooleanArray(array(0.0))) + raises(TypeError, lambda: BooleanArray((True,))) + idx = BooleanArray(array([True], dtype=bool_)) + assert_equal(idx.array, array([True], dtype=bool_)) + + idx = BooleanArray([], shape=(0, 1)) + assert_equal(idx.array, empty((0, 1), dtype=bool_)) + + # Make sure the underlying array is immutable + idx = BooleanArray([True]) + with raises(ValueError): + idx.array[0] = False + assert_equal(idx.array, array([True], dtype=bool_)) + + # Make sure the underlying array is copied + a = array([True, False]) + idx = BooleanArray(a) + a[0] = False + assert idx == BooleanArray([True, False]) + +@given(boolean_arrays, short_shapes) +def test_booleanarray_hypothesis(idx, shape): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx) + +@given(boolean_arrays, one_of(short_shapes, 
integers(0, 10)), reduce_kwargs) +def test_booleanarray_reduce_no_shape_hypothesis(idx, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = BooleanArray(idx) + + check_same(a, index.raw, ndindex_func=lambda a, x: a[x.reduce(**kwargs).raw]) + +@example(full((1, 9), True), 3, {}) +@example(full((1, 9), True), (3, 3), {}) +@example(full((1, 9), False), (3, 3), {}) +@given(boolean_arrays, one_of(short_shapes, integers(0, 10)), reduce_kwargs) +def test_booleanarray_reduce_hypothesis(idx, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = BooleanArray(idx) + + same_exception = not NP1 + check_same(a, index.raw, ndindex_func=lambda a, x: a[x.reduce(shape, **kwargs).raw], + same_exception=same_exception) + + try: + reduced = index.reduce(shape, **kwargs) + except IndexError: + pass + else: + # At present, reduce() always returns the same index if it doesn't + # give an IndexError + assert reduced == index + + # Idempotency + assert reduced.reduce(**kwargs) == reduced + assert reduced.reduce(shape, **kwargs) == reduced + +@given(boolean_arrays, one_of(short_shapes, integers(0, 10))) +def test_booleanarray_isempty_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = BooleanArray(idx) + + def raw_func(a, idx): + return a[idx].size == 0 + + def ndindex_func(a, index): + return index.isempty(), index.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # If isempty is True then a[t] should be empty + if isempty: + assert raw_empty, (index, shape) + # We cannot test the converse with hypothesis. isempty may be False + # but a[idx] could still be empty for this specific a (e.g., if a is + # already itself empty). + + # If isempty is true with no shape it should be true for a specific + # shape. 
The converse is not true because the indexed array could be + # empty. + if isempty: + assert isempty_shape, (index, shape) + + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (index, shape) + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_broadcast_arrays.py b/venv/Lib/site-packages/ndindex/tests/test_broadcast_arrays.py new file mode 100644 index 0000000..31ee264 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_broadcast_arrays.py @@ -0,0 +1,80 @@ +from numpy import arange, prod, array + +from hypothesis import given, example +from hypothesis.strategies import integers, one_of + +from ..ndindex import ndindex +from ..array import ArrayIndex +from ..booleanarray import BooleanArray +from ..integerarray import IntegerArray +from ..integer import Integer +from ..tuple import Tuple +from .helpers import ndindices, check_same, short_shapes, warnings_are_errors + +@example((..., False, False), 1) +@example((True, False), 1) +@example((True, True), 1) +@example(array([False]), 0) +@example((slice(None, -1, None), array([[1]])), (1, 1)) +@example((1, False), 0) +@example(True, 1) +@example(False, 1) +@example([[True, False], [False, False]], (2, 2, 3)) +@given(ndindices, one_of(short_shapes, integers(0, 10))) +@warnings_are_errors +def test_broadcast_arrays_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = ndindex(idx) + + # It is possible for the original index to give an IndexError, but for + # broadcast_arrays() may return an index that doesn't (but the other way + # around should not happen). 
+ check = True + try: + a[index.raw] + except IndexError as e: + if "boolean index did not match indexed array" in e.args[0]: + check = False + if index.isempty() and "out of bounds" in e.args[0]: + # Integers are bounds checked even when the resulting index + # broadcasts to empty (but not so for IntegerArray). + check = False + except DeprecationWarning as w: # pragma: no cover + if "Out of bound index found. This was previously ignored when the indexing result contained no elements. In the future the index error will be raised. This error occurs either due to an empty slice, or if an array has zero elements even before indexing." in w.args[0]: + pass + else: # pragma: no cover + raise + if check: + check_same(a, index.raw, ndindex_func=lambda a, x: + a[x.broadcast_arrays().raw], same_exception=False) + + broadcasted = index.broadcast_arrays() + + if isinstance(index, (Tuple, BooleanArray)): + assert isinstance(broadcasted, Tuple) + else: + assert broadcasted == index + + if isinstance(broadcasted, Tuple): + arrays = [arg for arg in broadcasted.args if isinstance(arg, + ArrayIndex) + and arg not in [True, False]] + if arrays: + assert len(set([i.shape for i in arrays])) == 1 + assert not any(isinstance(i, Integer) for i in broadcasted.args) + assert all(isinstance(i, IntegerArray) or i in [True, False] for + i in arrays) + + assert broadcasted.args.count(True) <= 1 + assert broadcasted.args.count(False) <= 1 + assert not (True in broadcasted.args and False in broadcasted.args) + if True in broadcasted.args or False in broadcasted.args: + assert index in [True, False] or True in index.args or False in index.args + if index in [True, False]: + assert broadcasted == Tuple(index) + elif isinstance(index, BooleanArray): + assert all(isinstance(i, IntegerArray) for i in broadcasted.args) diff --git a/venv/Lib/site-packages/ndindex/tests/test_chunking.py b/venv/Lib/site-packages/ndindex/tests/test_chunking.py new file mode 100644 index 0000000..ac71861 --- /dev/null +++ 
b/venv/Lib/site-packages/ndindex/tests/test_chunking.py @@ -0,0 +1,266 @@ +from itertools import zip_longest, tee + +from numpy import all as np_all, arange, isin, sort, concatenate + +from hypothesis import given, assume, example +from hypothesis.strategies import one_of + +from pytest import raises + +from ..chunking import ChunkSize +from ..slice import Slice +from ..tuple import Tuple +from ..ndindex import ndindex + +from .helpers import assert_equal, chunk_sizes, chunk_shapes, prod, ints, slices, ndindices + +def test_ChunkSize_constructor(): + raises(TypeError, lambda: ChunkSize(Tuple(1, 2, 3))) + raises(TypeError, lambda: ChunkSize(1, 2, 3)) + raises(TypeError, lambda: ChunkSize(1)) + raises(ValueError, lambda: ChunkSize((-1, 2, 3))) + raises(ValueError, lambda: ChunkSize((0, 2, 3))) + raises(NotImplementedError, lambda: ChunkSize((None, 2, 3))) + +@given(chunk_sizes()) +def test_ChunkSize_eq(chunk_size_tuple): + chunk_size = ChunkSize(chunk_size_tuple) + new = type(chunk_size)(*chunk_size.args) + + assert chunk_size == chunk_size_tuple + assert chunk_size_tuple == chunk_size + assert new == chunk_size + assert chunk_size == new + assert new == chunk_size_tuple + assert chunk_size_tuple == new + + assert hash(new) == hash(chunk_size) + assert not (chunk_size == 'a') + assert not ('a' == chunk_size) + assert (chunk_size != 'a') + assert ('a' != chunk_size) + + h = hash(chunk_size_tuple) + assert hash(chunk_size) == h + +@given(chunk_sizes(), one_of(ints(), slices())) +def test_ChunkSize_args(chunk_size_tuple, idx): + chunk_size = ChunkSize(chunk_size_tuple) + assert chunk_size.args == (chunk_size_tuple,) + + try: + ndindex(idx) + except ValueError: # pragma: no cover + # Filter out invalid slices (TODO: do this in the strategy) + assume(False) + + # Should index the same way + # TODO: Refactor check_same() so we can use that + try: + chunk_size_idx = chunk_size[idx] + except IndexError: + try: + tuple_idx = chunk_size_tuple[idx] + except IndexError: + pass + 
else: + raise AssertionError("ChunkSize raised but tuple did not") + else: + tuple_idx = chunk_size_tuple[idx] + assert chunk_size_idx == tuple_idx + +@given(chunk_sizes()) +def test_ChunkSize_tuple(chunk_size_tuple): + # Test that ChunkSize behaves like a tuple + chunk_size = ChunkSize(chunk_size_tuple) + assert tuple(chunk_size) == chunk_size_tuple + +def test_indices_error(): + raises(ValueError, lambda: next(ChunkSize((1, 2)).indices((1, 2, 3)))) + +@given(chunk_sizes(), chunk_shapes) +def test_num_chunks(chunk_size, shape): + chunk_size = ChunkSize(chunk_size) + assert chunk_size.num_chunks(shape) == len(list(chunk_size.indices(shape))) + +@given(chunk_sizes(), chunk_shapes) +def test_indices(chunk_size, shape): + chunk_size = ChunkSize(chunk_size) + indices = chunk_size.indices(shape) + size = prod(shape) + a = arange(size).reshape(shape) + + subarrays = [] + for idx in indices: + # The indices should be fully expanded + assert idx.expand(shape) == idx + # Except for indices at the edges, they should index a full chunk + if not any(s.stop == i for s, i in zip(idx.args, shape)): + assert idx.newshape(shape) == chunk_size + # Make sure they can be indexed + subarrays.append(a[idx.raw]) + # Check that indices together index every element of the array exactly + # once. 
+ elements = [i for x in subarrays for i in x.flatten()] + assert sorted(elements) == list(range(size)) + +@example(chunk_size=(1, 1), idx=[[False, True], [True, True]], + shape=(2, 2)) +@example(chunk_size=(1,), idx=slice(None, None, -1), shape=(2,)) +@example((1,), True, (1,)) +@example(chunk_size=(1, 1), idx=slice(1, None, 2), shape=(4, 1)) +@example((1,), ..., (0,)) +@example((2, 2), (0, 3), (5, 5)) +@example((2, 2), (slice(0, 5, 2), slice(0, 5, 3)), (5, 5)) +@example((2, 2), ([0, 0],), (5, 5)) +@given(chunk_sizes(), ndindices, chunk_shapes) +def test_as_subchunks(chunk_size, idx, shape): + chunk_size = ChunkSize(chunk_size) + size = prod(shape) + a = arange(size).reshape(shape) + idx = ndindex(idx) + + try: + idx.reduce(shape) + except IndexError: + assume(False) + + full_idx = a[idx.raw] + + subarrays = [] + fast = chunk_size.as_subchunks(idx, shape) + slow = chunk_size.as_subchunks(idx, shape, _force_slow=True) + slow2 = chunk_size.as_subchunks(idx, shape, _force_slow=True) + no_fallback = chunk_size.as_subchunks(idx, shape, _force_slow=False) + slow_raised_notimplementederror = False + try: + next(slow2) + except StopIteration: + pass + except NotImplementedError: + # The fallback isn't implemented, but the fast case may still be. + slow, fast = tee(fast, 2) + slow_raised_notimplementederror = True + if not slow_raised_notimplementederror: + # If it works (no NotImplementedError), it shouldn't use the fallback. + try: + next(no_fallback) + except StopIteration: + pass + try: + for c, cslow in zip_longest(fast, slow): + assert c == cslow + index = idx.expand(shape).as_subindex(c) + chunk = a[c.raw] + subchunk = chunk[index.raw] + # Not empty + assert subchunk.size > 0 + # Indexes the right elements (c.f. 
test_as_subindex) + assert_equal(subchunk.flatten(), full_idx[isin(full_idx, chunk)]) + subarrays.append(subchunk) + except NotImplementedError: + # NotImplementedError should only be allowed from the fallback algorithm + if not slow_raised_notimplementederror: + raise + return + + # Picks all elements + if subarrays: + elements = concatenate([x.flatten() for x in subarrays]) + else: + elements = arange(0) + assert_equal(sort(elements), sort(full_idx.flatten())) + +def test_as_subchunks_error(): + raises(ValueError, lambda: next(ChunkSize((1, 2)).as_subchunks(..., (1, 2, 3)))) + +@example(chunk_size=(1,), idx=(slice(None), slice(None)), shape=(0,)) +@example(chunk_size=(), idx=(), shape=()) +@example(chunk_size=(1, 1), idx=[[False, True], [True, True]], + shape=(2, 2)) +@example(chunk_size=(1,), idx=None, shape=(1,)) +@example((1,), True, (1,)) +@example(chunk_size=(1, 1), idx=slice(1, None, 2), shape=(4, 1)) +@example((1,), ..., (0,)) +@example((2, 2), (0, 3), (5, 5)) +@example((2, 2), (slice(0, 5, 2), slice(0, 5, 3)), (5, 5)) +@example((2, 2), ([0, 0],), (5, 5)) +@given(chunk_sizes(), ndindices, chunk_shapes) +def test_num_subchunks(chunk_size, idx, shape): + chunk_size = ChunkSize(chunk_size) + idx = ndindex(idx) + + try: + idx.reduce(shape) + except IndexError: + assume(False) + + subchunks = chunk_size.as_subchunks(idx, shape) + try: + actual_num_subchunks = len(list(subchunks)) + computed_num_subchunks = chunk_size.num_subchunks(idx, shape) + except NotImplementedError: + return + assert computed_num_subchunks == actual_num_subchunks + +def test_num_subchunks_error(): + raises(ValueError, lambda: next(ChunkSize((1, 2)).num_subchunks(..., (1, 2, 3)))) + + +@example((2, 2), (0, False), (5, 5)) +@example((5,), [0, 7], (15,)) +@example((5,), [], (15,)) +@given(chunk_sizes(), ndindices, chunk_shapes) +def test_containing_block(chunk_size, idx, shape): + chunk_size = ChunkSize(chunk_size) + idx = ndindex(idx) + + size = prod(shape) + a = arange(size).reshape(shape) 
+ + try: + idx.reduce(shape) + except IndexError: + assume(False) + + try: + block = chunk_size.containing_block(idx, shape) + except NotImplementedError: + return + + assert isinstance(block, Tuple), block + assert len(block.args) == len(chunk_size) + assert all(isinstance(s, Slice) for s in block.args) + assert all(s.start >= 0 and s.start % n == 0 for s, n in zip(block.args, chunk_size)), block + assert all(s.stop >= 0 and (s.stop == i or s.stop % n == 0) and s.stop <= + i for s, i, n in zip(block.args, shape, chunk_size)), block + assert all(s.step == 1 for s in block.args), block + + a_idx = a[idx.raw] + a_block = a[block.raw] + + assert np_all(isin(a_idx, a_block)) + + # Verify that the block is indeed the smallest possible by shrinking it + # and making sure that misses some of the index. This check doesn't work + # for empty indices, so those are handled separately. Also shape == () + # cannot give an empty tuple index because we want only tuples of slices.. + if (0 in shape or idx.reduce(shape).isempty()) and shape != (): + assert block.isempty() + else: + for i in range(len(block.args)): + s = block.args[i] + new_s1 = Slice(s.start + chunk_size[i], s.stop) + if s.stop == shape[i] and shape[i] % chunk_size[i]: + new_s2 = Slice(s.start, s.stop - s.stop % chunk_size[i]) + else: + new_s2 = Slice(s.start, s.stop - chunk_size[i]) + new_block1 = Tuple(*(block.args[:i] + (new_s1,) + block.args[i+1:])) + new_block2 = Tuple(*(block.args[:i] + (new_s2,) + block.args[i+1:])) + a_block1 = a[new_block1.raw] + a_block2 = a[new_block2.raw] + assert not np_all(isin(a_idx, a_block1)) + assert not np_all(isin(a_idx, a_block2)) + +def test_containing_block_error(): + raises(ValueError, lambda: ChunkSize((1, 2)).containing_block(..., (1, 2, 3))) diff --git a/venv/Lib/site-packages/ndindex/tests/test_crt.py b/venv/Lib/site-packages/ndindex/tests/test_crt.py new file mode 100644 index 0000000..e37dd84 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_crt.py @@ -0,0 
+1,54 @@ +from math import gcd + +from .._crt import crt, ilcm, gcdex + +from hypothesis import given, example +from hypothesis.strategies import integers, lists, shared + +from sympy.ntheory.modular import crt as crt_sympy +size = shared(integers(min_value=1, max_value=10)) +@given( + size.flatmap(lambda s: lists(integers(min_value=1), min_size=s, max_size=s)), + size.flatmap(lambda s: lists(integers(), min_size=s, max_size=s)), +) +def test_crt(m, v): + res = crt(m, v) + + if res is not None: + for m_i, v_i in zip(m, v): + assert v_i % m_i == res % m_i + + assert res == crt_sympy(m, v)[0] + else: + assert crt_sympy(m, v) is None + +@example(1, 2) +@given(integers(min_value=0), integers(min_value=0)) +def test_ilcm(x, y): + L = ilcm(x, y) + + if 0 in [x, y]: + assert L == 0 + return + + assert L >= x + assert L >= y + + # L is a common multiple + assert L % x == 0 + assert L % y == 0 + + if L - min(x, y) <= 1000: + # L is the least common multiple + for i in range(min(x, y), L): + assert i % x != 0 or i % y != 0 + +@example(0, 3) +@example(3, 0) +@example(0, 0) +@given(integers(), integers()) +def test_gcdex(a, b): + x, y, g = gcdex(a, b) + + assert g == gcd(a, b) + assert x*a + y*b == g diff --git a/venv/Lib/site-packages/ndindex/tests/test_ellipsis.py b/venv/Lib/site-packages/ndindex/tests/test_ellipsis.py new file mode 100644 index 0000000..335ef24 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_ellipsis.py @@ -0,0 +1,72 @@ +from numpy import arange + +from hypothesis import given +from hypothesis.strategies import one_of, integers + +from ..ndindex import ndindex +from .helpers import (check_same, prod, shapes, ellipses, reduce_kwargs, + assert_equal_allow_scalar_0d) + +def test_ellipsis_exhaustive(): + for n in range(10): + a = arange(n) + check_same(a, ...) 
+ +@given(ellipses(), shapes) +def test_ellipsis_hypothesis(idx, shape): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx) + +def test_ellipsis_reduce_exhaustive(): + for n in range(10): + a = arange(n) + check_same(a, ..., ndindex_func=lambda a, x: a[x.reduce((n,)).raw]) + +@given(ellipses(), shapes, reduce_kwargs) +def test_ellipsis_reduce_hypothesis(idx, shape, kwargs): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx, + ndindex_func=lambda a, x: a[x.reduce(shape, **kwargs).raw], + assert_equal=assert_equal_allow_scalar_0d) + +def test_ellipsis_reduce_no_shape_exhaustive(): + for n in range(10): + a = arange(n) + check_same(a, ..., ndindex_func=lambda a, x: a[x.reduce().raw]) + +@given(ellipses(), shapes, reduce_kwargs) +def test_ellipsis_reduce_no_shape_hypothesis(idx, shape, kwargs): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx, ndindex_func=lambda a, x: a[x.reduce(**kwargs).raw], + assert_equal=assert_equal_allow_scalar_0d) + +@given(ellipses(), one_of(shapes, integers(0, 10))) +def test_ellipsis_isempty_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = ndindex(idx) + + def raw_func(a, idx): + return a[idx].size == 0 + + def ndindex_func(a, index): + return index.isempty(), index.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # Since idx is an ellipsis, it should never be unconditionally empty + assert not isempty + # We cannot test the converse with hypothesis. isempty may be False + # but a[idx] could still be empty for this specific a (e.g., if a is + # already itself empty). 
+ + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (index, shape) + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_expand.py b/venv/Lib/site-packages/ndindex/tests/test_expand.py new file mode 100644 index 0000000..80aefdb --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_expand.py @@ -0,0 +1,81 @@ +from numpy import arange, array, intp, empty + +from hypothesis import given, example +from hypothesis.strategies import integers, one_of + +from ..ndindex import ndindex +from ..array import ArrayIndex +from ..booleanarray import BooleanArray +from ..integerarray import IntegerArray +from ..integer import Integer +from ..tuple import Tuple +from .helpers import (ndindices, check_same, short_shapes, prod, + assert_equal_allow_scalar_0d) + +@example(True, (1,)) +@example((Ellipsis, array([[ True, True]])), (1, 2)) +@example((..., 0, 0, False), 1) +@example((empty((0, 0), dtype=bool)), 0) +@example((0, empty((0, 0), dtype=bool)), 0) +@example((..., empty((0, 0), dtype=bool)), 0) +@example((..., 0, empty((0, 0), dtype=bool)), 0) +@example((array([], dtype=intp), 0), (0, 0)) +@example((array([], dtype=intp), [0]), (0, 0)) +@example((..., 0, array([], dtype=intp)), (0, 0)) +@example((..., array(0), array([], dtype=intp)), (0, 0)) +@example((False, False), ()) +@example((-1, False), 1) +@example((..., False), ()) +@example((array([0]),), ()) +@example(([0, 1], 0), (2, 2)) +@example((..., [0, 1], 0), (2, 2)) +@example((..., None, 0), 1) +@example((0, 1, ..., 2, 3), (2, 3, 4, 5, 6, 7)) +@example(None, 2) +@given(ndindices, one_of(short_shapes, integers(0, 10))) +def test_expand_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = ndindex(idx) + + check_same(a, index.raw, 
ndindex_func=lambda a, x: a[x.expand(shape).raw], + same_exception=False, assert_equal=assert_equal_allow_scalar_0d) + + try: + expanded = index.expand(shape) + except IndexError: + pass + else: + assert isinstance(expanded, Tuple) + assert ... not in expanded.args + n_newaxis = 0 + boolean_scalars = 0 + if isinstance(idx, tuple): + n_newaxis = index.args.count(None) + if True in index.args or False in index.args: + boolean_scalars = 1 + elif index == None: + n_newaxis = 1 + elif index in [True, False]: + boolean_scalars = 1 + if isinstance(shape, int): + assert len(expanded.args) == 1 + n_newaxis + boolean_scalars + else: + assert len(expanded.args) == len(shape) + n_newaxis + boolean_scalars + + # Make sure arrays are broadcasted + if any(isinstance(i, ArrayIndex) and i not in [True, False] for i in expanded.args): + assert not any(isinstance(i, Integer) for i in expanded.args) + assert not any(isinstance(i, BooleanArray) and i not in [True, False] for i in expanded.args) + assert len({i.shape for i in expanded.args if isinstance(i, + IntegerArray)}) in [0, 1] + + assert expanded.args.count(True) <= 1 + assert expanded.args.count(False) <= 1 + assert not (True in expanded.args and False in expanded.args) + + # Idempotency + assert expanded.expand(shape) == expanded diff --git a/venv/Lib/site-packages/ndindex/tests/test_integer.py b/venv/Lib/site-packages/ndindex/tests/test_integer.py new file mode 100644 index 0000000..3802c48 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_integer.py @@ -0,0 +1,253 @@ +from numpy import arange, int64, isin, bool_ + +from pytest import raises + +from hypothesis import given, example +from hypothesis.strategies import integers, one_of + +from ..integer import Integer +from ..slice import Slice +from .helpers import check_same, ints, prod, shapes, iterslice, assert_equal, reduce_kwargs + +def test_integer_args(): + zero = Integer(0) + assert zero.raw == 0 + idx = Integer(int64(0)) + assert idx == zero + assert idx.raw 
== 0 + assert isinstance(idx.raw, int) + assert Integer(zero) == zero + + raises(TypeError, lambda: Integer(1.0)) + # See the docstring of operator_index() + raises(TypeError, lambda: Integer(True)) + raises(TypeError, lambda: Integer(bool_(True))) + + + class HasIndex: + def __init__(self, x): + self.x = x + + def __index__(self): + return self.x + + idx = Integer(HasIndex(0)) + assert idx.args == (0,) + assert idx.raw == 0 + assert type(idx.args[0]) is int + assert type(idx.raw) is int + + class HasInt: + def __init__(self, x): + self.x = x + + def __int__(self): + return self.x # pragma: no cover + + raises(TypeError, lambda: Integer(HasInt(0))) + +def test_integer_exhaustive(): + a = arange(10) + for i in range(-12, 12): + check_same(a, i) + + +@given(ints(), integers(5, 100)) +def test_integer_hypothesis(i, size): + a = arange(size) + check_same(a, i) + + +def test_integer_len_exhaustive(): + for i in range(-12, 12): + idx = Integer(i) + assert len(idx) == 1 + + +@given(ints()) +def test_integer_len_hypothesis(i): + idx = Integer(i) + assert len(idx) == 1 + +def test_integer_reduce_exhaustive(): + a = arange(10) + for i in range(-12, 12): + for kwargs in [{'negative_int': False}, {'negative_int': True}, {}]: + check_same(a, i, ndindex_func=lambda a, x: a[x.reduce((10,), **kwargs).raw]) + + negative_int = kwargs.get('negative_int', False) + + try: + reduced = Integer(i).reduce(10, **kwargs) + except IndexError: + pass + else: + if negative_int: + assert reduced.raw < 0 + else: + assert reduced.raw >= 0 + + # Idempotency + assert reduced.reduce(**kwargs) == reduced + assert reduced.reduce(10, **kwargs) == reduced + +@example(0, (1,), {'negative_int': True}) +@given(ints(), shapes, reduce_kwargs) +def test_integer_reduce_hypothesis(i, shape, kwargs): + a = arange(prod(shape)).reshape(shape) + # The axis argument is tested implicitly in the Tuple.reduce test. It is + # difficult to test here because we would have to pass in a Tuple to + # check_same. 
+ check_same(a, i, ndindex_func=lambda a, x: a[x.reduce(shape, **kwargs).raw]) + + negative_int = kwargs.get('negative_int', False) + + try: + reduced = Integer(i).reduce(shape, **kwargs) + except IndexError: + pass + else: + if negative_int: + assert reduced.raw < 0 + else: + assert reduced.raw >= 0 + + # Idempotency + assert reduced.reduce(**kwargs) == reduced + assert reduced.reduce(shape, **kwargs) == reduced + +def test_integer_reduce_no_shape_exhaustive(): + a = arange(10) + for i in range(-12, 12): + check_same(a, i, ndindex_func=lambda a, x: a[x.reduce().raw]) + +@given(ints(), shapes, reduce_kwargs) +def test_integer_reduce_no_shape_hypothesis(i, shape, kwargs): + a = arange(prod(shape)).reshape(shape) + check_same(a, i, ndindex_func=lambda a, x: a[x.reduce(**kwargs).raw]) + +@given(ints()) +def test_integer_reduce_no_shape_unchanged(i): + idx = Integer(i) + assert idx.reduce() == idx.reduce(negative_int=False) == idx.reduce(negative_int=True) == i + +def test_integer_newshape_exhaustive(): + shape = 5 + a = arange(shape) + + def raw_func(a, idx): + return a[idx].shape + + def ndindex_func(a, index): + return index.newshape(shape) + + def assert_equal(raw_shape, newshape): + assert raw_shape == newshape + + for i in range(-10, 10): + check_same(a, i, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal) + +def test_integer_as_subindex_slice_exhaustive(): + for n in range(10): + a = arange(n) + for i in range(-10, 10): + try: + a[i] + except IndexError: + continue + + for indexargs in iterslice(): + idx = Integer(i) + + try: + Index = Slice(*indexargs) + except ValueError: + continue + + empty = False + try: + Subindex = idx.as_subindex(Index) + except NotImplementedError: + continue + except ValueError as e: + assert "do not intersect" in e.args[0] + empty = True + + aidx = a[idx.raw] + aindex = a[Index.raw] + if empty: + assert not isin(aidx, aindex).any() + assert not isin(aindex, aidx).any() + with raises(ValueError, match="do not 
intersect"): + Index.as_subindex(idx) + else: + asubindex = aindex[Subindex.raw] + + assert_equal(asubindex.flatten(), aidx[isin(aidx, aindex)]) + + subindex2 = Index.as_subindex(idx) + asubindex2 = aidx[subindex2.raw] + assert_equal(asubindex2, asubindex) + +def test_integer_isempty_exhaustive(): + for i in range(-10, 10): + idx = Integer(i) + + isempty = idx.isempty() + + for n in range(30): + a = arange(n) + + exception = False + try: + aidx = a[idx.raw] + except IndexError: + exception = True + else: + if aidx.size != 0: + # If a[i] doesn't give an index error, it should always be nonempty + assert not isempty + # isempty() should always give the correct result for a specific + # array shape + try: + isemptyn = idx.isempty(n) + except IndexError: + if not exception: + raise AssertionError(f"idx.isempty(n) raised but a[idx] did not (idx = {idx}, n = {n}).") + else: + if exception: + raise AssertionError(f"a[idx] raised but idx.isempty(n) did not (idx = {idx}, n = {n}).") + assert isemptyn == (aidx.size == 0) + +@example(1, (2, 0)) +@given(ints(), one_of(shapes, integers(0, 10))) +def test_integer_isempty_hypothesis(i, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = Integer(i) + + def raw_func(a, idx): + return a[idx].size == 0 + + def ndindex_func(a, index): + return index.isempty(), index.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # Since i is an integer, it should never be unconditionally empty + assert not isempty + # We cannot test the converse with hypothesis. isempty may be False + # but a[i] could still be empty for this specific a (e.g., if a is + # already itself empty). 
+ + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (index, shape) + + check_same(a, i, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_integerarray.py b/venv/Lib/site-packages/ndindex/tests/test_integerarray.py new file mode 100644 index 0000000..87446c0 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_integerarray.py @@ -0,0 +1,142 @@ +from numpy import arange, array, int8, intp, empty + +from hypothesis import given, example +from hypothesis.strategies import one_of, integers + +from pytest import raises + +from .helpers import (integer_arrays, short_shapes, check_same, assert_equal, + reduce_kwargs, prod) + +from ..integer import Integer +from ..integerarray import IntegerArray + +def test_integerarray_constructor(): + raises(ValueError, lambda: IntegerArray([0], shape=(1,))) + raises(ValueError, lambda: IntegerArray([], shape=(1,))) + raises(TypeError, lambda: IntegerArray([False])) + raises(TypeError, lambda: IntegerArray(array(0.0))) + raises(TypeError, lambda: IntegerArray((1,))) + idx = IntegerArray(array([0], dtype=int8)) + assert_equal(idx.array, array([0], dtype=intp)) + + idx = IntegerArray([], shape=(0, 1)) + assert_equal(idx.array, empty((0, 1), dtype=intp)) + + # Make sure the underlying array is immutable + idx = IntegerArray([1, 2]) + with raises(ValueError): + idx.array[0] = 0 + assert_equal(idx.array, array([1, 2], dtype=intp)) + + # Make sure the underlying array is copied + a = array([1, 2]) + idx = IntegerArray(a) + a[0] = 0 + assert idx == IntegerArray([1, 2]) + +@given(integer_arrays, short_shapes) +def test_integerarray_hypothesis(idx, shape): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx) + +@given(integer_arrays, one_of(short_shapes, integers(0, 10)), reduce_kwargs) +def test_integerarray_reduce_no_shape_hypothesis(idx, shape, 
kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = IntegerArray(idx) + + check_same(a, index.raw, ndindex_func=lambda a, x: a[x.reduce(**kwargs).raw]) + +@given(integer_arrays) +def test_integerarray_reduce_no_shape_unchanged(idx): + index = IntegerArray(idx) + assert index.reduce() == index.reduce(negative_int=False) == index.reduce(negative_int=True) + if index.ndim != 0: + assert index.reduce() == index + + +@example(array([2, -2]), (4,), {'negative_int': True}) +@example(array(2), (4,), {'negative_int': True}) +@example(array([2, 0]), (1, 0), {}) +@example(array(0), 1, {}) +@example(array([], dtype=intp), 0, {}) +@given(integer_arrays, one_of(short_shapes, integers(0, 10)), reduce_kwargs) +def test_integerarray_reduce_hypothesis(idx, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = IntegerArray(idx) + + check_same(a, index.raw, ndindex_func=lambda a, x: a[x.reduce(shape, **kwargs).raw]) + + negative_int = kwargs.get('negative_int', False) + + try: + reduced = index.reduce(shape, **kwargs) + except IndexError: + pass + else: + if isinstance(reduced, Integer): + if negative_int: + assert reduced.raw < 0 + else: + assert reduced.raw >= 0 + else: + assert isinstance(reduced, IntegerArray) + if negative_int: + assert (reduced.raw < 0).all() + else: + assert (reduced.raw >= 0).all() + + # Idempotency + assert reduced.reduce(**kwargs) == reduced + assert reduced.reduce(shape, **kwargs) == reduced + +@example([], (1,)) +@example([0], (1, 0)) +@example(idx=empty((0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), dtype=intp), + shape=(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) +@given(integer_arrays, one_of(short_shapes, integers(0, 10))) +def test_integerarray_isempty_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = IntegerArray(idx) 
+ + + def raw_func(a, idx): + return a[idx].size == 0 + + def ndindex_func(a, index): + return index.isempty(), index.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # If isempty is True then a[t] should be empty + if isempty: + assert raw_empty, (index, shape) + # We cannot test the converse with hypothesis. isempty may be False + # but a[idx] could still be empty for this specific a (e.g., if a is + # already itself empty). + + # If isempty is true with no shape it should be true for a specific + # shape. The converse is not true because the indexed array could be + # empty. + if isempty: + assert isempty_shape, (index, shape) + + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (index, shape) + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_isvalid.py b/venv/Lib/site-packages/ndindex/tests/test_isvalid.py new file mode 100644 index 0000000..4c6d0a7 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_isvalid.py @@ -0,0 +1,43 @@ +from hypothesis import given, example +from hypothesis.strategies import one_of, integers + +from numpy import arange + +from .helpers import ndindices, shapes, MAX_ARRAY_SIZE, check_same, prod + +@example([0], (1,)) +@example(..., (1, 2, 3)) +@example(slice(0, 1), ()) +@example(slice(0, 1), (1,)) +@example((0, 1), (2, 2)) +@example((0,), ()) +@example([[1]], (0, 0, 1)) +@example(None, ()) +@given(ndindices, one_of(shapes, integers(0, MAX_ARRAY_SIZE))) +def test_isvalid_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + def raw_func(a, idx): + try: + a[idx] + return True + except Warning as w: + # check_same unconditionally turns this warning into raise + # IndexError, so we have to handle it 
separately here. + if "Out of bound index found. This was previously ignored when the indexing result contained no elements. In the future the index error will be raised. This error occurs either due to an empty slice, or if an array has zero elements even before indexing." in w.args[0]: # pragma: no cover + return False + raise # pragma: no cover + except IndexError: + return False + + def ndindex_func(a, index): + return index.isvalid(a.shape) + + def assert_equal(x, y): + assert x == y + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal) diff --git a/venv/Lib/site-packages/ndindex/tests/test_ndindex.py b/venv/Lib/site-packages/ndindex/tests/test_ndindex.py new file mode 100644 index 0000000..f02c724 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_ndindex.py @@ -0,0 +1,188 @@ +import inspect +import warnings +import copy +import pickle + +import numpy as np + +from hypothesis import given, example, settings +from hypothesis.strategies import sampled_from + +from pytest import raises + +from ..array import ArrayIndex +from ..ndindex import ndindex +from ..booleanarray import BooleanArray +from ..integer import Integer +from ..ellipsis import ellipsis +from ..integerarray import IntegerArray +from ..slice import Slice +from ..tuple import Tuple +from .helpers import ndindices, check_same, assert_equal + +@example(None) +@example([1, 2]) +@given(ndindices) +def test_eq(idx): + index = ndindex(idx) + new = type(index)(*index.args) + + assert_equal(new.raw, index.raw) + + def check_eq(a, b): + assert a == b + assert b == a + assert not (a != b) + assert not (b != a) + + def check_neq(a, b): + assert a != b + assert b != a + assert not (a == b) + assert not (b == a) + + check_eq(new, index) + check_eq(new.raw, index) + check_eq(new, index.raw) + + check_eq(index.raw, index) + assert hash(new) == hash(index) + check_neq(index, 'a') + + try: + h = hash(idx) + except TypeError: + pass + else: + assert hash(index) 
== h + + try: + h = hash(index.raw) + except TypeError: + pass + else: + assert hash(index) == h + +def test_eq_array_raises(): + index = ndindex([1, 2, 3]) + with raises(TypeError): + np.equal(index.raw, index) + with raises(TypeError): + np.array_equal(index.raw, index) + +def test_eq_explicit(): + assert Integer(0) != False + assert Integer(1) != True + assert Integer(0) != IntegerArray(0) + assert IntegerArray([0, 1]) != [False, True] + assert IntegerArray([0, 1]) == [0, 1] + assert BooleanArray([False, True]) != [0, 1] + assert BooleanArray([False, True]) == [False, True] + +@example((np.array([1, 2]), 0)) +@example([1, 2, 3]) +@given(ndindices) +def test_ndindex(idx): + index = ndindex(idx) + assert index == idx + assert ndindex[idx] == index + + def assert_raw_eq(idx, index): + if isinstance(idx, (list, bool, np.bool_)): + assert isinstance(index, ArrayIndex) + assert index.dtype in [np.intp, np.bool_] + assert_equal(index.raw, np.asarray(idx, dtype=index.dtype)) + elif isinstance(idx, np.integer): + assert type(index) is Integer + assert_equal(index.raw, int(idx)) + elif isinstance(idx, tuple): + assert type(index.raw) is tuple + assert len(idx) == len(index.raw) + assert index.args == index.raw + for i, j in zip(index.args, idx): + assert_raw_eq(j, i) + else: + assert_equal(index.raw, idx) + + assert_raw_eq(idx, index) + assert ndindex(index.raw) == index + +def test_ndindex_invalid(): + a = np.arange(10) + for idx in [1.0, [1.0], np.array([1.0]), np.array([1], dtype=object), + np.array([])]: + check_same(a, idx) + + # Older versions of NumPy gives a deprecation warning for this index. We + # are not going to allow indices that give deprecation warnings in + # ndindex. 
+ with warnings.catch_warnings(record=True) as r: + # Make sure no warnings are emitted from ndindex() + warnings.simplefilter("error") + # Newer numpy versions raise ValueError with this index (although + # perhaps they shouldn't) + raises((IndexError, ValueError), lambda: ndindex([1, []])) + assert not r + +def test_ndindex_ellipsis(): + raises(IndexError, lambda: ndindex(ellipsis)) + +def test_signature(): + sig = inspect.signature(Integer) + assert sig.parameters.keys() == {'idx'} + + +@example(([0, 1],)) +@example((IntegerArray([], (0, 1)),)) +@example(IntegerArray([], (0, 1))) +@example((1, ..., slice(1, 2))) +# eval can sometimes be slower than the default deadline of 200ms for large +# array indices +@settings(deadline=None) +@given(ndindices) +def test_repr_str(idx): + # The repr form should be re-creatable + index = ndindex(idx) + d = {} + exec("from ndindex import *", d) + assert eval(repr(index), d) == idx + + # Str may not be re-creatable. Just test that it doesn't give an exception. + str(index) + +# _Tuple does not serialize properly with protocols 0 and 1. Support could +# probably be added if this is necessary. 
+LOWEST_SUPPORTED_PROTOCOL = 2 +protocols = ["copy", "deepcopy"] + list(range(LOWEST_SUPPORTED_PROTOCOL, pickle.HIGHEST_PROTOCOL + 1)) + +@given(ndindices, sampled_from(protocols)) +def test_serialization(idx, protocol): + index = ndindex(idx) + + def serialize(index): + if protocol == "copy": + return copy.copy(index) + elif protocol == "deepcopy": + return copy.deepcopy(index) + else: + return pickle.loads(pickle.dumps(index, protocol=protocol)) + + roundtripped = serialize(index) + assert type(roundtripped) is type(index) + assert roundtripped == index + assert_equal(roundtripped.raw, index.raw) + assert_equal(roundtripped.args, index.args) + + if isinstance(index, Slice): + assert index._reduced == roundtripped._reduced == False + s = index.reduce() + assert s._reduced == True + roundtripped_s = serialize(s) + assert roundtripped_s._reduced == True + + if isinstance(index, Tuple): + assert all([i._reduced == False for i in index.args if isinstance(i, Slice)]) + t = index.reduce() + assert all([i._reduced == True for i in t.args if isinstance(i, Slice)]) + roundtripped_t = serialize(t) + assert all([i._reduced == True for i in roundtripped_t.args if isinstance(i, Slice)]) diff --git a/venv/Lib/site-packages/ndindex/tests/test_newaxis.py b/venv/Lib/site-packages/ndindex/tests/test_newaxis.py new file mode 100644 index 0000000..abe4450 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_newaxis.py @@ -0,0 +1,73 @@ +from numpy import arange, newaxis + +from hypothesis import given +from hypothesis.strategies import one_of, integers + +from ..ndindex import ndindex +from .helpers import check_same, prod, shapes, newaxes, reduce_kwargs + +def test_newaxis_exhaustive(): + for n in range(10): + a = arange(n) + check_same(a, newaxis) + + +@given(newaxes(), shapes) +def test_newaxis_hypothesis(idx, shape): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx) + + +def test_newaxis_reduce_exhaustive(): + for n in range(10): + a = arange(n) + 
check_same(a, newaxis, ndindex_func=lambda a, x: a[x.reduce((n,)).raw]) + + +@given(newaxes(), shapes, reduce_kwargs) +def test_newaxis_reduce_hypothesis(idx, shape, kwargs): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx, ndindex_func=lambda a, x: a[x.reduce(shape, **kwargs).raw]) + + +def test_newaxis_reduce_no_shape_exhaustive(): + for n in range(10): + a = arange(n) + check_same(a, newaxis, ndindex_func=lambda a, x: a[x.reduce().raw]) + +@given(newaxes(), shapes, reduce_kwargs) +def test_newaxis_reduce_no_shape_hypothesis(idx, shape, kwargs): + a = arange(prod(shape)).reshape(shape) + check_same(a, idx, ndindex_func=lambda a, x: a[x.reduce(**kwargs).raw]) + +@given(newaxes(), one_of(shapes, integers(0, 10))) +def test_newaxis_isempty_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = ndindex(idx) + + + def raw_func(a, idx): + return a[idx].size == 0 + + def ndindex_func(a, index): + return index.isempty(), index.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # Since idx is a newaxis, it should never be unconditionally empty + assert not isempty + # We cannot test the converse with hypothesis. isempty may be False + # but a[idx] could still be empty for this specific a (e.g., if a is + # already itself empty). 
+ + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (index, shape) + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_newshape.py b/venv/Lib/site-packages/ndindex/tests/test_newshape.py new file mode 100644 index 0000000..eacc01e --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_newshape.py @@ -0,0 +1,52 @@ +from pytest import raises + +from numpy import arange, array, full + +from hypothesis import given, example +from hypothesis.strategies import integers, one_of + +from ..ndindex import ndindex +from ..tuple import Tuple +from ..integer import Integer +from .helpers import ndindices, check_same, short_shapes, prod + +@example(..., 0) +@example((True,), ()) +@example(([[True, False], [True, False]], [True, True], slice(0, 2)), ((2, 2, 2, 3, 3))) +@example((array([], dtype=bool),), (0, 0)) +@example((False, False), ()) +@example(array([], dtype=bool), 0) +@example((array([], dtype=bool),), 0) +@example(array([[[True], [False]]]), (1, 1, 2)) +@example(full((1, 9), False), (3, 3)) +@example(([0, 1], 0), (2, 2)) +@example(([0, 0, 0], [0, 0]), (2, 2)) +@example((0, None, 0, ..., 0, None, 0), (2, 2, 2, 2, 2, 2, 2)) +@example((0, slice(None), ..., slice(None), 3), (2, 3, 4, 5, 6, 7)) +@given(ndindices, one_of(short_shapes, integers(0, 10))) +def test_newshape_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + try: + index = ndindex(idx) + except IndexError: + pass + else: + # Make sure ndindex input gives an error + raises(TypeError, lambda: index.newshape(Tuple(2, 1))) + raises(TypeError, lambda: index.newshape(Integer(2))) + + def raw_func(a, idx): + return a[idx].shape + + def ndindex_func(a, index): + return index.newshape(shape) + + def assert_equal(raw_shape, newshape): + assert 
raw_shape == newshape + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_no_dependencies.py b/venv/Lib/site-packages/ndindex/tests/test_no_dependencies.py new file mode 100644 index 0000000..f43e595 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_no_dependencies.py @@ -0,0 +1,231 @@ +""" +Test that ndindex does not depend on NumPy, as long as no functionality +involving array indices is used. + +Also test that it does not depend on SymPy (which used to be a hard +dependency, see ndindex/_crt.py). + +Because the tests themselves require NumPy, we must test this in a subprocess +where we know that NumPy is not yet imported. + +""" + +import sys +import subprocess + +import pytest + +from ndindex import (ndindex, iter_indices, Integer, Slice, Tuple, ellipsis, + Newaxis, ChunkSize, BroadcastError, AxisError) + +def _test_dependency_ndindex(mod): + assert mod not in sys.modules + + # Run various functions that shouldn't depend on NumPy and check that they + # don't import it. + + ndindex(1) + assert mod not in sys.modules + + ndindex[0:1] + assert mod not in sys.modules + + ndindex(ndindex(1)) + assert mod not in sys.modules + + ndindex(None) + assert mod not in sys.modules + + ndindex[1, 0:1] + assert mod not in sys.modules + + ndindex(...) 
+ assert mod not in sys.modules + + try: + ndindex('') + except IndexError: + pass + assert mod not in sys.modules + +def _test_dependency_iter_indices(mod): + assert mod not in sys.modules + + list(iter_indices((1, 2), (2, 1))) + assert mod not in sys.modules + + list(iter_indices((1, 2), (2, 1), skip_axes=(1,))) + assert mod not in sys.modules + + try: + list(iter_indices((2, 3), (3, 2))) + except BroadcastError: + pass + else: + assert False + assert mod not in sys.modules + + try: + list(iter_indices((2, 3), skip_axes=(2,))) + except AxisError: + pass + else: + assert False + assert mod not in sys.modules + +def _test_dependency_indices(mod): + for index_type, args in [ + (Integer, (1,)), + (Slice, (2, 10, 3)), + (ellipsis, ()), + (Newaxis, ()), + (Tuple, (slice(0, 3), 1)), + ]: + assert mod not in sys.modules, index_type + + idx = index_type(*args) + assert mod not in sys.modules, index_type + + idx.raw + assert mod not in sys.modules, index_type + + if hasattr(idx, '__len__'): + len(idx) + assert mod not in sys.modules, index_type + + idx.reduce() + assert mod not in sys.modules, index_type + + idx.reduce((2, 3)) + assert mod not in sys.modules, index_type + + idx.newshape((2, 3)) + assert mod not in sys.modules, index_type + + try: + idx.as_subindex(slice(1, 20, 5)) + except NotImplementedError: + pass + assert mod not in sys.modules, index_type + + idx.isempty((2, 3)) + assert mod not in sys.modules, index_type + + idx == idx + assert mod not in sys.modules, index_type + + idx == 1 + assert mod not in sys.modules, index_type + + hash(idx) + assert mod not in sys.modules, index_type + + idx.broadcast_arrays() + assert mod not in sys.modules, index_type + + idx.expand((2, 3)) + assert mod not in sys.modules, index_type + + str(idx) + assert mod not in sys.modules, index_type + + repr(idx) + assert mod not in sys.modules, index_type + +def _test_dependency_ChunkSize(mod): + assert mod not in sys.modules + + c = ChunkSize((10, 10)) + assert mod not in 
sys.modules + + hash(c) + assert mod not in sys.modules + + c[0] + assert mod not in sys.modules + + len(c) + assert mod not in sys.modules + + c.num_chunks((100, 100)) + assert mod not in sys.modules + + list(c.indices((100, 100))) + assert mod not in sys.modules + + list(c.as_subchunks(slice(0, 50), (100, 100))) + assert mod not in sys.modules + + c.num_subchunks(slice(0, 50), (100, 100)) + assert mod not in sys.modules + + c.containing_block(slice(0, 50), (100, 100)) + assert mod not in sys.modules + + list(c.as_subchunks(None, (100, 100))) + assert mod not in sys.modules + + c.num_subchunks(None, (100, 100)) + assert mod not in sys.modules + + c.containing_block(None, (100, 100)) + assert mod not in sys.modules + + list(c.as_subchunks(1, (100, 100))) + assert mod not in sys.modules + + c.num_subchunks(1, (100, 100)) + assert mod not in sys.modules + + c.containing_block(1, (100, 100)) + assert mod not in sys.modules + + list(c.as_subchunks(..., (100, 100))) + assert mod not in sys.modules + + c.num_subchunks(..., (100, 100)) + assert mod not in sys.modules + + c.containing_block(..., (100, 100)) + assert mod not in sys.modules + + list(c.as_subchunks((0, slice(0, 50)), (100, 100))) + assert mod not in sys.modules + + c.num_subchunks((0, slice(0, 50)), (100, 100)) + assert mod not in sys.modules + + c.containing_block((0, slice(0, 50)), (100, 100)) + assert mod not in sys.modules + +@pytest.mark.no_cover +@pytest.mark.parametrize('func', [func for func in globals() if func.startswith('_test')]) +def test_numpy_dependency(func): + # This unfortunately won't go through any of the pytest machinery. 
We + # reraise the exception as an AssertionError so that pytest will show it + # in a semi-reasonable way + + try: + subprocess.run([sys.executable, '-c', f'''\ +from ndindex.tests.test_no_dependencies import {func} + +{func}("numpy")'''], check=True, capture_output=True, encoding='utf-8') + except subprocess.CalledProcessError as e: + print(e.stdout, end='') + raise AssertionError(e.stderr) + +@pytest.mark.no_cover +@pytest.mark.parametrize('func', [func for func in globals() if func.startswith('_test')]) +def test_sympy_dependency(func): + # This unfortunately won't go through any of the pytest machinery. We + # reraise the exception as an AssertionError so that pytest will show it + # in a semi-reasonable way + + try: + subprocess.run([sys.executable, '-c', f'''\ +from ndindex.tests.test_no_dependencies import {func} + +{func}("sympy")'''], check=True, capture_output=True, encoding='utf-8') + except subprocess.CalledProcessError as e: + print(e.stdout, end='') + raise AssertionError(e.stderr) diff --git a/venv/Lib/site-packages/ndindex/tests/test_selected_indices.py b/venv/Lib/site-packages/ndindex/tests/test_selected_indices.py new file mode 100644 index 0000000..7ebd2ca --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_selected_indices.py @@ -0,0 +1,45 @@ +from numpy import arange + +from hypothesis import given, example +from hypothesis.strategies import integers, one_of + +from ..ndindex import ndindex +from ..tuple import Tuple +from ..integer import Integer +from .helpers import ndindices, check_same, short_shapes, prod + +@example(([False], None), (1,)) +@example((False, slice(0, 10)), (5, 2)) +@example((None, True, 0), (5, 2)) +@example((slice(0, 10), [0, -1]), (5, 2)) +@example(slice(0, 10), 5) +@example([0, 1, 2], 3) +@example(([0, 1, 2],), 3) +@given(ndindices, one_of(short_shapes, integers(0, 10))) +def test_selected_indices_hypothesis(idx, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = 
arange(prod(shape)).reshape(shape) + + ndindex(idx) + + def raw_func(a, idx): + return list(a[idx].flat) + + def ndindex_func(a, index): + indices = list(index.selected_indices(shape)) + for i in indices: + if len(a.shape) == 1: + assert isinstance(i, Integer) + else: + assert isinstance(i, Tuple) + assert all(isinstance(j, Integer) for j in i.args) + + return [a[i.raw] for i in indices] + + def assert_equal(a, b): + assert a == b + + check_same(a, idx, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_shapetools.py b/venv/Lib/site-packages/ndindex/tests/test_shapetools.py new file mode 100644 index 0000000..ec7db05 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_shapetools.py @@ -0,0 +1,612 @@ +import numpy as np +try: + from numpy import AxisError as np_AxisError +except ImportError: # pragma: no cover + from numpy.exceptions import AxisError as np_AxisError + +from hypothesis import assume, given, example +from hypothesis.strategies import (one_of, integers, tuples as + hypothesis_tuples, just, lists, shared, + ) + +from pytest import raises + +from ..ndindex import ndindex +from ..shapetools import (asshape, iter_indices, ncycles, BroadcastError, + AxisError, broadcast_shapes, remove_indices, + unremove_indices, associated_axis, + normalize_skip_axes) +from ..integer import Integer +from ..tuple import Tuple +from .helpers import (prod, mutually_broadcastable_shapes_with_skipped_axes, + skip_axes_st, mutually_broadcastable_shapes, tuples, + shapes, assert_equal, cross_shapes, cross_skip_axes, + cross_arrays_st, matmul_shapes, matmul_skip_axes, + matmul_arrays_st) + +@example([[(1, 1), (1, 1)], (1,)], (0,)) +@example([[(0,), (0,)], ()], (0,)) +@example([[(1, 2), (2, 1)], (2,)], 1) +@given(mutually_broadcastable_shapes_with_skipped_axes, skip_axes_st) +def test_iter_indices(broadcastable_shapes, skip_axes): + # broadcasted_shape will contain None on 
the skip_axes, as those axes + # might not be broadcast compatible + shapes, broadcasted_shape = broadcastable_shapes + # We need no more than 31 dimensions so that the np.stack call below + # doesn't fail. + assume(len(broadcasted_shape) < 32) + + # 1. Normalize inputs + _skip_axes = normalize_skip_axes(shapes, skip_axes) + _skip_axes_kwarg_default = [()]*len(shapes) + + # Skipped axes may not be broadcast compatible. Since the index for a + # skipped axis should always be a slice(None), the result should be the + # same if the skipped axes are all moved to the end of the shape. + canonical_shapes = [] + for s, sk in zip(shapes, _skip_axes): + c = remove_indices(s, sk) + canonical_shapes.append(c) + + non_skip_shapes = [remove_indices(shape, sk) for shape, sk in zip(shapes, _skip_axes)] + assert np.broadcast_shapes(*non_skip_shapes) == broadcasted_shape + + nitems = prod(broadcasted_shape) + + if skip_axes == (): # kwarg default + res = iter_indices(*shapes) + else: + res = iter_indices(*shapes, skip_axes=skip_axes) + broadcasted_res = iter_indices(broadcasted_shape) + + sizes = [prod(shape) for shape in shapes] + arrays = [np.arange(size).reshape(shape) for size, shape in zip(sizes, shapes)] + canonical_sizes = [prod(shape) for shape in canonical_shapes] + canonical_arrays = [np.arange(size).reshape(shape) for size, shape in zip(canonical_sizes, canonical_shapes)] + canonical_broadcasted_array = np.arange(nitems).reshape(broadcasted_shape) + + # 2. Check that iter_indices is the same whether or not the shapes are + # broadcasted together first. Also check that every iterated index is the + # expected type and there are as many as expected. 
+ vals = [] + bvals = [] + n = -1 + + def _remove_slices(idx): + assert isinstance(idx, Tuple) + idx2 = [i for i in idx.args if i != slice(None)] + return Tuple(*idx2) + + for n, (idxes, bidxes) in enumerate(zip(res, broadcasted_res)): + assert len(idxes) == len(shapes) + assert len(bidxes) == 1 + for idx, shape, sk in zip(idxes, shapes, _skip_axes): + assert isinstance(idx, Tuple) + assert len(idx.args) == len(shape) + + for i in range(-1, -len(idx.args) - 1, -1): + if i in sk: + assert idx.args[i] == slice(None) + else: + assert isinstance(idx.args[i], Integer) + + canonical_idxes = [_remove_slices(idx) for idx in idxes] + a_indexed = tuple([a[idx.raw] for a, idx in zip(arrays, idxes)]) + canonical_a_indexed = tuple([a[idx.raw] for a, idx in + zip(canonical_arrays, canonical_idxes)]) + canonical_b_indexed = canonical_broadcasted_array[bidxes[0].raw] + + for c_indexed in canonical_a_indexed: + assert c_indexed.shape == () + assert canonical_b_indexed.shape == () + + if _skip_axes != _skip_axes_kwarg_default: + vals.append(tuple(canonical_a_indexed)) + else: + vals.append(a_indexed) + + bvals.append(canonical_b_indexed) + + # assert both iterators have the same length + raises(StopIteration, lambda: next(res)) + raises(StopIteration, lambda: next(broadcasted_res)) + + # Check that the correct number of items are iterated + assert n == nitems - 1 + assert len(set(vals)) == len(vals) == nitems + + # 3. Check that every element of the (broadcasted) arrays is represented + # by an iterated index. + + # The indices should correspond to the values that would be matched up + # if the arrays were broadcasted together. + if not arrays: + assert vals == [()] + else: + correct_vals = list(zip(*[x.flat for x in np.broadcast_arrays(*canonical_arrays)])) + # Also test that the indices are produced in a lexicographic order + # (even though this isn't strictly guaranteed by the iter_indices + # docstring) in the case when there are no skip axes. 
The order when + # there are skip axes is more complicated because the skipped axes are + # iterated together. + if _skip_axes == _skip_axes_kwarg_default: + assert vals == correct_vals + else: + assert set(vals) == set(correct_vals) + assert bvals == list(canonical_broadcasted_array.flat) + +@given(cross_arrays_st(), cross_shapes, cross_skip_axes) +def test_iter_indices_cross(cross_arrays, broadcastable_shapes, _skip_axes): + # Test iter_indices behavior against np.cross, which effectively skips the + # crossed axis. Note that we don't test against cross products of size 2 + # because a 2 x 2 cross product just returns the z-axis (i.e., it doesn't + # actually skip an axis in the result shape), and also that behavior is + # going to be removed in NumPy 2.0. + a, b = cross_arrays + shapes, broadcasted_shape = broadcastable_shapes + + # Sanity check + skip_axes = normalize_skip_axes([*shapes, broadcasted_shape], _skip_axes) + for sh, sk in zip([*shapes, broadcasted_shape], skip_axes): + assert len(sk) == 1 + assert sh[sk[0]] == 3 + + res = np.cross(a, b, axisa=skip_axes[0][0], axisb=skip_axes[1][0], axisc=skip_axes[2][0]) + assert res.shape == broadcasted_shape + + for idx1, idx2, idx3 in iter_indices(*shapes, broadcasted_shape, skip_axes=_skip_axes): + assert a[idx1.raw].shape == (3,) + assert b[idx2.raw].shape == (3,) + assert_equal(np.cross( + a[idx1.raw], + b[idx2.raw]), + res[idx3.raw]) + +@given(matmul_arrays_st(), matmul_shapes, matmul_skip_axes) +def test_iter_indices_matmul(matmul_arrays, broadcastable_shapes, skip_axes): + # Test iter_indices behavior against np.matmul, which effectively skips the + # contracted axis (they aren't broadcasted together, even when they are + # broadcast compatible). 
+ a, b = matmul_arrays + shapes, broadcasted_shape = broadcastable_shapes + + # Note, we don't use normalize_skip_axes here because it sorts the skip + # axes + + ax1, ax2 = skip_axes[0] + ax3 = skip_axes[1][1] + n, m, k = shapes[0][ax1], shapes[0][ax2], shapes[1][ax3] + + # Sanity check + sk0, sk1, sk2 = skip_axes + shape1, shape2 = shapes + assert a.shape == shape1 + assert b.shape == shape2 + assert shape1[sk0[0]] == n + assert shape1[sk0[1]] == m + assert shape2[sk1[0]] == m + assert shape2[sk1[1]] == k + assert broadcasted_shape[sk2[0]] == n + assert broadcasted_shape[sk2[1]] == k + + res = np.matmul(a, b, axes=skip_axes) + assert res.shape == broadcasted_shape + + is_ordered = lambda sk, shape: (Integer(sk[0]).reduce(len(shape)).raw <= Integer(sk[1]).reduce(len(shape)).raw) + orders = [ + is_ordered(sk0, shapes[0]), + is_ordered(sk1, shapes[1]), + is_ordered(sk2, broadcasted_shape), + ] + + for idx1, idx2, idx3 in iter_indices(*shapes, broadcasted_shape, skip_axes=skip_axes): + assert a[idx1.raw].shape == (n, m) if orders[0] else (m, n) + assert b[idx2.raw].shape == (m, k) if orders[1] else (k, m) + sub_res_axes = [ + (0, 1) if orders[0] else (1, 0), + (0, 1) if orders[1] else (1, 0), + (0, 1) if orders[2] else (1, 0), + ] + sub_res = np.matmul(a[idx1.raw], b[idx2.raw], axes=sub_res_axes) + assert_equal(sub_res, res[idx3.raw]) + +def test_iter_indices_errors(): + try: + list(iter_indices((10,), skip_axes=(2,))) + except AxisError as e: + ndindex_msg = str(e) + else: + raise RuntimeError("iter_indices did not raise AxisError") # pragma: no cover + + # Check that the message is the same one used by NumPy + try: + np.sum(np.arange(10), axis=2) + except np_AxisError as e: + np_msg = str(e) + else: + raise RuntimeError("np.sum() did not raise AxisError") # pragma: no cover + + assert ndindex_msg == np_msg + + try: + list(iter_indices((2, 3), (3, 2))) + except BroadcastError as e: + ndindex_msg = str(e) + else: + raise RuntimeError("iter_indices did not raise 
BroadcastError") # pragma: no cover + + try: + np.broadcast_shapes((2, 3), (3, 2)) + except ValueError as e: + np_msg = str(e) + else: + raise RuntimeError("np.broadcast_shapes() did not raise ValueError") # pragma: no cover + + + if 'Mismatch' in str(np_msg): # pragma: no cover + # Older versions of NumPy do not have the more helpful error message + assert ndindex_msg == np_msg + + with raises(ValueError, match=r"not unique"): + list(iter_indices((1, 2), skip_axes=(0, 1, 0))) + + raises(AxisError, lambda: list(iter_indices((0,), skip_axes=(3,)))) + raises(ValueError, lambda: list(iter_indices(skip_axes=(0,)))) + raises(TypeError, lambda: list(iter_indices(1, 2))) + raises(TypeError, lambda: list(iter_indices(1, 2, (2, 2)))) + raises(TypeError, lambda: list(iter_indices([(1, 2), (2, 2)]))) + +@example(1, 1, 1) +@given(integers(0, 100), integers(0, 100), integers(0, 100)) +def test_ncycles(i, n, m): + N = ncycles(range(i), n) + if n == 1: + assert N == range(i) + else: + assert isinstance(N, ncycles) + assert N.iterable == range(i) + assert N.n == n + assert f"range(0, {i})" in repr(N) + assert str(n) in repr(N) + + L = list(N) + assert len(L) == i*n + for j in range(i*n): + assert L[j] == j % i + + M = ncycles(N, m) + if n*m == 1: + assert M == range(i) + else: + assert isinstance(M, ncycles) + assert M.iterable == range(i) + assert M.n == n*m + +@given(one_of(mutually_broadcastable_shapes, + hypothesis_tuples(tuples(shapes), just(None)))) +def test_broadcast_shapes(broadcastable_shapes): + shapes, broadcasted_shape = broadcastable_shapes + if broadcasted_shape is not None: + assert broadcast_shapes(*shapes) == broadcasted_shape + + arrays = [np.empty(shape) for shape in shapes] + broadcastable = True + try: + broadcasted_shape = np.broadcast(*arrays).shape + except ValueError: + broadcastable = False + + if broadcastable: + assert broadcast_shapes(*shapes) == broadcasted_shape + else: + raises(BroadcastError, lambda: broadcast_shapes(*shapes)) + + 
+@given(lists(shapes, max_size=32)) +def test_broadcast_shapes_errors(shapes): + error = True + try: + broadcast_shapes(*shapes) + except BroadcastError as exc: + e = exc + else: + error = False + + # The ndindex and numpy errors won't match in general, because + # ndindex.broadcast_shapes gives an error with the first two shapes that + # aren't broadcast compatible, but numpy doesn't always, due to different + # implementation algorithms (e.g., the message from + # np.broadcast_shapes((0,), (0, 2), (2, 0)) mentions the last two shapes + # whereas ndindex.broadcast_shapes mentions the first two). + + # Instead, just confirm that the error message is correct as stated, and + # check against the numpy error message when just broadcasting the two + # reportedly bad shapes. + + if not error: + try: + np.broadcast_shapes(*shapes) + except: # pragma: no cover + raise RuntimeError("ndindex.broadcast_shapes raised but np.broadcast_shapes did not") + return + + assert shapes[e.arg1] == e.shape1 + assert shapes[e.arg2] == e.shape2 + + try: + np.broadcast_shapes(e.shape1, e.shape2) + except ValueError as np_exc: + # Check that they do in fact not broadcast, and the error messages are + # the same modulo the different arg positions. 
+ if 'Mismatch' in str(np_exc): # pragma: no cover + # Older versions of NumPy do not have the more helpful error message + assert str(BroadcastError(0, e.shape1, 1, e.shape2)) == str(np_exc) + else: # pragma: no cover + raise RuntimeError("ndindex.broadcast_shapes raised but np.broadcast_shapes did not") + + raises(TypeError, lambda: broadcast_shapes(1, 2)) + raises(TypeError, lambda: broadcast_shapes(1, 2, (2, 2))) + raises(TypeError, lambda: broadcast_shapes([(1, 2), (2, 2)])) + +@given(mutually_broadcastable_shapes_with_skipped_axes, skip_axes_st) +def test_broadcast_shapes_skip_axes(broadcastable_shapes, skip_axes): + shapes, broadcasted_shape = broadcastable_shapes + assert broadcast_shapes(*shapes, skip_axes=skip_axes) == broadcasted_shape + +@example([[], ()], (0,)) +@example([[(0, 1)], (0, 1)], (2,)) +@example([[(0, 1)], (0, 1)], (0, -1)) +@example([[(0, 1, 0, 0, 0), (2, 0, 0, 0)], (0, 2, 0, 0, 0)], [1]) +@given(mutually_broadcastable_shapes, + one_of( + integers(-20, 20), + tuples(integers(-20, 20), max_size=20), + lists(tuples(integers(-20, 20), max_size=20), max_size=32))) +def test_broadcast_shapes_skip_axes_errors(broadcastable_shapes, skip_axes): + shapes, broadcasted_shape = broadcastable_shapes + + # All errors should come from normalize_skip_axes, which is tested + # separately below. + try: + normalize_skip_axes(shapes, skip_axes) + except (TypeError, ValueError, IndexError) as e: + raises(type(e), lambda: broadcast_shapes(*shapes, + skip_axes=skip_axes)) + return + + try: + broadcast_shapes(*shapes, skip_axes=skip_axes) + except IndexError: + raise RuntimeError("broadcast_shapes raised but should not have") # pragma: no cover + except BroadcastError: + # Broadcastable shapes can become unbroadcastable after skipping axes + # (see the @example above). 
+ pass + +remove_indices_n = shared(integers(0, 100)) + +@given(remove_indices_n, + remove_indices_n.flatmap(lambda n: lists(integers(-n, n), unique=True))) +def test_remove_indices(n, idxes): + if idxes: + assume(max(idxes) < n) + assume(min(idxes) >= -n) + a = tuple(range(n)) + b = remove_indices(a, idxes) + if len(idxes) == 1: + assert remove_indices(a, idxes[0]) == b + + A = list(a) + for i in idxes: + A[i] = None + + assert set(A) - set(b) == ({None} if idxes else set()) + assert set(b) - set(A) == set() + + # Check the order is correct + j = 0 + for i in range(n): + val = A[i] + if val == None: + assert val not in b + else: + assert b[j] == val + j += 1 + + # Test that unremove_indices is the inverse + if all(i >= 0 for i in idxes) or all(i < 0 for i in idxes): + assert unremove_indices(b, idxes) == tuple(A) + else: + raises(NotImplementedError, lambda: unremove_indices(b, idxes)) + +# Meta-test for the hypothesis strategy +@given(mutually_broadcastable_shapes_with_skipped_axes, skip_axes_st) +def test_mutually_broadcastable_shapes_with_skipped_axes(broadcastable_shapes, + skip_axes): # pragma: no cover + shapes, broadcasted_shape = broadcastable_shapes + _skip_axes = normalize_skip_axes(shapes, skip_axes) + + assert len(_skip_axes) == len(shapes) + + for shape in shapes: + assert None not in shape + assert None not in broadcasted_shape + + _shapes = [remove_indices(shape, sk) for shape, sk in zip(shapes, _skip_axes)] + + assert broadcast_shapes(*_shapes) == broadcasted_shape + +@example([[(2, 10, 3, 4), (10, 3, 4)], (2, 3, 4)], (-3,)) +@example([[(0, 10, 2, 3, 10, 4), (1, 10, 1, 0, 10, 2, 3, 4)], + (1, 1, 0, 2, 3, 4)], (1, 4)) +@example([[(2, 0, 3, 4)], (2, 3, 4)], (1,)) +@example([[(0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0)], (0, 0, 0, 0)], (1, 2)) +@given(mutually_broadcastable_shapes_with_skipped_axes, skip_axes_st) +def test_associated_axis(broadcastable_shapes, skip_axes): + shapes, broadcasted_shape = broadcastable_shapes + _skip_axes = 
normalize_skip_axes(shapes, skip_axes) + + for shape, sk in zip(shapes, _skip_axes): + n = len(shape) + for i in range(-len(shape), 0): + val = shape[i] + + bval = associated_axis(broadcasted_shape, i, sk) + if bval is None: + assert ndindex(i).reduce(n, negative_int=True) in sk, (shape, i) + else: + assert val == 1 or bval == val, (shape, i) + + + sk = max(_skip_axes, key=len, default=()) + for i in range(-len(broadcasted_shape)-len(sk)-10, -len(broadcasted_shape)-len(sk)): + assert associated_axis(broadcasted_shape, i, sk) is None + +# TODO: add a hypothesis test for asshape +def test_asshape(): + assert asshape(1) == (1,) + assert asshape(np.int64(2)) == (2,) + assert type(asshape(np.int64(2))[0]) == int + assert asshape((1, 2)) == (1, 2) + assert asshape([1, 2]) == (1, 2) + assert asshape((1, 2), allow_int=False) == (1, 2) + assert asshape([1, 2], allow_int=False) == (1, 2) + assert asshape((np.int64(1), np.int64(2))) == (1, 2) + assert type(asshape((np.int64(1), np.int64(2)))[0]) == int + assert type(asshape((np.int64(1), np.int64(2)))[1]) == int + assert asshape((-1, -2), allow_negative=True) == (-1, -2) + assert asshape(-2, allow_negative=True) == (-2,) + + + raises(TypeError, lambda: asshape(1.0)) + raises(TypeError, lambda: asshape((1.0,))) + raises(ValueError, lambda: asshape(-1)) + raises(ValueError, lambda: asshape((1, -1))) + raises(ValueError, lambda: asshape((1, None))) + raises(TypeError, lambda: asshape(...)) + raises(TypeError, lambda: asshape(Integer(1))) + raises(TypeError, lambda: asshape(Tuple(1, 2))) + raises(TypeError, lambda: asshape((True,))) + raises(TypeError, lambda: asshape({1, 2})) + raises(TypeError, lambda: asshape({1: 2})) + raises(TypeError, lambda: asshape('1')) + raises(TypeError, lambda: asshape(1, allow_int=False)) + raises(TypeError, lambda: asshape(-1, allow_int=False)) + raises(TypeError, lambda: asshape(-1, allow_negative=True, allow_int=False)) + raises(TypeError, lambda: asshape(np.int64(1), allow_int=False)) + 
raises(IndexError, lambda: asshape((2, 3), 3)) + +@example([(0,), ()], (0, 0)) +@example([(0, 1), (0,), ()], [(-1,), (0,), ()]) +@example([(5,)], (10,)) +@example([], []) +@example([()], []) +@example([(0, 1)], 0) +@example([(2, 3), (2, 3, 4)], [(3,), (0,)]) +@example([(0, 1)], 0) +@example([(2, 3)], (0, -2)) +@example([(2, 4), (2, 3, 4)], [(0,), (-3,)]) +@example([(1, 2)], [(0,), (1,)]) +@given(lists(tuples(integers(0))), + one_of(integers(), tuples(integers()), lists(tuples(integers())))) +def test_normalize_skip_axes(shapes, skip_axes): + if not shapes: + if skip_axes in [(), []]: + assert normalize_skip_axes(shapes, skip_axes) == [] + else: + raises(ValueError, lambda: normalize_skip_axes(shapes, skip_axes)) + return + + min_dim = min(len(shape) for shape in shapes) + + if isinstance(skip_axes, int): + if not (-min_dim <= skip_axes < min_dim): + raises(AxisError, lambda: normalize_skip_axes(shapes, skip_axes)) + return + _skip_axes = [(skip_axes,)]*len(shapes) + elif isinstance(skip_axes, tuple): + if not all(-min_dim <= s < min_dim for s in skip_axes): + raises(AxisError, lambda: normalize_skip_axes(shapes, skip_axes)) + return + _skip_axes = [skip_axes]*len(shapes) + elif not skip_axes: + # empty list will be interpreted as a single skip_axes tuple + assert normalize_skip_axes(shapes, skip_axes) == [()]*len(shapes) + return + else: + if len(shapes) != len(skip_axes): + raises(ValueError, lambda: normalize_skip_axes(shapes, skip_axes)) + return + _skip_axes = skip_axes + + try: + res = normalize_skip_axes(shapes, skip_axes) + except AxisError as e: + axis, ndim = e.args + assert any(axis in s for s in _skip_axes) + assert any(ndim == len(shape) for shape in shapes) + assert axis < -ndim or axis >= ndim + return + except ValueError as e: + if 'not unique' in str(e): + bad_skip_axes, bad_shape = e.skip_axes, e.shape + assert str(bad_skip_axes) in str(e) + assert str(bad_shape) in str(e) + assert bad_skip_axes in _skip_axes + assert bad_shape in shapes + indexed 
= [bad_shape[i] for i in bad_skip_axes] + assert len(indexed) != len(set(indexed)) + return + else: # pragma: no cover + raise + + assert isinstance(res, list) + assert all(isinstance(x, tuple) for x in res) + assert all(isinstance(i, int) for x in res for i in x) + + assert len(res) == len(shapes) + for shape, new_skip_axes in zip(shapes, res): + assert len(new_skip_axes) == len(set(new_skip_axes)) + assert new_skip_axes == tuple(sorted(new_skip_axes)) + for i in new_skip_axes: + assert i < 0 + assert ndindex(i).reduce(len(shape), negative_int=True) == i + + # TODO: Assert the order is maintained (doesn't actually matter for now + # but could for future applications) + +def test_normalize_skip_axes_errors(): + raises(TypeError, lambda: normalize_skip_axes([(1,)], {0: 1})) + raises(TypeError, lambda: normalize_skip_axes([(1,)], {0})) + raises(TypeError, lambda: normalize_skip_axes([(1,)], [(0,), 0])) + raises(TypeError, lambda: normalize_skip_axes([(1,)], [0, (0,)])) + +@example(10, 5) +@given(integers(), integers()) +def test_axiserror(axis, ndim): + if ndim == 0 and axis in [0, -1]: + # NumPy allows axis=0 or -1 for 0-d arrays + AxisError(axis, ndim) + return + + try: + if ndim >= 0: + range(ndim)[axis] + except IndexError: + e = AxisError(axis, ndim) + else: + raises(ValueError, lambda: AxisError(axis, ndim)) + return + + try: + raise e + except AxisError as e2: + assert e2.args == (axis, ndim) + if ndim <= 32 and -1000 < axis < 1000: + a = np.empty((0,)*ndim) + try: + np.sum(a, axis=axis) + except np_AxisError as e3: + assert str(e2) == str(e3) + else: + raise RuntimeError("numpy didn't raise AxisError") # pragma: no cover diff --git a/venv/Lib/site-packages/ndindex/tests/test_slice.py b/venv/Lib/site-packages/ndindex/tests/test_slice.py new file mode 100644 index 0000000..2893fc2 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_slice.py @@ -0,0 +1,522 @@ +from pytest import raises + +from numpy import arange, isin, bool_ + +from hypothesis import 
given, assume, example +from hypothesis.strategies import integers, one_of + +from ..slice import Slice +from ..integer import Integer +from ..ellipsis import ellipsis +from ..shapetools import asshape +from .helpers import check_same, slices, prod, shapes, iterslice, assert_equal, reduce_kwargs + +def test_slice_args(): + # Test the behavior when not all three arguments are given + # TODO: Incorporate this into the normal slice tests + raises(TypeError, lambda: slice()) + raises(TypeError, lambda: Slice()) + raises(TypeError, lambda: Slice(1.0)) + raises(TypeError, lambda: Slice('1')) + # See docstring of operator_index() + raises(TypeError, lambda: Slice(True)) + raises(TypeError, lambda: Slice(bool_(True))) + + S = Slice(1) + assert S == Slice(S) == Slice(slice(1)) == Slice(None, 1) == Slice(None, 1, None) == Slice(None, 1, None) + assert S.raw == slice(None, 1, None) + assert S.args == (S.start, S.stop, S.step) + + S = Slice(0, 1) + assert S == Slice(S) == Slice(slice(0, 1)) == Slice(0, 1, None) + assert S.raw == slice(0, 1, None) + assert S.args == (S.start, S.stop, S.step) + + S = Slice(0, 1, 2) + assert S == Slice(S) == Slice(slice(0, 1, 2)) + assert S.raw == slice(0, 1, 2) + assert S.args == (S.start, S.stop, S.step) + + class HasIndex: + def __init__(self, x): + self.x = x + + def __index__(self): + return self.x + + S = Slice(HasIndex(0), HasIndex(1), HasIndex(2)) + assert S == Slice(0, 1, 2) + assert S.args == (0, 1, 2) + assert type(S.start) is int + assert type(S.stop) is int + assert type(S.step) is int + assert type(S.args[0]) is int + assert type(S.args[1]) is int + assert type(S.args[2]) is int + + class HasInt: + def __init__(self, x): + self.x = x + + def __int__(self): + return self.x # pragma: no cover + + raises(TypeError, lambda: Slice(HasInt(0), None)) + raises(TypeError, lambda: Slice(None, HasInt(0))) + raises(TypeError, lambda: Slice(None, None, HasInt(0))) + +def test_slice_exhaustive(): + for n in range(100): + a = arange(n) + for 
start, stop, step in iterslice(one_two_args=False): + check_same(a, slice(start, stop, step)) + +@given(slices(), integers(0, 100)) +def test_slice_hypothesis(s, size): + a = arange(size) + check_same(a, s) + +def test_slice_len_exhaustive(): + for args in iterslice(): + try: + S = Slice(*args) + except ValueError: + continue + try: + l = len(S) + except ValueError: + # No maximum + l = 10000 + + m = -1 + for n in range(20): + a = arange(n) + L = len(a[S.raw]) + assert L <= l, S + m = max(L, m) + if l != 10000: + assert m == l, S + else: + # If there is no maximum, the size of the slice should increase + # with larger arrays. + assert len(arange(30)[S.raw]) > m, S + + # TODO + # if l == 0: + # # There should only be one canonical length 0 slice + # assert s == Slice(0, 0) + +@given(slices()) +def test_slice_len_hypothesis(s): + try: + S = Slice(s) + except ValueError: # pragma: no cover + assume(False) + try: + l = len(S) + except ValueError: + # No maximum + l = 10000 + + m = -1 + for n in range(20): + a = arange(n) + L = len(a[S.raw]) + assert L <= l, (S, n) + m = max(L, m) + if l != 10000: + assert m == l, S + else: + # If there is no maximum, the size of the slice should increase + # with larger arrays. 
+ assert len(arange(30)[S.raw]) > m, S + +def test_slice_args_reduce_no_shape(): + S = Slice(1).reduce() + assert S == Slice(None, 1).reduce() == Slice(0, 1, None).reduce() == Slice(0, 1).reduce() == Slice(0, 1, 1) + + S = Slice(0, 1).reduce() + assert S == Slice(0, 1, None).reduce() == Slice(0, 1, 1) + +def test_slice_reduce_no_shape_exhaustive(): + slices = {} + A = [arange(n) for n in range(30)] + for args in iterslice(): + try: + S = Slice(*args) + except ValueError: + continue + + # Check the conditions stated by the Slice.reduce() docstring + reduced = S.reduce() + assert reduced.start != None + if S.start != None and S.start >= 0: + assert reduced.start >= 0 + assert reduced.step != None + if S.step is not None: + assert abs(reduced.step) <= abs(S.step) + if reduced.stop is None: + assert S.stop is None + # Idempotency + assert reduced.reduce() == reduced, S + + B = [] + for a in A: + check_same(a, S.raw, ndindex_func=lambda a, x: a[x.reduce().raw]) + B.append(tuple(a[reduced.raw])) + B = tuple(B) + # Test that Slice.reduce gives a canonical result, i.e., if any two + # slices always give the same sub-arrays, they reduce to the same thing + if B in slices: + assert slices[B] == reduced, f"{S} reduced to {reduced}, but should be equal to {slices[B]}" + else: + slices[B] = reduced + + +@given(slices(), one_of(integers(0, 100), shapes), reduce_kwargs) +def test_slice_reduce_no_shape_hypothesis(s, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + try: + S = Slice(s) + except ValueError: # pragma: no cover + assume(False) + + # The axis argument is tested implicitly in the Tuple.reduce test. It is + # difficult to test here because we would have to pass in a Tuple to + # check_same. 
+ check_same(a, S.raw, ndindex_func=lambda a, x: a[x.reduce(**kwargs).raw]) + + # Check the conditions stated by the Slice.reduce() docstring + reduced = S.reduce(**kwargs) + assert reduced.start != None + if S.start != None and S.start >= 0: + assert reduced.start >= 0 + assert reduced.step != None + if S.step is not None: + assert abs(reduced.step) <= abs(S.step) + if reduced.stop is None: + assert S.stop is None + # Idempotency + assert reduced.reduce(**kwargs) == reduced, S + +def test_slice_reduce_exhaustive(): + for n in range(30): + slices = {} + a = arange(n) + for args in iterslice(): + try: + S = Slice(*args) + except ValueError: + continue + + reduced = S.reduce((n,)) + + # Check the conditions stated by the Slice.reduce() docstring + # TODO: Factor this out so we can also test it in the tuple reduce + # tests. + + # len() should not raise after calling reduce() with a shape + L = len(reduced) + # len() should be exact after calling reduce() with a shape + assert L == len(a[reduced.raw]), (S, n) + assert reduced.start >= 0 + # We cannot require stop > 0 because if stop = None and step < 0, the + # only equivalent stop that includes 0 is negative. + assert reduced.stop != None + if S.step != None and S.step < 0: + if reduced.stop < 0: + assert reduced.stop == -n - 1 + assert 0 in a[reduced.raw], (S, n) + assert L > 1 + else: + assert reduced.stop >= 0, (S, n) + assert reduced.step != None + if S.step != None: + assert abs(reduced.step) <= abs(S.step) + if S.stop != None and S.stop >= 0: + # Test that stop is as close to start as possible for the + # given step (the "as possible" is checked by uniqueness + # below). 
+ if L not in [0, 1]: + if reduced.step > 0: + assert reduced.stop <= S.stop, (S, n) + else: + assert reduced.stop >= S.stop + if L == 1: + assert reduced == Slice(reduced.start, reduced.start+1, 1) + + check_same(a, S.raw, ndindex_func=lambda a, x: a[x.reduce((n,)).raw]) + B = tuple(a[reduced.raw]) + # Test that Slice.reduce gives a canonical result, i.e., if any two +# slices always give the same sub-arrays, they reduce to the same thing + if B in slices: + assert slices[B] == reduced, f"{S} reduced to {reduced}, but should be equal to {slices[B]} for shape {n}" + else: + slices[B] = reduced + + # Idempotency + assert reduced.reduce() == reduced, S + assert reduced.reduce((n,)) == reduced, S + +@example(slice(None, None, -1), 2, {}) +@example(slice(-10, 11, 3), 10, {}) +@example(slice(-1, 3, -3), 10, {}) +@given(slices(), one_of(integers(0, 100), shapes), reduce_kwargs) +def test_slice_reduce_hypothesis(s, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + try: + S = Slice(s) + except ValueError: # pragma: no cover + assume(False) + + # The axis argument is tested implicitly in the Tuple.reduce test. It is + # difficult to test here because we would have to pass in a Tuple to + # check_same. + check_same(a, S.raw, ndindex_func=lambda a, x: a[x.reduce(shape, **kwargs).raw]) + + # Check the conditions stated by the Slice.reduce() docstring + try: + reduced = S.reduce(shape, **kwargs) + except IndexError: + # shape == () + return + + shape = asshape(shape) + n = shape[0] + + # len() should not raise after calling reduce() with a shape + L = len(reduced) + # len() should be exact after calling reduce() with a shape + assert L == len(a[reduced.raw]), (S, n) + assert reduced.start >= 0 + # We cannot require stop > 0 because if stop = None and step < 0, the + # only equivalent stop that includes 0 is negative. 
+ assert reduced.stop != None + if S.step != None and S.step < 0: + if reduced.stop < 0: + assert reduced.stop == -n - 1 + if a.size != 0: + assert a[0] in a[reduced.raw], (S, n) + assert L > 1 + else: + assert reduced.stop >= 0, (S, n) + assert reduced.step != None + if S.step != None: + assert abs(reduced.step) <= abs(S.step) + if S.stop != None and S.stop >= 0: + # Test that stop is as close to start as possible for the + # given step (the "as possible" is checked by uniqueness + # in the exhaustive test). + if L not in [0, 1]: + if reduced.step > 0: + assert reduced.stop <= S.stop, (S, n) + else: + assert reduced.stop >= S.stop + if L == 1: + assert reduced == Slice(reduced.start, reduced.start+1, 1) + + # Idempotency + assert reduced.reduce(**kwargs) == reduced, S + assert reduced.reduce(shape, **kwargs) == reduced, S + +def test_slice_newshape_exhaustive(): + def raw_func(a, idx): + return a[idx].shape + + def ndindex_func(a, index): + return index.newshape(shape) + + def assert_equal(raw_shape, newshape): + assert raw_shape == newshape + + for n in range(10): + shape = n + a = arange(n) + + for sargs in iterslice(): + try: + S = Slice(*sargs) + except ValueError: + continue + + check_same(a, S.raw, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal) + +def test_slice_as_subindex_slice_exhaustive(): + # We have to restrict the range of the exhaustive test to get something + # that finishes in a reasonable amount of time (~30 seconds, vs. 30 + # minutes for the original ranges). 
+ + # a = arange(10) + # for sargs in iterslice(): + # for indexargs in iterslice(): + + a = arange(5) + for sargs in iterslice((-5, 5), (-5, 5), (-5, 5), one_two_args=False): + for indexargs in iterslice((-5, 5), (-5, 5), (-5, 5), one_two_args=False): + + try: + S = Slice(*sargs) + except ValueError: + continue + + try: + Index = Slice(*indexargs) + except ValueError: + continue + + try: + Subindex = S.as_subindex(Index) + except NotImplementedError: + continue + + aS = a[S.raw] + aindex = a[Index.raw] + asubindex = aindex[Subindex.raw] + + assert_equal(asubindex, aS[isin(aS, aindex)]) + + subindex2 = Index.as_subindex(S) + asubindex2 = aS[subindex2.raw] + assert_equal(asubindex2, asubindex) + +def test_slice_as_subindex_integer_exhaustive(): + a = arange(10) + for sargs in iterslice(): + for i in range(-10, 10): + + try: + S = Slice(*sargs) + except ValueError: + continue + + Index = Integer(i) + + empty = False + try: + Subindex = S.as_subindex(Index) + except NotImplementedError: + continue + except ValueError as e: + assert "do not intersect" in e.args[0] + empty = True + + aS = a[S.raw] + aindex = a[i] + + if empty: + assert not isin(aS, aindex).any() + assert not isin(aindex, aS).any() + with raises(ValueError, match="do not intersect"): + Index.as_subindex(S) + else: + asubindex = aindex[Subindex.raw] + + assert_equal(asubindex.flatten(), aS[isin(aS, aindex)]) + + subindex2 = Index.as_subindex(S) + asubindex2 = aS[subindex2.raw] + assert_equal(asubindex2, asubindex) + +def test_slice_as_subindex_ellipsis_exhaustive(): + a = arange(10) + for sargs in iterslice(): + try: + S = Slice(*sargs) + except ValueError: + continue + + Index = ellipsis() + + try: + Subindex = S.as_subindex(Index) + except NotImplementedError: + continue + + aS = a[S.raw] + aindex = a[...] 
+ + asubindex = aindex[Subindex.raw] + + assert_equal(asubindex.flatten(), aS[isin(aS, aindex)]) + + try: + subindex2 = Index.as_subindex(S) + except NotImplementedError: + continue + asubindex2 = aS[subindex2.raw] + assert_equal(asubindex2, asubindex) + +def test_slice_isempty_exhaustive(): + for args in iterslice(): + try: + S = Slice(*args) + except ValueError: + continue + + isempty = S.isempty() + + aempty = True + for n in range(30): + a = arange(n) + + aS = a[S.raw] + if aS.size != 0: + if isempty: + raise AssertionError(f"Slice s = {S}.isempty() gave True, a[s] is not empty for a = range({n}).") + else: + aempty = False + # isempty() should always give the correct result for a specific + # array shape + assert S.isempty(n) == (aS.size == 0) + + assert isempty == aempty, S + +@example(slice(None, None, None), ()) +@given(slices(), one_of(shapes, integers(0, 10))) +def test_slice_isempty_hypothesis(s, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + try: + S = Slice(s) + except (IndexError, ValueError): # pragma: no cover + assume(False) + + def raw_func(a, s): + return a[s].size == 0 + + def ndindex_func(a, S): + return S.isempty(), S.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # If isempty is True then a[t] should be empty + if isempty: + assert raw_empty, (S, shape) + # We cannot test the converse with hypothesis. isempty may be False + # but a[s] could still be empty for this specific a (e.g., if a is + # already itself empty). + + # If isempty is true with no shape it should be true for a specific + # shape. The converse is not true because the indexed array could be + # empty. 
+ if isempty: + assert isempty_shape, (S, shape) + + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (S, shape) + + check_same(a, s, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tests/test_tuple.py b/venv/Lib/site-packages/ndindex/tests/test_tuple.py new file mode 100644 index 0000000..777015b --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tests/test_tuple.py @@ -0,0 +1,255 @@ +from itertools import product + +from numpy import arange, array, intp, empty, all as np_all + +from hypothesis import given, example +from hypothesis.strategies import integers, one_of + +from pytest import raises + +from ..ndindex import ndindex +from ..tuple import Tuple +from ..booleanarray import BooleanArray +from ..integer import Integer +from ..integerarray import IntegerArray +from .helpers import (check_same, Tuples, prod, short_shapes, iterslice, + reduce_kwargs, assert_equal_allow_scalar_0d) + +def test_tuple_constructor(): + # Nested tuples are not allowed + raises(ValueError, lambda: Tuple((1, 2, 3))) + raises(ValueError, lambda: Tuple(0, (1, 2, 3))) + raises(ValueError, lambda: Tuple(Tuple(1, 2, 3))) + raises(ValueError, lambda: Tuple(0, Tuple(1, 2, 3))) + + # Multiple ellipses in a tuple are not allowed + raises(IndexError, lambda: Tuple(..., 0, ...)) + raises(IndexError, lambda: Tuple(0, ..., ...)) + raises(IndexError, lambda: Tuple(..., ...)) + + # Test NotImplementedError behavior for Tuples with arrays split up by + # slices, ellipses, and newaxes. 
+ raises(NotImplementedError, lambda: Tuple(0, slice(None), [0])) + raises(NotImplementedError, lambda: Tuple([0], slice(None), [0])) + raises(NotImplementedError, lambda: Tuple([0], slice(None), [0])) + raises(NotImplementedError, lambda: Tuple(0, ..., [0])) + raises(NotImplementedError, lambda: Tuple([0], ..., [0])) + raises(NotImplementedError, lambda: Tuple([0], ..., [0])) + raises(NotImplementedError, lambda: Tuple(0, None, [0])) + raises(NotImplementedError, lambda: Tuple([0], None, [0])) + raises(NotImplementedError, lambda: Tuple([0], None, [0])) + + # Test NotImplementedError for boolean scalars mixed with other arrays + Tuple(0, True, 0) # Doesn't raise + raises(NotImplementedError, lambda: Tuple([0], True, 0)) + raises(NotImplementedError, lambda: Tuple(False, [0])) + raises(NotImplementedError, lambda: Tuple(False, IntegerArray([0]), 0)) + raises(NotImplementedError, lambda: Tuple(False, 0, IntegerArray([0]))) + raises(NotImplementedError, lambda: Tuple(True, BooleanArray([True]))) + + # Make sure this doesn't raise + Tuple(0, slice(None), 0) + Tuple(0, ..., 0) + Tuple(0, None, 0) + +def test_tuple_exhaustive(): + # Exhaustive tests here have to be very limited because of combinatorial + # explosion. + a = arange(2*2*2).reshape((2, 2, 2)) + types = { + slice: lambda: iterslice((-1, 1), (-1, 1), (-1, 1), one_two_args=False), + # slice: _iterslice, + int: lambda: ((i,) for i in range(-3, 3)), + type(...): lambda: () + } + + for t1, t2, t3 in product(types, repeat=3): + for t1_args in types[t1](): + for t2_args in types[t2](): + for t3_args in types[t3](): + idx1 = t1(*t1_args) + idx2 = t2(*t2_args) + idx3 = t3(*t3_args) + + idx = (idx1, idx2, idx3) + # Disable the same exception check because there could be + # multiple invalid indices in the tuple, and for instance + # numpy may give an IndexError but we would give a + # TypeError because we check the type first. 
+ check_same(a, idx, same_exception=False) + try: + index = Tuple(*idx) + except (IndexError, ValueError): + pass + else: + assert index.has_ellipsis == (type(...) in (t1, t2, t3)) + +@given(Tuples, short_shapes) +def test_tuples_hypothesis(t, shape): + a = arange(prod(shape)).reshape(shape) + check_same(a, t, same_exception=False) + +@given(Tuples, short_shapes) +def test_ellipsis_index(t, shape): + a = arange(prod(shape)).reshape(shape) + # Don't know if there is a better way to test ellipsis_idx + def ndindex_func(a, index): + return a[ndindex((*index.raw[:index.ellipsis_index], ..., + *index.raw[index.ellipsis_index+1:])).raw] + + check_same(a, t, ndindex_func=ndindex_func, assert_equal=assert_equal_allow_scalar_0d) + +@example((True, 0, False), 1, {}) +@example((..., None), (), {}) +@given(Tuples, one_of(short_shapes, integers(0, 10)), reduce_kwargs) +def test_tuple_reduce_no_shape_hypothesis(t, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = Tuple(*t) + + check_same(a, index.raw, ndindex_func=lambda a, x: + a[x.reduce(**kwargs).raw], same_exception=False, + assert_equal=assert_equal_allow_scalar_0d) + + reduced = index.reduce(**kwargs) + if isinstance(reduced, Tuple): + assert len(reduced.args) != 1 + assert reduced == () or reduced.args[-1] != ... 
+ + # Idempotency + assert reduced.reduce(**kwargs) == reduced + +@example((..., empty((1, 0), dtype=intp)), (1, 0), {}) +@example((1, -1, [1, -1]), (3, 3, 3), {'negative_int': True}) +@example((..., None), (), {}) +@example((..., empty((0, 0), dtype=bool)), (0, 0), {}) +@example((empty((0, 0), dtype=bool), 0), (0, 0, 1), {}) +@example((array([], dtype=intp), 0), (0, 0), {}) +@example((array([], dtype=intp), array(0)), (0, 0), {}) +@example((array([], dtype=intp), [0]), (0, 0), {}) +@example((0, 1, ..., 2, 3), (2, 3, 4, 5, 6, 7), {}) +@example((0, slice(None), ..., slice(None), 3), (2, 3, 4, 5, 6, 7), {}) +@example((0, ..., slice(None)), (2, 3, 4, 5, 6, 7), {}) +@example((slice(None, None, -1),), (2,), {}) +@example((..., slice(None, None, -1),), (2, 3, 4), {}) +@example((..., False, slice(None)), 0, {}) +@given(Tuples, one_of(short_shapes, integers(0, 10)), reduce_kwargs) +def test_tuple_reduce_hypothesis(t, shape, kwargs): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + index = Tuple(*t) + + check_same(a, index.raw, ndindex_func=lambda a, x: a[x.reduce(shape, + **kwargs).raw], same_exception=False, + assert_equal=assert_equal_allow_scalar_0d) + + negative_int = kwargs.get('negative_int', False) + + try: + reduced = index.reduce(shape, **kwargs) + except IndexError: + pass + else: + if isinstance(reduced, Tuple): + assert len(reduced.args) != 1 + assert reduced == () or reduced.args[-1] != ... + # TODO: Check the other properties from the Tuple.reduce docstring. + + # Idempotency + assert reduced.reduce(**kwargs) == reduced + # This is currently not implemented, for example, (..., False, :) + # takes two steps to remove the redundant slice. 
+ # assert reduced.reduce(shape) == reduced + + for arg in reduced.args: + if isinstance(arg, Integer): + if negative_int: + assert arg.raw < 0 + else: + assert arg.raw >= 0 + elif isinstance(arg, IntegerArray): + if negative_int: + assert np_all(arg.raw < 0) + else: + assert np_all(arg.raw >= 0) + +def test_tuple_reduce_explicit(): + # Some aspects of Tuple.reduce are hard to test as properties, so include + # some explicit tests here. + + # (Before Index, shape): After index + tests = { + # Make sure redundant slices are removed + (Tuple(0, ..., slice(0, 3)), (5, 3)): Integer(0), + (Tuple(slice(0, 5), ..., 0), (5, 3)): Tuple(..., Integer(0)), + # Ellipsis is removed if unnecessary + (Tuple(0, ...), (2, 3)): Integer(0), + (Tuple(0, 1, ...), (2, 3)): Tuple(Integer(0), Integer(1)), + (Tuple(..., 0, 1), (2, 3)): Tuple(Integer(0), Integer(1)), + } + + for (before, shape), after in tests.items(): + reduced = before.reduce(shape) + assert reduced == after, (before, shape) + + a = arange(prod(shape)).reshape(shape) + check_same(a, before.raw, ndindex_func=lambda a, x: + a[x.reduce(shape).raw], + assert_equal=assert_equal_allow_scalar_0d) + + # Idempotency + assert reduced.reduce() == reduced + assert reduced.reduce(shape) == reduced + +@example((slice(0, 0),), 2) +@example((0, slice(0, 0)), (1, 2)) +@given(Tuples, one_of(short_shapes, integers(0, 10))) +def test_tuple_isempty_hypothesis(t, shape): + if isinstance(shape, int): + a = arange(shape) + else: + a = arange(prod(shape)).reshape(shape) + + T = Tuple(*t) + + try: + ndindex(t).isempty(shape) + except NotImplementedError: + return + except IndexError: + pass + + def raw_func(a, t): + return a[t].size == 0 + + def ndindex_func(a, T): + return T.isempty(), T.isempty(shape) + + def assert_equal(raw_empty, ndindex_empty): + isempty, isempty_shape = ndindex_empty + + # If isempty is True then a[t] should be empty + if isempty: + assert raw_empty, (T, shape) + # We cannot test the converse with hypothesis. 
isempty may be False + # but a[t] could still be empty for this specific a (e.g., if a is + # already itself empty). + + # If isempty is true with no shape it should be true for a specific + # shape. The converse is not true because the indexed array could be + # empty. + if isempty: + assert isempty_shape, (T, shape) + + # isempty() should always give the correct result for a specific + # array after reduction + assert isempty_shape == raw_empty, (T, shape) + + check_same(a, t, raw_func=raw_func, ndindex_func=ndindex_func, + assert_equal=assert_equal, same_exception=False) diff --git a/venv/Lib/site-packages/ndindex/tuple.py b/venv/Lib/site-packages/ndindex/tuple.py new file mode 100644 index 0000000..69a8d78 --- /dev/null +++ b/venv/Lib/site-packages/ndindex/tuple.py @@ -0,0 +1,731 @@ +import itertools + +from .ndindex import NDIndexCommon, ndindex +from .subindex_helpers import subindex_slice +from .shapetools import asshape, broadcast_shapes + +from ._tuple import _Tuple + +class Tuple(_Tuple, NDIndexCommon): + """ + Represents a tuple of single-axis indices. + + Valid single axis indices are + + - :class:`Integer` + - :class:`Slice` + - :class:`ellipsis` + - :class:`Newaxis` + - :class:`IntegerArray` + - :class:`BooleanArray` + + See :doc:`../indexing-guide/multidimensional-indices/tuples` for a + description of the semantics of tuple indices. + + `Tuple(x1, x2, …, xn)` represents the index `a[x1, x2, …, xn]` or, + equivalently, `a[(x1, x2, …, xn)]`. `Tuple()` with no arguments is the + empty tuple index, `a[()]`, which returns `a` unchanged. + + >>> from ndindex import Tuple, Slice + >>> import numpy as np + >>> idx = Tuple(0, Slice(2, 4)) + >>> a = np.arange(10).reshape((2, 5)) + >>> a + array([[0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]]) + >>> a[0, 2:4] + array([2, 3]) + >>> a[idx.raw] + array([2, 3]) + + .. note:: + + `Tuple` does *not* represent a tuple, but rather an *tuple index*. 
It + does not have most methods that `tuple` has, and should not be used in + non-indexing contexts. See the document on :any:`type-confusion` for + more details. + + """ + __slots__ = () + + def __hash__(self): + # Since self.args is itself a tuple, it will match the hash of + # self.raw when it is hashable. + return hash(self.args) + + def __repr__(self): + # Since tuples are nested, we can print the raw form of the args to + # make them a little more readable. + def _repr(s): + if s == ...: + return '...' + if isinstance(s, ArrayIndex): + if s.shape and 0 not in s.shape: + return repr(s.array.tolist()) + return repr(s) + return repr(s.raw) + return f"{self.__class__.__name__}({', '.join(map(_repr, self.args))})" + + def __str__(self): + # Since tuples are nested, we can print the raw form of the args to + # make them a little more readable. + def _str(s): + if s == ...: + return '...' + if isinstance(s, ArrayIndex): + return str(s) + return str(s.raw) + return f"{self.__class__.__name__}({', '.join(map(_str, self.args))})" + + @property + def has_ellipsis(self): + """ + Returns True if self has an ellipsis + """ + return ... in self.args + + @property + def ellipsis_index(self): + """ + Give the index i of `self.args` where the ellipsis is. + + If `self` doesn't have an ellipsis, it gives `len(self.args)`, since + tuple indices without an ellipsis always implicitly end in an + ellipsis. + + The resulting value `i` is such that `self.args[:i]` indexes the + beginning axes of an array and `self.args[i+1:]` indexes the end axes + of an array. + + >>> from ndindex import Tuple + >>> idx = Tuple(0, 1, ..., 2, 3) + >>> i = idx.ellipsis_index + >>> i + 2 + >>> idx.args[:i] + (Integer(0), Integer(1)) + >>> idx.args[i+1:] + (Integer(2), Integer(3)) + + >>> Tuple(0, 1).ellipsis_index + 2 + + """ + if self.has_ellipsis: + return self.args.index(...) 
+ return len(self.args) + + def reduce(self, shape=None, *, negative_int=False): + r""" + Reduce a Tuple index on an array of shape `shape` + + A `Tuple` with a single argument is always reduced to that single + argument (because `a[idx,]` is the same as `a[idx]`). + + >>> from ndindex import Tuple + + >>> Tuple(slice(2, 4)).reduce() + Slice(2, 4, 1) + + If an explicit array shape is given, the result will either be + `IndexError` if the index is invalid for the given shape, or an index + that is as simple as possible: + + - All the elements of the :any:`Tuple` are recursively :any:`reduced + `. + + - Any axes that can be merged into an :any:`ellipsis` are removed. + This includes the implicit ellipsis at the end of a Tuple that + doesn't contain any explicit ellipses. + + - :any:`Ellipses ` that don't match any axes are removed. + + - An :any:`ellipsis` at the end of the :any:`Tuple` is removed. + + - Scalar :any:`BooleanArray` arguments (`True` or `False`) are + combined into a single term (the first boolean scalar is replaced + with the AND of all the boolean scalars). + + - If the resulting :any:`Tuple` would have a single argument, that + argument is returned. + + >>> idx = Tuple(0, ..., slice(0, 3)) + >>> idx.reduce((5, 4)) + Tuple(0, slice(0, 3, 1)) + >>> idx.reduce((5, 3)) + Integer(0) + + >>> idx = Tuple(slice(0, 10), -3) + >>> idx.reduce((5,)) + Traceback (most recent call last): + ... + IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed + >>> idx.reduce((5, 2)) + Traceback (most recent call last): + ... + IndexError: index -3 is out of bounds for axis 1 with size 2 + + Note + ==== + + ndindex presently does not distinguish between scalar objects and + 0-D arrays. It is possible for the original index to produce one + and the reduced index to produce the other. In particular, the + presence of a redundant ellipsis forces NumPy to return a 0-D array + instead of a scalar. 
+ + >>> import numpy as np + >>> a = np.array([0, 1]) + >>> Tuple(..., 1).reduce(a.shape) + Integer(1) + >>> a[..., 1] + array(1) + >>> a[1] # doctest: +SKIPNP1 + np.int64(1) + + See https://github.com/Quansight-Labs/ndindex/issues/22. + + See Also + ======== + + .Tuple.expand + .NDIndex.reduce + .Slice.reduce + .Integer.reduce + .ellipsis.reduce + .Newaxis.reduce + .IntegerArray.reduce + .BooleanArray.reduce + + """ + args = list(self.args) + + boolean_scalars = [i for i in args if _is_boolean_scalar(i)] + if len(boolean_scalars) > 1: + _args = [] + seen_boolean_scalar = False + for s in args: + if _is_boolean_scalar(s): + if seen_boolean_scalar: + continue + _args.append(BooleanArray(all(i == True for i in boolean_scalars))) + seen_boolean_scalar = True + else: + _args.append(s) + return type(self)(*_args).reduce(shape, negative_int=negative_int) + + arrays = [] + for i in args: + if _is_boolean_scalar(i): + continue + elif isinstance(i, IntegerArray): + arrays.append(i.raw) + elif isinstance(i, BooleanArray): + # TODO: Avoid explicitly calling nonzero + arrays.extend(i.raw.nonzero()) + if arrays: + from numpy import broadcast_to + + broadcast_shape = broadcast_shapes(*[a.shape for a in arrays]) + else: + broadcast_shape = () + + # If the broadcast shape is empty, out of bounds indices in + # non-empty arrays are ignored, e.g., ([], [10]) would broadcast to + # ([], []), so the bounds for 10 are not checked. Thus, we must do + # this before calling reduce() on the arguments. This rule, however, + # is *not* followed for scalar integer indices. + if 0 in broadcast_shape: + for i in range(len(args)): + s = args[i] + if isinstance(s, IntegerArray): + if s.ndim == 0: + args[i] = Integer(s.raw) + else: + # broadcast_to(x) gives a readonly view on x, which is also + # readonly, so set _copy=False to avoid representing the full + # broadcasted array in memory. 
+ args[i] = type(s)(broadcast_to(s.raw, broadcast_shape), + _copy=False) + + if shape is not None: + # assert self.args.count(...) == 1 + # assert self.args.count(False) <= 1 + # assert self.args.count(True) <= 1 + n_newaxis = self.args.count(None) + n_boolean = sum(i.ndim - 1 for i in args if + isinstance(i, BooleanArray) and not _is_boolean_scalar(i)) + if True in args or False in args: + n_boolean -= 1 + indexed_args = len(args) + n_boolean - n_newaxis - 1 # -1 for the + + shape = asshape(shape, axis=indexed_args - 1) + + ellipsis_i = self.ellipsis_index + + preargs = [] + removable = shape is not None + begin_offset = args[:ellipsis_i].count(None) + begin_offset -= sum(j.ndim - 1 for j in args[:ellipsis_i] if + isinstance(j, BooleanArray)) + for i, s in enumerate(reversed(args[:ellipsis_i]), start=1): + if s == None: + begin_offset -= 1 + elif isinstance(s, BooleanArray): + begin_offset += s.ndim - 1 + axis = ellipsis_i - i - begin_offset + reduced = s.reduce(shape, axis=axis, negative_int=negative_int) + if (removable + and isinstance(reduced, Slice) + and reduced == Slice(0, shape[axis], 1)): + continue + else: + removable = False + preargs.insert(0, reduced) + + if shape is None: + endargs = [s.reduce(negative_int=negative_int) for s in args[ellipsis_i+1:]] + else: + endargs = [] + end_offset = 0 + for i, s in enumerate(reversed(args[ellipsis_i+1:]), start=1): + if isinstance(s, BooleanArray): + end_offset -= s.ndim - 1 + elif s == None: + end_offset += 1 + axis = len(shape) - i + end_offset + if not (isinstance(s, IntegerArray) and (0 in broadcast_shape or + False in args)): + # Array bounds are not checked when the broadcast shape is empty + s = s.reduce(shape, axis=axis, negative_int=negative_int) + endargs.insert(0, s) + + if shape is not None: + # Remove redundant slices + axis = len(shape) - len(endargs) + end_offset + for i, s in enumerate(endargs): + axis += i + if (isinstance(s, Slice) + and s == Slice(0, shape[axis], 1)): + i += 1 + continue + 
else: + break + if endargs: + endargs = endargs[i:] + + if shape is None or (endargs and len(preargs) + len(endargs) + < len(shape) + args.count(None) - n_boolean): + preargs = preargs + [...] + + newargs = preargs + endargs + + if newargs and newargs[-1] == ...: + newargs = newargs[:-1] + + if len(newargs) == 1: + return newargs[0] + + return type(self)(*newargs) + + def broadcast_arrays(self): + args = self.args + boolean_scalars = [i for i in args if _is_boolean_scalar(i)] + if len(boolean_scalars) > 1: + _args = [] + seen_boolean_scalar = False + for s in args: + if _is_boolean_scalar(s): + if seen_boolean_scalar: + continue + _args.append(BooleanArray(all(i == True for i in boolean_scalars))) + seen_boolean_scalar = True + else: + _args.append(s) + return type(self)(*_args).broadcast_arrays() + + # Broadcast all array indices. Note that broadcastability is checked + # in the Tuple constructor, so this should not fail. + boolean_nonzero = {} + arrays = [] + for s in args: + if _is_boolean_scalar(s): + continue + elif isinstance(s, IntegerArray): + arrays.append(s.raw) + elif isinstance(s, BooleanArray): + nz = s.raw.nonzero() + arrays.extend(nz) + boolean_nonzero[s] = nz + if not arrays: + return self + + from numpy import array, broadcast_to, intp + + broadcast_shape = broadcast_shapes(*[a.shape for a in arrays]) + + newargs = [] + for s in args: + if isinstance(s, BooleanArray): + if not _is_boolean_scalar(s): + newargs.extend([IntegerArray(broadcast_to(i, broadcast_shape)) + for i in boolean_nonzero[s]]) + elif isinstance(s, Integer): + # broadcast_to(x) gives a readonly view on x, which is also + # readonly, so set _copy=False to avoid representing the full + # broadcasted array in memory. 
+ newargs.append(IntegerArray(broadcast_to(array(s.raw, dtype=intp), + broadcast_shape), _copy=False)) + elif isinstance(s, IntegerArray): + newargs.append(IntegerArray(broadcast_to(s.raw, broadcast_shape), + _copy=False)) + else: + newargs.append(s) + return Tuple(*newargs) + + def expand(self, shape): + # The expand() docstring is on NDIndex.expand() + args = list(self.args) + if ... not in args: + return type(self)(*args, ...).expand(shape) + + # TODO: Use broadcast_arrays here. The challenge is that we still need + # to do bounds checks on nonscalar integer arrays that get broadcast + # away. + boolean_scalars = [i for i in args if _is_boolean_scalar(i)] + if len(boolean_scalars) > 1: + _args = [] + seen_boolean_scalar = False + for s in args: + if _is_boolean_scalar(s): + if seen_boolean_scalar: + continue + _args.append(BooleanArray(all(i == True for i in boolean_scalars))) + seen_boolean_scalar = True + else: + _args.append(s) + return type(self)(*_args).expand(shape) + + # Broadcast all array indices. Note that broadcastability is checked + # in the Tuple constructor, so this should not fail. + arrays = [] + for i in args: + if _is_boolean_scalar(i): + continue + elif isinstance(i, IntegerArray): + arrays.append(i.raw) + elif isinstance(i, BooleanArray): + # TODO: Avoid calling nonzero twice + arrays.extend(i.raw.nonzero()) + + if arrays: + from numpy import broadcast_to, array, intp + + broadcast_shape = broadcast_shapes(*[a.shape for a in arrays]) + # If the broadcast shape is empty, out of bounds indices in + # non-empty arrays are ignored, e.g., ([], [10]) would broadcast to + # ([], []), so the bounds for 10 are not checked. Thus, we must do + # this before calling reduce() on the arguments. This rule, however, + # is *not* followed for scalar integer indices. 
+ + for i in range(len(args)): + s = args[i] + if isinstance(s, IntegerArray): + if s.ndim == 0: + args[i] = Integer(s.raw) + else: + # broadcast_to(x) gives a readonly view on x, which is also + # readonly, so set _copy=False to avoid representing the full + # broadcasted array in memory. + args[i] = type(s)(broadcast_to(s.raw, broadcast_shape), + _copy=False) + + # assert args.count(...) == 1 + # assert args.count(False) <= 1 + # assert args.count(True) <= 1 + n_newaxis = args.count(None) + n_boolean = sum(i.ndim - 1 for i in args if + isinstance(i, BooleanArray) and not _is_boolean_scalar(i)) + if True in args or False in args: + n_boolean -= 1 + indexed_args = len(args) + n_boolean - n_newaxis - 1 # -1 for the ellipsis + shape = asshape(shape, axis=indexed_args - 1) + + ellipsis_i = self.ellipsis_index + + startargs = [] + begin_offset = 0 + for i, s in enumerate(args[:ellipsis_i]): + axis = i + begin_offset + if not (isinstance(s, IntegerArray) and (0 in broadcast_shape or + False in args)): + s = s.reduce(shape, axis=axis) + if isinstance(s, ArrayIndex): + if isinstance(s, BooleanArray): + begin_offset += s.ndim - 1 + if not _is_boolean_scalar(s): + s = s.reduce(shape, axis=axis) + startargs.extend([IntegerArray(broadcast_to(i, + broadcast_shape)) + for i in s.array.nonzero()]) + continue + elif arrays and isinstance(s, Integer): + s = IntegerArray(broadcast_to(array(s.raw, dtype=intp), + broadcast_shape), _copy=False) + elif s == None: + begin_offset -= 1 + startargs.append(s) + + # TODO: Merge this with the above loop + endargs = [] + end_offset = 0 + for i, s in enumerate(reversed(args[ellipsis_i+1:]), start=1): + if isinstance(s, ArrayIndex): + if isinstance(s, BooleanArray): + end_offset -= s.ndim - 1 + if not _is_boolean_scalar(s): + s = s.reduce(shape, axis=len(shape) - i + end_offset) + endargs.extend([IntegerArray(broadcast_to(i, + broadcast_shape)) + for i in reversed(s.array.nonzero())]) + continue + elif arrays and isinstance(s, Integer): + if (0 
in broadcast_shape or False in args): + s = s.reduce(shape, axis=len(shape)-i+end_offset) + s = IntegerArray(broadcast_to(array(s.raw, dtype=intp), + broadcast_shape), _copy=False) + elif s == None: + end_offset += 1 + axis = len(shape) - i + end_offset + assert axis >= 0 + if not (isinstance(s, IntegerArray) and (0 in broadcast_shape or + False in args)): + # Array bounds are not checked when the broadcast shape is empty + s = s.reduce(shape, axis=axis) + endargs.append(s) + + idx_offset = begin_offset - end_offset + + midargs = [Slice(None).reduce(shape, axis=i + ellipsis_i + begin_offset) for + i in range(len(shape) - len(args) + 1 - idx_offset)] + + + newargs = startargs + midargs + endargs[::-1] + + return type(self)(*newargs) + + def newshape(self, shape): + # The docstring for this method is on the NDIndex base class + shape = asshape(shape) + + if self == Tuple(): + return shape + + # This will raise any IndexErrors + self = self.expand(shape) + + newshape = [] + axis = 0 + arrays = False + for i, s in enumerate(self.args): + if s == None: + newshape.append(1) + axis -= 1 + # After expand(), there will be at most one boolean scalar + elif s == True: + newshape.append(1) + axis -= 1 + elif s == False: + newshape.append(0) + axis -= 1 + elif isinstance(s, ArrayIndex): + if not arrays: + # Multiple arrays are all broadcast together (in expand()) + # and iterated as one, so we only need to get the shape + # for the first array we see. Note that arrays separated + # by ellipses, slices, or newaxes affect the shape + # differently, but these are currently unsupported (see + # the comments in the Tuple constructor). 
+ + # expand() should remove all non scalar boolean arrays + assert not isinstance(s, BooleanArray) + + newshape.extend(list(s.newshape(shape[axis]))) + arrays = True + else: + newshape.extend(list(s.newshape(shape[axis]))) + axis += 1 + return tuple(newshape) + + def as_subindex(self, index): + index = ndindex(index).reduce().broadcast_arrays() + + self = self.broadcast_arrays() + + if ... in self.args: + raise NotImplementedError("Tuple.as_subindex() is not yet implemented for tuples with ellipses") + + if isinstance(index, (Integer, ArrayIndex, Slice)): + index = Tuple(index) + if isinstance(index, Tuple): + new_args = [] + boolean_arrays = [] + integer_arrays = [] + if any(isinstance(i, Slice) and i.step < 0 for i in index.args): + raise NotImplementedError("Tuple.as_subindex() is only implemented on slices with positive steps") + if ... in index.args: + raise NotImplementedError("Tuple.as_subindex() is not yet implemented for tuples with ellipses") + for self_arg, index_arg in zip(self.args, index.args): + if (isinstance(self_arg, IntegerArray) and + isinstance(index_arg, Slice)): + if (self_arg.array < 0).any(): + raise NotImplementedError("IntegerArray.as_subindex() is only implemented for arrays with all nonnegative entries. Try calling reduce() with a shape first.") + if index_arg.step < 0: + raise NotImplementedError("IntegerArray.as_subindex(Slice) is only implemented for slices with positive steps") + + # After reducing, start is not None when step > 0 + if index_arg.stop is None or index_arg.start < 0 or index_arg.stop < 0: + raise NotImplementedError("IntegerArray.as_subindex(Slice) is only implemented for slices with nonnegative start and stop. 
Try calling reduce() with a shape first.") + + s = self_arg.array + start, stop, step = subindex_slice( + s, s+1, 1, index_arg.start, index_arg.stop, index_arg.step) + if (stop <= 0).all(): + raise ValueError("Indices do not intersect") + if start.shape == (): + if start >= stop: + raise ValueError("Indices do not intersect") + + integer_arrays.append((start, stop)) + # Placeholder. We need to mask out the stops below. + new_args.append(IntegerArray(start)) + else: + subindex = self_arg.as_subindex(index_arg) + if isinstance(subindex, Tuple): + assert subindex == () + subindex # Workaround https://github.com/nedbat/coveragepy/issues/1029 + continue + if isinstance(subindex, BooleanArray): + boolean_arrays.append(subindex) + new_args.append(subindex) + args_remainder = self.args[min(len(self.args), len(index.args)):] + index_remainder = index.args[min(len(self.args), len(index.args)):] + if any(isinstance(i, ArrayIndex) and i.isempty() for i in + index_remainder): + raise ValueError("Indices do not intersect") + for arg in args_remainder: + if isinstance(arg, BooleanArray): + boolean_arrays.append(arg) + if isinstance(arg, IntegerArray): + integer_arrays.append((arg.array, arg.array+1)) + new_args.append(arg) + # Replace all boolean arrays with the logical AND of them. + if any(i.isempty() for i in boolean_arrays): + raise ValueError("Indices do not intersect") + if boolean_arrays: + if len(boolean_arrays) > 1: + from numpy import logical_and + new_array = BooleanArray(logical_and.reduce([i.array for i in boolean_arrays])) + else: + new_array = boolean_arrays[0] + new_args2 = [] + first = True + for arg in new_args: + if arg in boolean_arrays: + if first: + new_args2.append(new_array) + first = False + else: + new_args2.append(arg) + new_args = new_args2 + + # Mask out integer arrays to only where the start is less than the + # stop for all arrays. 
+ if integer_arrays: + from numpy import array, broadcast_arrays, logical_and + starts, stops = zip(*integer_arrays) + starts = array(broadcast_arrays(*starts)) + stops = array(broadcast_arrays(*stops)) + mask = logical_and.reduce(starts < stops, axis=0) + new_args2 = [] + i = 0 + for arg in new_args: + if isinstance(arg, IntegerArray): + if mask.ndim == 0: + # Integer arrays always result in a 1 dimensional + # result, except when we have a scalar, we want to + # have a 0 dimensional result to match Integer(). + new_args2.append(IntegerArray(starts[i])) + elif mask.all(): + new_args2.append(IntegerArray(starts[i])) + else: + new_args2.append(IntegerArray(starts[i, mask])) + if new_args2[-1].isempty(): + raise ValueError("Indices do not intersect") + i += 1 + else: + new_args2.append(arg) + new_args = new_args2 + return Tuple(*new_args) + raise NotImplementedError(f"Tuple.as_subindex() is not implemented for type '{type(index).__name__}") + + def isempty(self, shape=None): + if shape is not None: + return 0 in self.newshape(shape) + + return any(i.isempty() for i in self.args) + + def selected_indices(self, shape): + shape = asshape(shape) + idx = self.expand(shape) + + def _zipped_array_indices(array_indices, shape, axis=0): + return zip(*[i.selected_indices(shape, axis=axis+j) + for j, i in enumerate(array_indices)]) + + def _flatten(l): + for element in l: + if isinstance(element, tuple): + yield from element + else: + yield element + + # We need to zip all array indices into a single iterator. + iterators = [] + array_indices = [] + axis = 0 + for i in idx.args: + if i == False: + return + elif i == True: + pass + elif isinstance(i, IntegerArray): + array_indices.append(i) + else: + # Tuples do not support array indices separated by slices, + # newaxes, or ellipses. Furthermore, if there are (non-scalar + # boolean) array indices, any Integer and BooleanArray indices + # are converted to IntegerArray. 
So we can assume all array + # indices are together in a single block, and this is the end + # of it. + if array_indices: + iterators.append(_zipped_array_indices(array_indices, + shape, axis=axis)) + axis += len(array_indices) + array_indices.clear() + if i != None: + iterators.append(i.selected_indices(shape, axis=axis)) + axis += 1 + if idx.args and isinstance(idx.args[-1], IntegerArray): + iterators.append(_zipped_array_indices(array_indices, + shape, axis=axis)) + + for i in itertools.product(*iterators): + yield Tuple(*_flatten(i)).reduce() + +# Imports at the bottom to avoid circular import issues +from .array import ArrayIndex +from .slice import Slice +from .integer import Integer +from .booleanarray import BooleanArray, _is_boolean_scalar +from .integerarray import IntegerArray diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/INSTALLER b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/METADATA b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/METADATA new file mode 100644 index 0000000..5480474 --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/METADATA @@ -0,0 +1,234 @@ +Metadata-Version: 2.4 +Name: numexpr +Version: 2.14.1 +Summary: Fast numerical expression evaluator for NumPy +Author-email: "David M. 
Cooke, Francesc Alted, and others" +Maintainer-email: Blosc Development Team +License-Expression: MIT +Project-URL: homepage, https://github.com/pydata/numexpr +Project-URL: documentation, https://numexpr.readthedocs.io +Project-URL: repository, https://github.com/pydata/numexpr +Classifier: Development Status :: 6 - Mature +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Intended Audience :: Science/Research +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Requires-Python: >=3.10 +Description-Content-Type: text/x-rst +License-File: LICENSE.txt +License-File: LICENSES/cpuinfo.txt +Requires-Dist: numpy>=1.23.0 +Dynamic: license-file + +====================================================== +NumExpr: Fast numerical expression evaluator for NumPy +====================================================== + +:Author: David M. Cooke, Francesc Alted, and others. +:Maintainer: Francesc Alted +:Contact: faltet@gmail.com +:URL: https://github.com/pydata/numexpr +:Documentation: http://numexpr.readthedocs.io/en/latest/ +:GitHub Actions: |actions| +:PyPi: |version| +:DOI: |doi| +:readthedocs: |docs| + +.. |actions| image:: https://github.com/pydata/numexpr/workflows/Build/badge.svg + :target: https://github.com/pydata/numexpr/actions +.. |travis| image:: https://travis-ci.org/pydata/numexpr.png?branch=master + :target: https://travis-ci.org/pydata/numexpr +.. 
|docs| image:: https://readthedocs.org/projects/numexpr/badge/?version=latest + :target: http://numexpr.readthedocs.io/en/latest +.. |doi| image:: https://zenodo.org/badge/doi/10.5281/zenodo.2483274.svg + :target: https://doi.org/10.5281/zenodo.2483274 +.. |version| image:: https://img.shields.io/pypi/v/numexpr + :target: https://pypi.python.org/pypi/numexpr + + +What is NumExpr? +---------------- + +NumExpr is a fast numerical expression evaluator for NumPy. With it, +expressions that operate on arrays (like :code:`'3*a+4*b'`) are accelerated +and use less memory than doing the same calculation in Python. + +In addition, its multi-threaded capabilities can make use of all your +cores -- which generally results in substantial performance scaling compared +to NumPy. + +Last but not least, numexpr can make use of Intel's VML (Vector Math +Library, normally integrated in its Math Kernel Library, or MKL). +This allows further acceleration of transcendent expressions. + + +How NumExpr achieves high performance +------------------------------------- + +The main reason why NumExpr achieves better performance than NumPy is +that it avoids allocating memory for intermediate results. This +results in better cache utilization and reduces memory access in +general. Due to this, NumExpr works best with large arrays. + +NumExpr parses expressions into its own op-codes that are then used by +an integrated computing virtual machine. The array operands are split +into small chunks that easily fit in the cache of the CPU and passed +to the virtual machine. The virtual machine then applies the +operations on each chunk. It's worth noting that all temporaries and +constants in the expression are also chunked. Chunks are distributed among +the available cores of the CPU, resulting in highly parallelized code +execution. + +The result is that NumExpr can get the most of your machine computing +capabilities for array-wise computations. 
Common speed-ups with regard +to NumPy are usually between 0.95x (for very simple expressions like +:code:`'a + 1'`) and 4x (for relatively complex ones like :code:`'a*b-4.1*a > 2.5*b'`), +although much higher speed-ups can be achieved for some functions and complex +math operations (up to 15x in some cases). + +NumExpr performs best on matrices that are too large to fit in L1 CPU cache. +In order to get a better idea on the different speed-ups that can be achieved +on your platform, run the provided benchmarks. + +Installation +------------ + +From wheels +^^^^^^^^^^^ + +NumExpr is available for install via `pip` for a wide range of platforms and +Python versions (which may be browsed at: https://pypi.org/project/numexpr/#files). +Installation can be performed as:: + + pip install numexpr + +If you are using the Anaconda or Miniconda distribution of Python you may prefer +to use the `conda` package manager in this case:: + + conda install numexpr + +From Source +^^^^^^^^^^^ + +On most \*nix systems your compilers will already be present. However if you +are using a virtual environment with a substantially newer version of Python than +your system Python you may be prompted to install a new version of `gcc` or `clang`. + +For Windows, you will need to install the Microsoft Visual C++ Build Tools +(which are free) first. The version depends on which version of Python you have +installed: + +https://wiki.python.org/moin/WindowsCompilers + +For Python 3.6+ simply installing the latest version of MSVC build tools should +be sufficient. Note that wheels found via pip do not include MKL support. Wheels +available via `conda` will have MKL, if the MKL backend is used for NumPy. + +See `requirements.txt` for the required version of NumPy. + +NumExpr is built in the standard Python way:: + + pip install [-e] . + +You can test `numexpr` with:: + + python -c "import numexpr; numexpr.test()" + +Do not test NumExpr in the source directory or you will generate import errors. 
+ +Enable Intel® MKL support +^^^^^^^^^^^^^^^^^^^^^^^^^ + +NumExpr includes support for Intel's MKL library. This may provide better +performance on Intel architectures, mainly when evaluating transcendental +functions (trigonometrical, exponential, ...). + +If you have Intel's MKL, copy the `site.cfg.example` that comes with the +distribution to `site.cfg` and edit the latter file to provide correct paths to +the MKL libraries in your system. After doing this, you can proceed with the +usual building instructions listed above. + +Pay attention to the messages during the building process in order to know +whether MKL has been detected or not. Finally, you can check the speed-ups on +your machine by running the `bench/vml_timing.py` script (you can play with +different parameters to the `set_vml_accuracy_mode()` and `set_vml_num_threads()` +functions in the script so as to see how it would affect performance). + +Usage +----- + +:: + + >>> import numpy as np + >>> import numexpr as ne + + >>> a = np.arange(1e6) # Choose large arrays for better speedups + >>> b = np.arange(1e6) + + >>> ne.evaluate("a + 1") # a simple expression + array([ 1.00000000e+00, 2.00000000e+00, 3.00000000e+00, ..., + 9.99998000e+05, 9.99999000e+05, 1.00000000e+06]) + + >>> ne.evaluate("a * b - 4.1 * a > 2.5 * b") # a more complex one + array([False, False, False, ..., True, True, True], dtype=bool) + + >>> ne.evaluate("sin(a) + arcsinh(a/b)") # you can also use functions + array([ NaN, 1.72284457, 1.79067101, ..., 1.09567006, + 0.17523598, -0.09597844]) + + >>> s = np.array([b'abba', b'abbb', b'abbcdef']) + >>> ne.evaluate("b'abba' == s") # string arrays are supported too + array([ True, False, False], dtype=bool) + + +Free-threading support +---------------------- +Starting on CPython 3.13 onwards there is a new distribution that disables the +Global Interpreter Lock (GIL) altogether, thus increasing the performance yields +under multi-threaded conditions on a single interpreter, as opposed 
to having to use +multiprocessing. + +Whilst numexpr has been demonstrated to work under free-threaded +CPython, considerations need to be taken when using numexpr native parallel +implementation vs using Python threads directly in order to prevent oversubscription, +we recommend either using the main CPython interpreter thread to spawn multiple C threads +using the parallel numexpr API, or spawning multiple CPython threads that do not use +the parallel API. + +For more information about free-threaded CPython, we recommend visiting the following +`community Wiki ` + + +Documentation +------------- + +Please see the official documentation at `numexpr.readthedocs.io `_. +Included is a user guide, benchmark results, and the reference API. + + +Authors +------- + +Please see `AUTHORS.txt `_. + + +License +------- + +NumExpr is distributed under the `MIT `_ license. + + +.. Local Variables: +.. mode: text +.. coding: utf-8 +.. fill-column: 70 +.. End: diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/RECORD b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/RECORD new file mode 100644 index 0000000..36fd578 --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/RECORD @@ -0,0 +1,44 @@ +numexpr-2.14.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +numexpr-2.14.1.dist-info/METADATA,sha256=ls09kU_Xe1Ytp92_vXBEWqyFPTtOOWEWZ7s4pKQPWKc,9274 +numexpr-2.14.1.dist-info/RECORD,, +numexpr-2.14.1.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101 +numexpr-2.14.1.dist-info/licenses/LICENSE.txt,sha256=SnJdGINg7OVJ13bFOvZ82as6h_-QY58kBCKcDLhPz4I,1214 +numexpr-2.14.1.dist-info/licenses/LICENSES/cpuinfo.txt,sha256=5waXkrKbBANJZiVbKphr_jHu1pQ_auPw2HBPiRjSQhA,1583 +numexpr-2.14.1.dist-info/top_level.txt,sha256=5R5OAhc7k6sUajqt4vp-368lWWhb23CoC6LltIMNqNA,8 +numexpr/__init__.py,sha256=fao9MhoWB9aYxv6t0pRTeOATxj0_mMRSqBKkMEs0Gcg,2406 +numexpr/__pycache__/__init__.cpython-311.pyc,, 
+numexpr/__pycache__/cpuinfo.cpython-311.pyc,, +numexpr/__pycache__/expressions.cpython-311.pyc,, +numexpr/__pycache__/necompiler.cpython-311.pyc,, +numexpr/__pycache__/utils.cpython-311.pyc,, +numexpr/__pycache__/version.cpython-311.pyc,, +numexpr/bespoke_functions.hpp,sha256=BmC5GKJYH6zuy3UpRt5tLNMlJP-t6TB0AdTxHdi3PkU,8673 +numexpr/complex_functions.hpp,sha256=KYKemDuukQcz_nBOmzM83LXm-VKmXfTUf5B6vIhrqpg,11643 +numexpr/cpuinfo.py,sha256=4LXmEE2jGLr6NQJ9p49nXmQaDBN55VMHd_WIj-lvoH0,26037 +numexpr/expressions.py,sha256=WhjFJTa1_AOZvYQ0Yz20VIzU8dOHaKBKDPZAQTaAUxw,17750 +numexpr/functions.hpp,sha256=iojJmwUfpEY4-FHmqRrYipxorAOp9TcqWffYwp4o3FQ,9732 +numexpr/interp_body.cpp,sha256=u1JZhnyI_D9-b-i2QpCatV0_37G-1dziObyCM5ZN3pg,24110 +numexpr/interpreter.cp311-win_amd64.pyd,sha256=5PRAyjEXVbsghFCPa_g_jghFZK9nPGBhpCmDPDxMAbI,156672 +numexpr/interpreter.cpp,sha256=OCQ2lxExNcyKFkZRnvZnACpLhoZuj8eAIDaE3b09CKg,52329 +numexpr/interpreter.hpp,sha256=QKREGOTQWRXH4PA-x2cBYppytW0hAOAccJZ1QAI_GJ4,3207 +numexpr/missing_posix_functions.hpp,sha256=94NeOr3ya-v_ucWOjKhS8SXjHykwgoD-GqAw3FYrjxQ,2052 +numexpr/module.cpp,sha256=huJRU4BwsyEdL8kuy2mljg_5fL982wIj0tXTN6AiGjw,17367 +numexpr/module.hpp,sha256=E6OXYHYhr-X_RVjDfBkRqY2CF8PSdQdrOOuVCOJrvrY,2259 +numexpr/msvc_function_stubs.hpp,sha256=n1gKFbL4ckohiH63bFL5-jez-ye1o8NNX8WH1RIAmT0,6326 +numexpr/necompiler.py,sha256=gqw5jPJ45i6dfqCC0fq9h4M5kADVhQbPLfi4fd-XWEU,35916 +numexpr/numexpr_config.hpp,sha256=z1M5UCS5WDyEcOPVtKUVDm4F3MbYueqkdGyAD1gcP84,2620 +numexpr/numexpr_object.cpp,sha256=YPwKP-CDQJ20IDJjguidoXvnnx1vWhlPPzvbxJSaGr0,15039 +numexpr/numexpr_object.hpp,sha256=Hmxg9M1QDvaHHtjtK0KPlx1Tzm2ljWKpSlIAKbmgBUc,1069 +numexpr/opcodes.hpp,sha256=JgshePorFidmfMf225xh8Z3xWmsCLfQbqOC_1mjUVnU,9015 +numexpr/str-two-way.hpp,sha256=isUevqiWhtvlKFgkNEKnXMLDetkch4bQlrLE1WlaphE,14863 +numexpr/tests/__init__.py,sha256=eo43wKuAqH3RMzC8arUSvoDn0sARBmtyv3-OcXuMIvY,461 +numexpr/tests/__pycache__/__init__.cpython-311.pyc,, 
+numexpr/tests/__pycache__/conftest.cpython-311.pyc,, +numexpr/tests/__pycache__/test_numexpr.cpython-311.pyc,, +numexpr/tests/conftest.py,sha256=JGxCdrxLcDQabMyDxu0c5bIY0jTBcQNXmhsF36iaZHQ,525 +numexpr/tests/test_numexpr.py,sha256=yUkBBWWzxoDUHaqrE6mxmF4HGph58XKLSd5r7dTc6K4,59938 +numexpr/utils.py,sha256=IfY1xEJlQ609ROpDxpAS7eGRGln4gt4u4XdWxq1zyQM,10190 +numexpr/version.py,sha256=ffHlYI0UiHkJcrqTry_xoOVbnDIphMR_3srVKg369dI,143 +numexpr/win32/pthread.c,sha256=Mb6W4EgEw9JyRs6IO63X4XjiZdr3DeWDX2tteeHRQGg,7303 +numexpr/win32/pthread.h,sha256=Zy2gFfply5xOt5LLzXFlU7rvJBPSUb6t6xijcn-lGf4,4155 +numexpr/win32/stdint.h,sha256=wdioc67fan2iNd5Or8H04HJQl53zOJ5KmBK2G1JoD3Y,7559 diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/WHEEL b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/WHEEL new file mode 100644 index 0000000..8f98e0a --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp311-cp311-win_amd64 + diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/licenses/LICENSE.txt b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/licenses/LICENSE.txt new file mode 100644 index 0000000..de9a582 --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/licenses/LICENSE.txt @@ -0,0 +1,21 @@ +Copyright (c) 2007,2008 David M. 
Cooke +Copyright (c) 2009,2010 Francesc Alted +Copyright (c) 2011- See AUTHORS.txt + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/licenses/LICENSES/cpuinfo.txt b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/licenses/LICENSES/cpuinfo.txt new file mode 100644 index 0000000..fff4104 --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/licenses/LICENSES/cpuinfo.txt @@ -0,0 +1,31 @@ +Copyright statement for `cpuinfo` module. + +Copyright 2002 Pearu Peterson all rights reserved, +Pearu Peterson + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Pearu Peterson nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +HIS SOFTWARE IS PROVIDED BY PEARU PETERSON ''AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL PEARU PETERSON BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/Lib/site-packages/numexpr-2.14.1.dist-info/top_level.txt b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/top_level.txt new file mode 100644 index 0000000..ba1904b --- /dev/null +++ b/venv/Lib/site-packages/numexpr-2.14.1.dist-info/top_level.txt @@ -0,0 +1 @@ +numexpr diff --git a/venv/Lib/site-packages/numexpr/__init__.py b/venv/Lib/site-packages/numexpr/__init__.py new file mode 100644 index 0000000..63bb9e9 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/__init__.py @@ -0,0 +1,66 @@ +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. 
+#################################################################### + +""" +Numexpr is a fast numerical expression evaluator for NumPy. With it, +expressions that operate on arrays (like "3*a+4*b") are accelerated +and use less memory than doing the same calculation in Python. + +See: + +https://github.com/pydata/numexpr + +for more info about it. + +""" + +from numexpr.interpreter import __BLOCK_SIZE1__, MAX_THREADS, use_vml + +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE + +# cpuinfo imports were moved into the test submodule function that calls them +# to improve import times. + +from numexpr.expressions import E +from numexpr.necompiler import (NumExpr, disassemble, evaluate, re_evaluate, + validate) +from numexpr.utils import (_init_num_threads, detect_number_of_cores, + detect_number_of_threads, get_num_threads, + get_vml_version, set_num_threads, + set_vml_accuracy_mode, set_vml_num_threads) + +# Detect the number of cores +ncores = detect_number_of_cores() +# Initialize the number of threads to be used +nthreads = _init_num_threads() +# The default for VML is 1 thread (see #39) +# set_vml_num_threads(1) + +from . 
import version + +__version__ = version.version + +def print_versions(): + """Print the versions of software that numexpr relies on.""" + try: + import numexpr.tests + return numexpr.tests.print_versions() + except ImportError: + # To maintain Python 2.6 compatibility we have simple error handling + raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') + +def test(verbosity=1): + """Run all the tests in the test suite.""" + try: + import numexpr.tests + return numexpr.tests.test(verbosity=verbosity) + except ImportError: + # To maintain Python 2.6 compatibility we have simple error handling + raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') diff --git a/venv/Lib/site-packages/numexpr/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/numexpr/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..b90a717 Binary files /dev/null and b/venv/Lib/site-packages/numexpr/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/__pycache__/cpuinfo.cpython-311.pyc b/venv/Lib/site-packages/numexpr/__pycache__/cpuinfo.cpython-311.pyc new file mode 100644 index 0000000..a8637d2 Binary files /dev/null and b/venv/Lib/site-packages/numexpr/__pycache__/cpuinfo.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/__pycache__/expressions.cpython-311.pyc b/venv/Lib/site-packages/numexpr/__pycache__/expressions.cpython-311.pyc new file mode 100644 index 0000000..c402cfb Binary files /dev/null and b/venv/Lib/site-packages/numexpr/__pycache__/expressions.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/__pycache__/necompiler.cpython-311.pyc b/venv/Lib/site-packages/numexpr/__pycache__/necompiler.cpython-311.pyc new file mode 100644 index 0000000..bf231b9 Binary files /dev/null and b/venv/Lib/site-packages/numexpr/__pycache__/necompiler.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/numexpr/__pycache__/utils.cpython-311.pyc b/venv/Lib/site-packages/numexpr/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000..f19be83 Binary files /dev/null and b/venv/Lib/site-packages/numexpr/__pycache__/utils.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/__pycache__/version.cpython-311.pyc b/venv/Lib/site-packages/numexpr/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000..36f94cb Binary files /dev/null and b/venv/Lib/site-packages/numexpr/__pycache__/version.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/bespoke_functions.hpp b/venv/Lib/site-packages/numexpr/bespoke_functions.hpp new file mode 100644 index 0000000..26f784e --- /dev/null +++ b/venv/Lib/site-packages/numexpr/bespoke_functions.hpp @@ -0,0 +1,339 @@ +#include +#include +#include +#include +#include +#include "numexpr_config.hpp" // isnan definitions + +// Generic sign function +inline int signi(int x) {return (0 < x) - (x < 0);} +inline long signl(long x) {return (0 < x) - (x < 0);} +inline double sign(double x){ + // Floats: -1.0, 0.0, +1.0, NaN stays NaN + if (isnand(x)) {return NAN;} + if (x > 0) {return 1;} + if (x < 0) {return -1;} + return 0; // handles +0.0 and -0.0 + } +inline float signf(float x){ + // Floats: -1.0, 0.0, +1.0, NaN stays NaN + if (isnanf_(x)) {return NAN;} + if (x > 0) {return 1;} + if (x < 0) {return -1;} + return 0; // handles +0.0 and -0.0 + } + +// round function for ints +inline int rinti(int x) {return x;} +inline long rintl(long x) {return x;} +// abs function for ints +inline int fabsi(int x) {return x<0 ? -x: x;} +inline long fabsl(long x) {return x<0 ? 
-x: x;} +// fmod function for ints +// TODO: Have to add FUNC_III, FUNC_LLL signatures to functions.hpp to enable these +// inline int fmodi(int x, int y) {return (int)fmodf((float)x, (float)y);} +// inline long fmodl(long x, long y) {return (long)fmodf((long)x, (long)y);} + +#ifdef USE_VML +// To match Numpy behaviour for NaNs +static void vsFmax_(MKL_INT n, const float* x1, const float* x2, float* dest) +{ + vsFmax(n, x1, x2, dest); + MKL_INT j; + for (j=0; j +#include // NAN +#include + +/* constants */ +static std::complex nc_1(1., 0.); +static std::complex nc_half(0.5, 0.); +static std::complex nc_i(0., 1.); +static std::complex nc_i2(0., 0.5); +/* +static std::complex nc_mi = {0., -1.}; +static std::complex nc_pi2 = {M_PI/2., 0.}; +*/ + +/* *************************** WARNING ***************************** +Due to the way Numexpr places the results of operations, the *x and *r +pointers do point to the same address (apparently this doesn't happen +in NumPy). So, measures should be taken so as to not to reuse *x +after the first *r has been overwritten. 
+********************************************************************* +*/ + +static void +nc_assign(std::complex *x, std::complex *r) +{ + r->real(x->real()); + r->imag(x->imag()); + return; +} + +static void +nc_sum(std::complex *a, std::complex *b, std::complex *r) +{ + r->real(a->real() + b->real()); + r->imag(a->imag() + b->imag()); + return; +} + +static void +nc_diff(std::complex *a, std::complex *b, std::complex *r) +{ + r->real(a->real() - b->real()); + r->imag(a->imag() - b->imag()); + return; +} + +static void +nc_neg(std::complex *a, std::complex *r) +{ + r->real(-a->real()); + r->imag(-a->imag()); + return; +} + +static void +nc_conj(std::complex *a, std::complex *r) +{ + r->real(a->real()); + r->imag(-a->imag()); + return; +} + +// Needed for allowing the internal casting in numexpr machinery for +// conjugate operations +inline float fconjf(float x) +{ + return x; +} + +// Needed for allowing the internal casting in numexpr machinery for +// conjugate operations +inline double fconj(double x) +{ + return x; +} + +static void +nc_prod(std::complex *a, std::complex *b, std::complex *r) +{ + double ar=a->real(), br=b->real(), ai=a->imag(), bi=b->imag(); + r->real(ar*br - ai*bi); + r->imag(ar*bi + ai*br); + return; +} + +static void +nc_quot(std::complex *a, std::complex *b, std::complex *r) +{ + double ar=a->real(), br=b->real(), ai=a->imag(), bi=b->imag(); + double d = br*br + bi*bi; + r->real((ar*br + ai*bi)/d); + r->imag((ai*br - ar*bi)/d); + return; +} + +static void +nc_sqrt(std::complex *x, std::complex *r) +{ + double s,d; + if (x->real() == 0. && x->imag() == 0.) + *r = *x; + else { + s = sqrt((fabs(x->real()) + hypot(x->real(),x->imag()))/2); + d = x->imag()/(2*s); + if (x->real() > 0.) { + r->real(s); + r->imag(d); + } + else if (x->imag() >= 0.) 
{ + r->real(d); + r->imag(s); + } + else { + r->real(-d); + r->imag(-s); + } + } + return; +} + +static void +nc_log(std::complex *x, std::complex *r) +{ + double l = hypot(x->real(),x->imag()); + r->imag(atan2(x->imag(), x->real())); + r->real(log(l)); + return; +} + +static void +nc_log1p(std::complex *x, std::complex *r) +{ + double l = hypot(x->real() + 1.0,x->imag()); + r->imag(atan2(x->imag(), x->real() + 1.0)); + r->real(log(l)); + return; +} + +static void +nc_exp(std::complex *x, std::complex *r) +{ + double a = exp(x->real()); + r->real(a*cos(x->imag())); + r->imag(a*sin(x->imag())); + return; +} + +static void +nc_expm1(std::complex *x, std::complex *r) +{ + double a = sin(x->imag() / 2); + double b = exp(x->real()); + r->real(expm1(x->real()) * cos(x->imag()) - 2 * a * a); + r->imag(b * sin(x->imag())); + return; +} + +static void +nc_pow(std::complex *a, std::complex *b, std::complex *r) +{ + npy_intp n; + double ar=a->real(), br=b->real(), ai=a->imag(), bi=b->imag(); + + if (br == 0. && bi == 0.) { + r->real(1.); + r->imag(0.); + return; + } + if (ar == 0. && ai == 0.) 
{ + r->real(0.); + r->imag(0.); + return; + } + if (bi == 0 && (n=(npy_intp)br) == br) { + if (n > -100 && n < 100) { + std::complex p, aa; + npy_intp mask = 1; + if (n < 0) n = -n; + aa = nc_1; + p.real(ar); p.imag(ai); + while (1) { + if (n & mask) + nc_prod(&aa,&p,&aa); + mask <<= 1; + if (n < mask || mask <= 0) break; + nc_prod(&p,&p,&p); + } + r->real(aa.real()); r->imag(aa.imag()); + if (br < 0) nc_quot(&nc_1, r, r); + return; + } + } + /* complexobject.c uses an inline version of this formula + investigate whether this had better performance or accuracy */ + nc_log(a, r); + nc_prod(r, b, r); + nc_exp(r, r); + return; +} + + +static void +nc_prodi(std::complex *x, std::complex *r) +{ + double xr = x->real(); + r->real(-x->imag()); + r->imag(xr); + return; +} + + +static void +nc_acos(std::complex *x, std::complex *r) +{ + std::complex a, *pa=&a; + + nc_assign(x, pa); + nc_prod(x,x,r); + nc_diff(&nc_1, r, r); + nc_sqrt(r, r); + nc_prodi(r, r); + nc_sum(pa, r, r); + nc_log(r, r); + nc_prodi(r, r); + nc_neg(r, r); + return; + /* return nc_neg(nc_prodi(nc_log(nc_sum(x,nc_prod(nc_i, + nc_sqrt(nc_diff(nc_1,nc_prod(x,x)))))))); + */ +} + +static void +nc_acosh(std::complex *x, std::complex *r) +{ + std::complex t, a, *pa=&a; + + nc_assign(x, pa); + nc_sum(x, &nc_1, &t); + nc_sqrt(&t, &t); + nc_diff(x, &nc_1, r); + nc_sqrt(r, r); + nc_prod(&t, r, r); + nc_sum(pa, r, r); + nc_log(r, r); + return; + /* + return nc_log(nc_sum(x, + nc_prod(nc_sqrt(nc_sum(x,nc_1)), nc_sqrt(nc_diff(x,nc_1))))); + */ +} + +static void +nc_asin(std::complex *x, std::complex *r) +{ + std::complex a, *pa=&a; + nc_prodi(x, pa); + nc_prod(x, x, r); + nc_diff(&nc_1, r, r); + nc_sqrt(r, r); + nc_sum(pa, r, r); + nc_log(r, r); + nc_prodi(r, r); + nc_neg(r, r); + return; + /* + return nc_neg(nc_prodi(nc_log(nc_sum(nc_prod(nc_i,x), + nc_sqrt(nc_diff(nc_1,nc_prod(x,x))))))); + */ +} + + +static void +nc_asinh(std::complex *x, std::complex *r) +{ + std::complex a, *pa=&a; + nc_assign(x, pa); + 
nc_prod(x, x, r); + nc_sum(&nc_1, r, r); + nc_sqrt(r, r); + nc_sum(r, pa, r); + nc_log(r, r); + return; + /* + return nc_log(nc_sum(nc_sqrt(nc_sum(nc_1,nc_prod(x,x))),x)); + */ +} + +static void +nc_atan(std::complex *x, std::complex *r) +{ + std::complex a, *pa=&a; + nc_diff(&nc_i, x, pa); + nc_sum(&nc_i, x, r); + nc_quot(r, pa, r); + nc_log(r,r); + nc_prod(&nc_i2, r, r); + return; + /* + return nc_prod(nc_i2,nc_log(nc_quot(nc_sum(nc_i,x),nc_diff(nc_i,x)))); + */ +} + +static void +nc_atanh(std::complex *x, std::complex *r) +{ + std::complex a, b, *pa=&a, *pb=&b; + nc_assign(x, pa); + nc_diff(&nc_1, pa, r); + nc_sum(&nc_1, pa, pb); + nc_quot(pb, r, r); + nc_log(r, r); + nc_prod(&nc_half, r, r); + return; + /* + return nc_prod(nc_half,nc_log(nc_quot(nc_sum(nc_1,x),nc_diff(nc_1,x)))); + */ +} + +static void +nc_cos(std::complex *x, std::complex *r) +{ + double xr=x->real(), xi=x->imag(); + r->real(cos(xr)*cosh(xi)); + r->imag(-sin(xr)*sinh(xi)); + return; +} + +static void +nc_cosh(std::complex *x, std::complex *r) +{ + double xr=x->real(), xi=x->imag(); + r->real(cos(xi)*cosh(xr)); + r->imag(sin(xi)*sinh(xr)); + return; +} + + +#define M_LOG10_E 0.434294481903251827651128918916605082294397 +#define M_LOG2_E 1.44269504088896340735992468100189213742664 + + +static void +nc_log10(std::complex *x, std::complex *r) +{ + nc_log(x, r); + r->real(r->real() * M_LOG10_E); + r->imag(r->imag() * M_LOG10_E); + return; +} + +static void +nc_log2(std::complex *x, std::complex *r) +{ + nc_log(x, r); + r->real(r->real() * M_LOG2_E); + r->imag(r->imag() * M_LOG2_E); + return; +} + +static void +nc_sin(std::complex *x, std::complex *r) +{ + double xr=x->real(), xi=x->imag(); + r->real(sin(xr)*cosh(xi)); + r->imag(cos(xr)*sinh(xi)); + return; +} + +static void +nc_sinh(std::complex *x, std::complex *r) +{ + double xr=x->real(), xi=x->imag(); + r->real(cos(xi)*sinh(xr)); + r->imag(sin(xi)*cosh(xr)); + return; +} + +static void +nc_tan(std::complex *x, std::complex *r) +{ + double xr = 
x->real(); + double xi = x->imag(); + double imag_part; + + double denom = cos(2*xr) + cosh(2*xi); + // handle overflows + if (xi > 20) { + imag_part = 1.0 / (1.0 + exp(-4*xi)); + } else if (xi < -20) { + imag_part = -1.0 / (1.0 + exp(4*xi)); + } else { + imag_part = sinh(2*xi) / denom; + } + double real_part = sin(2*xr) / denom; + + r->real(real_part); + r->imag(imag_part); + return; +} + +static void +nc_tanh(std::complex *x, std::complex *r) +{ + double xr = x->real(); + double xi = x->imag(); + double real_part; + double denom = cosh(2*xr) + cos(2*xi); + // handle overflows + if (xr > 20) { + real_part = 1.0 / (1.0 + exp(-4*xr)); + } else if (xr < -20) { + real_part = -1.0 / (1.0 + exp(4*xr)); + } else { + real_part = sinh(2*xr) / denom; + } + double imag_part = sin(2*xi) / denom; + + r->real(real_part); + r->imag(imag_part); + return; +} + +static void +nc_abs(std::complex *x, std::complex *r) +{ + r->real(sqrt(x->real()*x->real() + x->imag()*x->imag())); + r->imag(0); +} + +static void +nc_rint(std::complex *x, std::complex *r) +{ + r->real(rint(x->real())); + r->imag(rint(x->imag())); +} + +static bool +nc_isinf(std::complex *x) +{ + double xr=x->real(), xi=x->imag(); + bool bi,br; + bi = isinfd(xi); + br = isinfd(xr); + return bi || br; +} + +static bool +nc_isnan(std::complex *x) +{ + double xr=x->real(), xi=x->imag(); + bool bi,br; + bi = isnand(xi); + br = isnand(xr); + return bi || br; +} + +static bool +nc_isfinite(std::complex *x) +{ + double xr=x->real(), xi=x->imag(); + bool bi,br; + bi = isfinited(xi); + br = isfinited(xr); + return bi && br; +} + +static void +nc_sign(std::complex *x, std::complex *r) +{ + if (nc_isnan(x)){ + r->real(NAN); + r->imag(NAN); + } + std::complex mag; + nc_abs(x, &mag); + if (mag.real() == 0){ + r->real(0); + r->imag(0); + } + else{ + r->real(x->real()/mag.real()); + r->imag(x->imag()/mag.real()); + } +} + +#endif // NUMEXPR_COMPLEX_FUNCTIONS_HPP diff --git a/venv/Lib/site-packages/numexpr/cpuinfo.py 
b/venv/Lib/site-packages/numexpr/cpuinfo.py new file mode 100644 index 0000000..897a4ca --- /dev/null +++ b/venv/Lib/site-packages/numexpr/cpuinfo.py @@ -0,0 +1,861 @@ +################################################################### +# cpuinfo - Get information about CPU +# +# License: BSD +# Author: Pearu Peterson +# +# See LICENSES/cpuinfo.txt for details about copyright and +# rights to use. +#################################################################### + +""" +cpuinfo + +Copyright 2002 Pearu Peterson all rights reserved, +Pearu Peterson +Permission to use, modify, and distribute this software is given under the +terms of the NumPy (BSD style) license. See LICENSE.txt that came with +this distribution for specifics. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. +Pearu Peterson +""" + +__all__ = ['cpu'] + +import inspect +import os +import platform +import re +import subprocess +import sys +import types +import warnings + +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE + +def getoutput(cmd, successful_status=(0,), stacklevel=1): + try: + p = subprocess.Popen(cmd, stdout=subprocess.PIPE) + output, _ = p.communicate() + status = p.returncode + except EnvironmentError as e: + warnings.warn(str(e), UserWarning, stacklevel=stacklevel) + return False, '' + if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: + return True, output + return False, output + + +def command_info(successful_status=(0,), stacklevel=1, **kw): + info = {} + for key in kw: + ok, output = getoutput(kw[key], successful_status=successful_status, + stacklevel=stacklevel + 1) + if ok: + info[key] = output.strip() + return info + + +def command_by_line(cmd, successful_status=(0,), stacklevel=1): + ok, output = getoutput(cmd, successful_status=successful_status, + stacklevel=stacklevel + 1) + if not ok: + return + + # XXX: check + output = output.decode('ascii') + + for line in output.splitlines(): + yield line.strip() + 
+ +def key_value_from_command(cmd, sep, successful_status=(0,), + stacklevel=1): + d = {} + for line in command_by_line(cmd, successful_status=successful_status, + stacklevel=stacklevel + 1): + l = [s.strip() for s in line.split(sep, 1)] + if len(l) == 2: + d[l[0]] = l[1] + return d + + +class CPUInfoBase(object): + """Holds CPU information and provides methods for requiring + the availability of various CPU features. + """ + + def _try_call(self, func): + try: + return func() + except: + pass + + def __getattr__(self, name): + if not name.startswith('_'): + if hasattr(self, '_' + name): + attr = getattr(self, '_' + name) + if inspect.ismethod(attr): + return lambda func=self._try_call, attr=attr: func(attr) + else: + return lambda: None + raise AttributeError(name) + + def _getNCPUs(self): + return 1 + + def __get_nbits(self): + abits = platform.architecture()[0] + nbits = re.compile(r'(\d+)bit').search(abits).group(1) + return nbits + + def _is_32bit(self): + return self.__get_nbits() == '32' + + def _is_64bit(self): + return self.__get_nbits() == '64' + + +class LinuxCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = [{}] + ok, output = getoutput(['uname', '-m']) + if ok: + info[0]['uname_m'] = output.strip() + try: + fo = open('/proc/cpuinfo') + except EnvironmentError as e: + warnings.warn(str(e), UserWarning) + else: + for line in fo: + name_value = [s.strip() for s in line.split(':', 1)] + if len(name_value) != 2: + continue + name, value = name_value + if not info or name in info[-1]: # next processor + info.append({}) + info[-1][name] = value + fo.close() + self.__class__.info = info + + def _not_impl(self): + pass + + # Athlon + + def _is_AMD(self): + return self.info[0]['vendor_id'] == 'AuthenticAMD' + + def _is_AthlonK6_2(self): + return self._is_AMD() and self.info[0]['model'] == '2' + + def _is_AthlonK6_3(self): + return self._is_AMD() and self.info[0]['model'] == '3' + + def _is_AthlonK6(self): + 
return re.match(r'.*?AMD-K6', self.info[0]['model name']) is not None + + def _is_AthlonK7(self): + return re.match(r'.*?AMD-K7', self.info[0]['model name']) is not None + + def _is_AthlonMP(self): + return re.match(r'.*?Athlon\(tm\) MP\b', + self.info[0]['model name']) is not None + + def _is_AMD64(self): + return self.is_AMD() and self.info[0]['family'] == '15' + + def _is_Athlon64(self): + return re.match(r'.*?Athlon\(tm\) 64\b', + self.info[0]['model name']) is not None + + def _is_AthlonHX(self): + return re.match(r'.*?Athlon HX\b', + self.info[0]['model name']) is not None + + def _is_Opteron(self): + return re.match(r'.*?Opteron\b', + self.info[0]['model name']) is not None + + def _is_Hammer(self): + return re.match(r'.*?Hammer\b', + self.info[0]['model name']) is not None + + # Alpha + + def _is_Alpha(self): + return self.info[0]['cpu'] == 'Alpha' + + def _is_EV4(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' + + def _is_EV5(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' + + def _is_EV56(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' + + def _is_PCA56(self): + return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' + + # Intel + + #XXX + _is_i386 = _not_impl + + def _is_Intel(self): + return self.info[0]['vendor_id'] == 'GenuineIntel' + + def _is_i486(self): + return self.info[0]['cpu'] == 'i486' + + def _is_i586(self): + return self.is_Intel() and self.info[0]['cpu family'] == '5' + + def _is_i686(self): + return self.is_Intel() and self.info[0]['cpu family'] == '6' + + def _is_Celeron(self): + return re.match(r'.*?Celeron', + self.info[0]['model name']) is not None + + def _is_Pentium(self): + return re.match(r'.*?Pentium', + self.info[0]['model name']) is not None + + def _is_PentiumII(self): + return re.match(r'.*?Pentium.*?II\b', + self.info[0]['model name']) is not None + + def _is_PentiumPro(self): + return re.match(r'.*?PentiumPro\b', + self.info[0]['model name']) is 
not None + + def _is_PentiumMMX(self): + return re.match(r'.*?Pentium.*?MMX\b', + self.info[0]['model name']) is not None + + def _is_PentiumIII(self): + return re.match(r'.*?Pentium.*?III\b', + self.info[0]['model name']) is not None + + def _is_PentiumIV(self): + return re.match(r'.*?Pentium.*?(IV|4)\b', + self.info[0]['model name']) is not None + + def _is_PentiumM(self): + return re.match(r'.*?Pentium.*?M\b', + self.info[0]['model name']) is not None + + def _is_Prescott(self): + return self.is_PentiumIV() and self.has_sse3() + + def _is_Nocona(self): + return (self.is_Intel() and + self.info[0]['cpu family'] in ('6', '15') and + # two s sse3; three s ssse3 not the same thing, this is fine + (self.has_sse3() and not self.has_ssse3()) and + re.match(r'.*?\blm\b', self.info[0]['flags']) is not None) + + def _is_Core2(self): + return (self.is_64bit() and self.is_Intel() and + re.match(r'.*?Core\(TM\)2\b', + self.info[0]['model name']) is not None) + + def _is_Itanium(self): + return re.match(r'.*?Itanium\b', + self.info[0]['family']) is not None + + def _is_XEON(self): + return re.match(r'.*?XEON\b', + self.info[0]['model name'], re.IGNORECASE) is not None + + _is_Xeon = _is_XEON + + # Power + def _is_Power(self): + return re.match(r'.*POWER.*', + self.info[0]['cpu']) is not None + + def _is_Power7(self): + return re.match(r'.*POWER7.*', + self.info[0]['cpu']) is not None + + def _is_Power8(self): + return re.match(r'.*POWER8.*', + self.info[0]['cpu']) is not None + + def _is_Power9(self): + return re.match(r'.*POWER9.*', + self.info[0]['cpu']) is not None + + def _has_Altivec(self): + return re.match(r'.*altivec\ supported.*', + self.info[0]['cpu']) is not None + + # Varia + + def _is_singleCPU(self): + return len(self.info) == 1 + + def _getNCPUs(self): + return len(self.info) + + def _has_fdiv_bug(self): + return self.info[0]['fdiv_bug'] == 'yes' + + def _has_f00f_bug(self): + return self.info[0]['f00f_bug'] == 'yes' + + def _has_mmx(self): + return 
re.match(r'.*?\bmmx\b', self.info[0]['flags']) is not None + + def _has_sse(self): + return re.match(r'.*?\bsse\b', self.info[0]['flags']) is not None + + def _has_sse2(self): + return re.match(r'.*?\bsse2\b', self.info[0]['flags']) is not None + + def _has_sse3(self): + return re.match(r'.*?\bpni\b', self.info[0]['flags']) is not None + + def _has_ssse3(self): + return re.match(r'.*?\bssse3\b', self.info[0]['flags']) is not None + + def _has_3dnow(self): + return re.match(r'.*?\b3dnow\b', self.info[0]['flags']) is not None + + def _has_3dnowext(self): + return re.match(r'.*?\b3dnowext\b', self.info[0]['flags']) is not None + + +class IRIXCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = key_value_from_command('sysconf', sep=' ', + successful_status=(0, 1)) + self.__class__.info = info + + def _not_impl(self): + pass + + def _is_singleCPU(self): + return self.info.get('NUM_PROCESSORS') == '1' + + def _getNCPUs(self): + return int(self.info.get('NUM_PROCESSORS', 1)) + + def __cputype(self, n): + return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) + + def _is_r2000(self): + return self.__cputype(2000) + + def _is_r3000(self): + return self.__cputype(3000) + + def _is_r3900(self): + return self.__cputype(3900) + + def _is_r4000(self): + return self.__cputype(4000) + + def _is_r4100(self): + return self.__cputype(4100) + + def _is_r4300(self): + return self.__cputype(4300) + + def _is_r4400(self): + return self.__cputype(4400) + + def _is_r4600(self): + return self.__cputype(4600) + + def _is_r4650(self): + return self.__cputype(4650) + + def _is_r5000(self): + return self.__cputype(5000) + + def _is_r6000(self): + return self.__cputype(6000) + + def _is_r8000(self): + return self.__cputype(8000) + + def _is_r10000(self): + return self.__cputype(10000) + + def _is_r12000(self): + return self.__cputype(12000) + + def _is_rorion(self): + return self.__cputype('orion') + + def get_ip(self): + try: + 
return self.info.get('MACHINE') + except: + pass + + def __machine(self, n): + return self.info.get('MACHINE').lower() == 'ip%s' % (n) + + def _is_IP19(self): + return self.__machine(19) + + def _is_IP20(self): + return self.__machine(20) + + def _is_IP21(self): + return self.__machine(21) + + def _is_IP22(self): + return self.__machine(22) + + def _is_IP22_4k(self): + return self.__machine(22) and self._is_r4000() + + def _is_IP22_5k(self): + return self.__machine(22) and self._is_r5000() + + def _is_IP24(self): + return self.__machine(24) + + def _is_IP25(self): + return self.__machine(25) + + def _is_IP26(self): + return self.__machine(26) + + def _is_IP27(self): + return self.__machine(27) + + def _is_IP28(self): + return self.__machine(28) + + def _is_IP30(self): + return self.__machine(30) + + def _is_IP32(self): + return self.__machine(32) + + def _is_IP32_5k(self): + return self.__machine(32) and self._is_r5000() + + def _is_IP32_10k(self): + return self.__machine(32) and self._is_r10000() + + +class DarwinCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = command_info(arch='arch', + machine='machine') + info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') + self.__class__.info = info + + def _not_impl(self): pass + + def _getNCPUs(self): + return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) + + def _is_Power_Macintosh(self): + return self.info['sysctl_hw']['hw.machine'] == 'Power Macintosh' + + def _is_i386(self): + return self.info['arch'] == 'i386' + + def _is_ppc(self): + return self.info['arch'] == 'ppc' + + def __machine(self, n): + return self.info['machine'] == 'ppc%s' % n + + def _is_ppc601(self): return self.__machine(601) + + def _is_ppc602(self): return self.__machine(602) + + def _is_ppc603(self): return self.__machine(603) + + def _is_ppc603e(self): return self.__machine('603e') + + def _is_ppc604(self): return self.__machine(604) + + def _is_ppc604e(self): return 
self.__machine('604e') + + def _is_ppc620(self): return self.__machine(620) + + def _is_ppc630(self): return self.__machine(630) + + def _is_ppc740(self): return self.__machine(740) + + def _is_ppc7400(self): return self.__machine(7400) + + def _is_ppc7450(self): return self.__machine(7450) + + def _is_ppc750(self): return self.__machine(750) + + def _is_ppc403(self): return self.__machine(403) + + def _is_ppc505(self): return self.__machine(505) + + def _is_ppc801(self): return self.__machine(801) + + def _is_ppc821(self): return self.__machine(821) + + def _is_ppc823(self): return self.__machine(823) + + def _is_ppc860(self): return self.__machine(860) + +class NetBSDCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = {} + info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') + info['arch'] = info['sysctl_hw'].get('hw.machine_arch', 1) + info['machine'] = info['sysctl_hw'].get('hw.machine', 1) + self.__class__.info = info + + def _not_impl(self): pass + + def _getNCPUs(self): + return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) + + def _is_Intel(self): + if self.info['sysctl_hw'].get('hw.model', "")[0:5] == 'Intel': + return True + return False + + def _is_AMD(self): + if self.info['sysctl_hw'].get('hw.model', "")[0:3] == 'AMD': + return True + return False + +class SunOSCPUInfo(CPUInfoBase): + info = None + + def __init__(self): + if self.info is not None: + return + info = command_info(arch='arch', + mach='mach', + uname_i=['uname', '-i'], + isainfo_b=['isainfo', '-b'], + isainfo_n=['isainfo', '-n'], + ) + info['uname_X'] = key_value_from_command(['uname', '-X'], sep='=') + for line in command_by_line(['psrinfo', '-v', '0']): + m = re.match(r'\s*The (?P

[\w\d]+) processor operates at', line) + if m: + info['processor'] = m.group('p') + break + self.__class__.info = info + + def _not_impl(self): + pass + + def _is_i386(self): + return self.info['isainfo_n'] == 'i386' + + def _is_sparc(self): + return self.info['isainfo_n'] == 'sparc' + + def _is_sparcv9(self): + return self.info['isainfo_n'] == 'sparcv9' + + def _getNCPUs(self): + return int(self.info['uname_X'].get('NumCPU', 1)) + + def _is_sun4(self): + return self.info['arch'] == 'sun4' + + def _is_SUNW(self): + return re.match(r'SUNW', self.info['uname_i']) is not None + + def _is_sparcstation5(self): + return re.match(r'.*SPARCstation-5', self.info['uname_i']) is not None + + def _is_ultra1(self): + return re.match(r'.*Ultra-1', self.info['uname_i']) is not None + + def _is_ultra250(self): + return re.match(r'.*Ultra-250', self.info['uname_i']) is not None + + def _is_ultra2(self): + return re.match(r'.*Ultra-2', self.info['uname_i']) is not None + + def _is_ultra30(self): + return re.match(r'.*Ultra-30', self.info['uname_i']) is not None + + def _is_ultra4(self): + return re.match(r'.*Ultra-4', self.info['uname_i']) is not None + + def _is_ultra5_10(self): + return re.match(r'.*Ultra-5_10', self.info['uname_i']) is not None + + def _is_ultra5(self): + return re.match(r'.*Ultra-5', self.info['uname_i']) is not None + + def _is_ultra60(self): + return re.match(r'.*Ultra-60', self.info['uname_i']) is not None + + def _is_ultra80(self): + return re.match(r'.*Ultra-80', self.info['uname_i']) is not None + + def _is_ultraenterprice(self): + return re.match(r'.*Ultra-Enterprise', self.info['uname_i']) is not None + + def _is_ultraenterprice10k(self): + return re.match(r'.*Ultra-Enterprise-10000', self.info['uname_i']) is not None + + def _is_sunfire(self): + return re.match(r'.*Sun-Fire', self.info['uname_i']) is not None + + def _is_ultra(self): + return re.match(r'.*Ultra', self.info['uname_i']) is not None + + def _is_cpusparcv7(self): + return 
self.info['processor'] == 'sparcv7' + + def _is_cpusparcv8(self): + return self.info['processor'] == 'sparcv8' + + def _is_cpusparcv9(self): + return self.info['processor'] == 'sparcv9' + + +class Win32CPUInfo(CPUInfoBase): + info = None + pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor" + # XXX: what does the value of + # HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0 + # mean? + + def __init__(self): + try: + import _winreg + except ImportError: # Python 3 + import winreg as _winreg + + if self.info is not None: + return + info = [] + try: + #XXX: Bad style to use so long `try:...except:...`. Fix it! + + prgx = re.compile(r"family\s+(?P\d+)\s+model\s+(?P\d+)" + r"\s+stepping\s+(?P\d+)", re.IGNORECASE) + chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) + pnum = 0 + while 1: + try: + proc = _winreg.EnumKey(chnd, pnum) + except _winreg.error: + break + else: + pnum += 1 + info.append({"Processor": proc}) + phnd = _winreg.OpenKey(chnd, proc) + pidx = 0 + while True: + try: + name, value, vtpe = _winreg.EnumValue(phnd, pidx) + except _winreg.error: + break + else: + pidx = pidx + 1 + info[-1][name] = value + if name == "Identifier": + srch = prgx.search(value) + if srch: + info[-1]["Family"] = int(srch.group("FML")) + info[-1]["Model"] = int(srch.group("MDL")) + info[-1]["Stepping"] = int(srch.group("STP")) + except: + print(sys.exc_value, '(ignoring)') + self.__class__.info = info + + def _not_impl(self): + pass + + # Athlon + + def _is_AMD(self): + return self.info[0]['VendorIdentifier'] == 'AuthenticAMD' + + def _is_Am486(self): + return self.is_AMD() and self.info[0]['Family'] == 4 + + def _is_Am5x86(self): + return self.is_AMD() and self.info[0]['Family'] == 4 + + def _is_AMDK5(self): + return (self.is_AMD() and self.info[0]['Family'] == 5 and + self.info[0]['Model'] in [0, 1, 2, 3]) + + def _is_AMDK6(self): + return (self.is_AMD() and self.info[0]['Family'] == 5 and + self.info[0]['Model'] in [6, 7]) + + def 
_is_AMDK6_2(self): + return (self.is_AMD() and self.info[0]['Family'] == 5 and + self.info[0]['Model'] == 8) + + def _is_AMDK6_3(self): + return (self.is_AMD() and self.info[0]['Family'] == 5 and + self.info[0]['Model'] == 9) + + def _is_AMDK7(self): + return self.is_AMD() and self.info[0]['Family'] == 6 + + # To reliably distinguish between the different types of AMD64 chips + # (Athlon64, Operton, Athlon64 X2, Semperon, Turion 64, etc.) would + # require looking at the 'brand' from cpuid + + def _is_AMD64(self): + return self.is_AMD() and self.info[0]['Family'] == 15 + + # Intel + + def _is_Intel(self): + return self.info[0]['VendorIdentifier'] == 'GenuineIntel' + + def _is_i386(self): + return self.info[0]['Family'] == 3 + + def _is_i486(self): + return self.info[0]['Family'] == 4 + + def _is_i586(self): + return self.is_Intel() and self.info[0]['Family'] == 5 + + def _is_i686(self): + return self.is_Intel() and self.info[0]['Family'] == 6 + + def _is_Pentium(self): + return self.is_Intel() and self.info[0]['Family'] == 5 + + def _is_PentiumMMX(self): + return (self.is_Intel() and self.info[0]['Family'] == 5 and + self.info[0]['Model'] == 4) + + def _is_PentiumPro(self): + return (self.is_Intel() and self.info[0]['Family'] == 6 and + self.info[0]['Model'] == 1) + + def _is_PentiumII(self): + return (self.is_Intel() and self.info[0]['Family'] == 6 and + self.info[0]['Model'] in [3, 5, 6]) + + def _is_PentiumIII(self): + return (self.is_Intel() and self.info[0]['Family'] == 6 and + self.info[0]['Model'] in [7, 8, 9, 10, 11]) + + def _is_PentiumIV(self): + return self.is_Intel() and self.info[0]['Family'] == 15 + + def _is_PentiumM(self): + return (self.is_Intel() and self.info[0]['Family'] == 6 and + self.info[0]['Model'] in [9, 13, 14]) + + def _is_Core2(self): + return (self.is_Intel() and self.info[0]['Family'] == 6 and + self.info[0]['Model'] in [15, 16, 17]) + + # Varia + + def _is_singleCPU(self): + return len(self.info) == 1 + + def _getNCPUs(self): + 
return len(self.info) + + def _has_mmx(self): + if self.is_Intel(): + return ((self.info[0]['Family'] == 5 and + self.info[0]['Model'] == 4) or + (self.info[0]['Family'] in [6, 15])) + elif self.is_AMD(): + return self.info[0]['Family'] in [5, 6, 15] + else: + return False + + def _has_sse(self): + if self.is_Intel(): + return ((self.info[0]['Family'] == 6 and + self.info[0]['Model'] in [7, 8, 9, 10, 11]) or + self.info[0]['Family'] == 15) + elif self.is_AMD(): + return ((self.info[0]['Family'] == 6 and + self.info[0]['Model'] in [6, 7, 8, 10]) or + self.info[0]['Family'] == 15) + else: + return False + + def _has_sse2(self): + if self.is_Intel(): + return self.is_Pentium4() or self.is_PentiumM() or self.is_Core2() + elif self.is_AMD(): + return self.is_AMD64() + else: + return False + + def _has_3dnow(self): + return self.is_AMD() and self.info[0]['Family'] in [5, 6, 15] + + def _has_3dnowext(self): + return self.is_AMD() and self.info[0]['Family'] in [6, 15] + + +if sys.platform.startswith('linux'): # variations: linux2,linux-i386 (any others?) + cpuinfo = LinuxCPUInfo +elif sys.platform.startswith('irix'): + cpuinfo = IRIXCPUInfo +elif sys.platform == 'darwin': + cpuinfo = DarwinCPUInfo +elif sys.platform[0:6] == 'netbsd': + cpuinfo = NetBSDCPUInfo +elif sys.platform.startswith('sunos'): + cpuinfo = SunOSCPUInfo +elif sys.platform.startswith('win32'): + cpuinfo = Win32CPUInfo +elif sys.platform.startswith('cygwin'): + cpuinfo = LinuxCPUInfo +#XXX: other OS's. Eg. use _winreg on Win32. Or os.uname on unices. 
+else: + cpuinfo = CPUInfoBase + +cpu = cpuinfo() + +if __name__ == "__main__": + + cpu.is_blaa() + cpu.is_Intel() + cpu.is_Alpha() + + info = [] + for name in dir(cpuinfo): + if name[0] == '_' and name[1] != '_': + r = getattr(cpu, name[1:])() + if r: + if r != 1: + info.append('%s=%s' % (name[1:], r)) + else: + info.append(name[1:]) + print('CPU information: ' + ' '.join(info)) diff --git a/venv/Lib/site-packages/numexpr/expressions.py b/venv/Lib/site-packages/numexpr/expressions.py new file mode 100644 index 0000000..cab0247 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/expressions.py @@ -0,0 +1,546 @@ +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. +#################################################################### + +__all__ = ['E'] + +import operator +import sys +import threading + +import numpy + +# Declare a double type that does not exist in Python space +double = numpy.double + +# The default kind for undeclared variables +default_kind = 'double' +int_ = numpy.int32 +long_ = numpy.int64 + +type_to_kind = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', + double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} +kind_to_type = {'bool': bool, 'int': int_, 'long': long_, 'float': float, + 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} +kind_rank = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') +scalar_constant_types = [bool, int_, int, float, double, complex, bytes, str] + +scalar_constant_types = tuple(scalar_constant_types) + +from numexpr import interpreter + + +class Expression(): + + def __getattr__(self, name): + if name.startswith('_'): + try: + return self.__dict__[name] + except KeyError: + raise AttributeError + else: + return VariableNode(name, default_kind) + + +E = 
Expression() + + +class Context(threading.local): + + def get(self, value, default): + return self.__dict__.get(value, default) + + def get_current_context(self): + return self.__dict__ + + def set_new_context(self, dict_): + self.__dict__.update(dict_) + +# This will be called each time the local object is used in a separate thread +_context = Context() + + +def get_optimization(): + return _context.get('optimization', 'none') + + +# helper functions for creating __magic__ methods +def ophelper(f): + def func(*args): + args = list(args) + for i, x in enumerate(args): + if isConstant(x): + args[i] = x = ConstantNode(x) + if not isinstance(x, ExpressionNode): + raise TypeError("unsupported object type: %s" % type(x)) + return f(*args) + + func.__name__ = f.__name__ + func.__doc__ = f.__doc__ + func.__dict__.update(f.__dict__) + return func + + +def allConstantNodes(args): + "returns True if args are all ConstantNodes." + for x in args: + if not isinstance(x, ConstantNode): + return False + return True + + +def isConstant(ex): + "Returns True if ex is a constant scalar of an allowed type." + return isinstance(ex, scalar_constant_types) + + +def commonKind(nodes): + node_kinds = [node.astKind for node in nodes] + str_count = node_kinds.count('bytes') + node_kinds.count('str') + if 0 < str_count < len(node_kinds): # some args are strings, but not all + raise TypeError("strings can only be operated with strings") + if str_count > 0: # if there are some, all of them must be + return 'bytes' + n = -1 + for x in nodes: + n = max(n, kind_rank.index(x.astKind)) + return kind_rank[n] + + +max_int32 = 2147483647 +min_int32 = -max_int32 - 1 + + +def bestConstantType(x): + # ``numpy.string_`` is a subclass of ``bytes`` + if isinstance(x, (bytes, str)): + return bytes + # Numeric conversion to boolean values is not tried because + # ``bool(1) == True`` (same for 0 and False), so 0 and 1 would be + # interpreted as booleans when ``False`` and ``True`` are already + # supported. 
+ if isinstance(x, (bool, numpy.bool_)): + return bool + # ``long`` objects are kept as is to allow the user to force + # promotion of results by using long constants, e.g. by operating + # a 32-bit array with a long (64-bit) constant. + if isinstance(x, (long_, numpy.int64)): + return long_ + # ``double`` objects are kept as is to allow the user to force + # promotion of results by using double constants, e.g. by operating + # a float (32-bit) array with a double (64-bit) constant. + if isinstance(x, double): + return double + if isinstance(x, numpy.float32): + return float + if isinstance(x, (int, numpy.integer)): + # Constants needing more than 32 bits are always + # considered ``long``, *regardless of the platform*, so we + # can clearly tell 32- and 64-bit constants apart. + if not (min_int32 <= x <= max_int32): + return long_ + return int_ + # The duality of float and double in Python avoids that we have to list + # ``double`` too. + for converter in float, complex: + try: + y = converter(x) + except Exception as err: + continue + if y == x or numpy.isnan(y): + return converter + + +def getKind(x): + converter = bestConstantType(x) + return type_to_kind[converter] + + +def binop(opname, reversed=False, kind=None): + # Getting the named method from self (after reversal) does not + # always work (e.g. int constants do not have a __lt__ method). 
+ opfunc = getattr(operator, "__%s__" % opname) + + @ophelper + def operation(self, other): + if reversed: + self, other = other, self + if allConstantNodes([self, other]): + return ConstantNode(opfunc(self.value, other.value)) + else: + return OpNode(opname, (self, other), kind=kind) + + return operation + + +def func(func, minkind=None, maxkind=None): + @ophelper + def function(*args): + if allConstantNodes(args): + return ConstantNode(func(*[x.value for x in args])) + kind = commonKind(args) + if kind in ('int', 'long'): + if func.__name__ not in ('copy', 'abs', 'ones_like', 'round', 'sign'): + # except for these special functions (which return ints for int inputs in NumPy) + # just do a cast to double + # FIXME: 'fmod' outputs ints for NumPy when inputs are ints, but need to + # add new function signatures FUNC_LLL FUNC_III to support this + kind = 'double' + else: + # Apply regular casting rules + if minkind and kind_rank.index(minkind) > kind_rank.index(kind): + kind = minkind + if maxkind and kind_rank.index(maxkind) < kind_rank.index(kind): + kind = maxkind + return FuncNode(func.__name__, args, kind) + + return function + + +@ophelper +def where_func(a, b, c): + if isinstance(a, ConstantNode): + return b if a.value else c + if allConstantNodes([a, b, c]): + return ConstantNode(numpy.where(a, b, c)) + return FuncNode('where', [a, b, c]) + + +def encode_axis(axis): + if isinstance(axis, ConstantNode): + axis = axis.value + if axis is None: + axis = interpreter.allaxes + else: + if axis < 0: + raise ValueError("negative axis are not supported") + if axis > 254: + raise ValueError("cannot encode axis") + return RawNode(axis) + + +def gen_reduce_axis_func(name): + def _func(a, axis=None): + axis = encode_axis(axis) + if isinstance(a, ConstantNode): + return a + if isinstance(a, (bool, int_, long_, float, double, complex)): + a = ConstantNode(a) + return FuncNode(name, [a, axis], kind=a.astKind) + return _func + + +@ophelper +def contains_func(a, b): + return 
FuncNode('contains', [a, b], kind='bool') + + +@ophelper +def div_op(a, b): + if get_optimization() in ('moderate', 'aggressive'): + if (isinstance(b, ConstantNode) and + (a.astKind == b.astKind) and + a.astKind in ('float', 'double', 'complex')): + return OpNode('mul', [a, ConstantNode(1. / b.value)]) + return OpNode('div', [a, b]) + + +@ophelper +def truediv_op(a, b): + if get_optimization() in ('moderate', 'aggressive'): + if (isinstance(b, ConstantNode) and + (a.astKind == b.astKind) and + a.astKind in ('float', 'double', 'complex')): + return OpNode('mul', [a, ConstantNode(1. / b.value)]) + kind = commonKind([a, b]) + if kind in ('bool', 'int', 'long'): + kind = 'double' + return OpNode('div', [a, b], kind=kind) + + +@ophelper +def rtruediv_op(a, b): + return truediv_op(b, a) + + +@ophelper +def pow_op(a, b): + + if isinstance(b, ConstantNode): + x = b.value + if ( a.astKind in ('int', 'long') and + b.astKind in ('int', 'long') and x < 0) : + raise ValueError( + 'Integers to negative integer powers are not allowed.') + if get_optimization() == 'aggressive': + RANGE = 50 # Approximate break even point with pow(x,y) + # Optimize all integral and half integral powers in [-RANGE, RANGE] + # Note: for complex numbers RANGE could be larger. 
+ if (int(2 * x) == 2 * x) and (-RANGE <= abs(x) <= RANGE): + n = int_(abs(x)) + ishalfpower = int_(abs(2 * x)) % 2 + + def multiply(x, y): + if x is None: return y + return OpNode('mul', [x, y]) + + r = None + p = a + mask = 1 + while True: + if (n & mask): + r = multiply(r, p) + mask <<= 1 + if mask > n: + break + p = OpNode('mul', [p, p]) + if ishalfpower: + kind = commonKind([a]) + if kind in ('int', 'long'): + kind = 'double' + r = multiply(r, OpNode('sqrt', [a], kind)) + if r is None: + r = OpNode('ones_like', [a]) + if x < 0: + # Issue #428 + r = truediv_op(ConstantNode(1), r) + return r + if get_optimization() in ('moderate', 'aggressive'): + if x == -1: + return OpNode('div', [ConstantNode(1), a]) + if x == 0: + return OpNode('ones_like', [a]) + if x == 0.5: + kind = a.astKind + if kind in ('int', 'long'): kind = 'double' + return FuncNode('sqrt', [a], kind=kind) + if x == 1: + return a + if x == 2: + return OpNode('mul', [a, a]) + return OpNode('pow', [a, b]) + +# The functions and the minimum and maximum types accepted +numpy.expm1x = numpy.expm1 +functions = { + 'copy': func(numpy.copy), + 'ones_like': func(numpy.ones_like), + 'sqrt': func(numpy.sqrt, 'float'), + + 'sin': func(numpy.sin, 'float'), + 'cos': func(numpy.cos, 'float'), + 'tan': func(numpy.tan, 'float'), + 'arcsin': func(numpy.arcsin, 'float'), + 'arccos': func(numpy.arccos, 'float'), + 'arctan': func(numpy.arctan, 'float'), + + 'sinh': func(numpy.sinh, 'float'), + 'cosh': func(numpy.cosh, 'float'), + 'tanh': func(numpy.tanh, 'float'), + 'arcsinh': func(numpy.arcsinh, 'float'), + 'arccosh': func(numpy.arccosh, 'float'), + 'arctanh': func(numpy.arctanh, 'float'), + + 'fmod': func(numpy.fmod, 'float'), + 'arctan2': func(numpy.arctan2, 'float'), + 'hypot': func(numpy.hypot, 'double'), + 'nextafter': func(numpy.nextafter, 'double'), + 'copysign': func(numpy.copysign, 'double'), + 'maximum': func(numpy.maximum, 'double'), + 'minimum': func(numpy.minimum, 'double'), + + + 'log': func(numpy.log, 
class ExpressionNode:
    """
    An object that represents a generic number object.

    This implements the number special methods so that we can keep
    track of how this object has been used: applying an operator to a
    node builds and returns a new node instead of computing a value.

    Attributes set by ``__init__``:
      value    -- stored exactly as passed in.
      astKind  -- the ``kind`` argument, or the string 'none' if omitted.
      children -- the ``children`` argument as a tuple, ``()`` if omitted.
    """
    astType = 'generic'

    def __init__(self, value=None, kind=None, children=None):
        self.value = value
        if kind is None:
            kind = 'none'
        self.astKind = kind
        if children is None:
            self.children = ()
        else:
            self.children = tuple(children)

    def get_real(self):
        """Return a node for the real part of this node.

        A constant is folded immediately into a new ``ConstantNode``;
        anything else becomes a 'real' operation of kind 'double'.
        """
        if self.astType == 'constant':
            return ConstantNode(complex(self.value).real)
        return OpNode('real', (self,), 'double')

    real = property(get_real)

    def get_imag(self):
        """Return a node for the imaginary part of this node.

        A constant is folded immediately into a new ``ConstantNode``;
        anything else becomes an 'imag' operation of kind 'double'.
        """
        if self.astType == 'constant':
            return ConstantNode(complex(self.value).imag)
        return OpNode('imag', (self,), 'double')

    imag = property(get_imag)

    def __str__(self):
        return '%s(%s, %s, %s)' % (self.__class__.__name__, self.value,
                                   self.astKind, self.children)

    def __repr__(self):
        return self.__str__()

    def __neg__(self):
        return OpNode('neg', (self,))

    def __invert__(self):
        return OpNode('invert', (self,))

    def __pos__(self):
        return self

    # Truth-testing a node is rejected on purpose. See #24 for more info.
    # Python evaluates `and`, `or` and `not` through __bool__, so they
    # cannot be recorded in the expression tree the way `&`, `|`, `~` can.

    def __bool__(self):
        raise TypeError("You can't use Python's standard boolean operators in "
                        "NumExpr expressions. You should use their bitwise "
                        "counterparts instead: '&' instead of 'and', "
                        "'|' instead of 'or', and '~' instead of 'not'.")

    # Arithmetic.  Commutative operators share one implementation for the
    # normal and the reflected form; the others use `reversed=True`.
    # NOTE(review): `binop` is defined elsewhere in this module; it is
    # assumed that `reversed=True` swaps the two operands, as its existing
    # use for __rsub__/__rmod__/__rpow__ implies -- confirm.
    __add__ = __radd__ = binop('add')
    __sub__ = binop('sub')
    __rsub__ = binop('sub', reversed=True)
    __mul__ = __rmul__ = binop('mul')
    __truediv__ = truediv_op
    __rtruediv__ = rtruediv_op
    __floordiv__ = binop("floordiv")
    # Without the reflected form `2 // a` raised TypeError although
    # `a // 2`, `2 / a` and `2 % a` were all accepted.
    __rfloordiv__ = binop('floordiv', reversed=True)
    __pow__ = pow_op
    __rpow__ = binop('pow', reversed=True)
    __mod__ = binop('mod')
    __rmod__ = binop('mod', reversed=True)

    __lshift__ = binop('lshift')
    __rlshift__ = binop('lshift', reversed=True)
    __rshift__ = binop('rshift')
    __rrshift__ = binop('rshift', reversed=True)

    # bitwise or logical operations
    # These are commutative, so (as with add/mul) the reflected names reuse
    # the same implementation; this makes `True & a` work like `a & True`.
    __and__ = __rand__ = binop('and')
    __or__ = __ror__ = binop('or')
    __xor__ = __rxor__ = binop('xor')

    # Comparisons always produce nodes of kind 'bool'.  `<` and `<=` are
    # expressed as 'gt' / 'ge' with the operands reversed.
    # NOTE: overriding __eq__ without defining __hash__ makes instances
    # unhashable (Python sets __hash__ to None in that case).
    __gt__ = binop('gt', kind='bool')
    __ge__ = binop('ge', kind='bool')
    __eq__ = binop('eq', kind='bool')
    __ne__ = binop('ne', kind='bool')
    __lt__ = binop('gt', reversed=True, kind='bool')
    __le__ = binop('ge', reversed=True, kind='bool')


class LeafNode(ExpressionNode):
    """Base class for nodes that never receive children."""
    leafNode = True


class VariableNode(LeafNode):
    """Leaf node with astType 'variable'."""
    astType = 'variable'

    def __init__(self, value=None, kind=None, children=None):
        # `children` is accepted for signature compatibility but ignored.
        LeafNode.__init__(self, value=value, kind=kind)


class RawNode:
    """
    Used to pass raw integers to interpreter.
    For instance, for selecting what function to use in func1.
    Purposely doesn't inherit from ExpressionNode, since we don't want
    this to be used for anything but being walked.
    """
    astType = 'raw'
    astKind = 'none'

    def __init__(self, value):
        self.value = value
        self.children = ()

    def __str__(self):
        return 'RawNode(%s)' % (self.value,)

    __repr__ = __str__


class ConstantNode(LeafNode):
    """Leaf node holding a literal value; its kind is derived from the value."""
    astType = 'constant'

    def __init__(self, value=None, children=None):
        # `children` is accepted for signature compatibility but ignored.
        kind = getKind(value)
        # Python float constants are double precision by default
        if kind == 'float' and isinstance(value, float):
            kind = 'double'
        LeafNode.__init__(self, value=value, kind=kind)

    def __neg__(self):
        # Fold the negation into the constant instead of emitting a 'neg' op.
        return ConstantNode(-self.value)

    def __invert__(self):
        # Fold the inversion into the constant instead of emitting an op.
        return ConstantNode(~self.value)


class OpNode(ExpressionNode):
    """Node for an operation; `value` holds the opcode, `children` the args."""
    astType = 'op'

    def __init__(self, opcode=None, args=None, kind=None):
        # With no explicit kind, use the common kind of the arguments.
        if (kind is None) and (args is not None):
            kind = commonKind(args)
        if kind == 'bool':  # handle bool*bool and bool+bool cases
            opcode = 'and' if opcode == 'mul' else opcode
            opcode = 'or' if opcode == 'add' else opcode
        ExpressionNode.__init__(self, value=opcode, kind=kind, children=args)


class FuncNode(OpNode):
    """Operation node for a function call."""

    def __init__(self, opcode=None, args=None, kind=None):
        if (kind is None) and (args is not None):
            kind = commonKind(args)
        # bodge for boolean return functions
        if opcode in ("isnan", "isfinite", "isinf", "signbit"):
            kind = 'bool'
        OpNode.__init__(self, opcode, args, kind)
+**********************************************************************/ + +/* These #if blocks make it easier to query this file, without having + to define every row function before #including it. */ +#ifndef FUNC_FF +#define ELIDE_FUNC_FF +#define FUNC_FF(...) +#endif +FUNC_FF(FUNC_SQRT_FF, "sqrt_ff", sqrtf, sqrtf2, vsSqrt) +FUNC_FF(FUNC_SIN_FF, "sin_ff", sinf, sinf2, vsSin) +FUNC_FF(FUNC_COS_FF, "cos_ff", cosf, cosf2, vsCos) +FUNC_FF(FUNC_TAN_FF, "tan_ff", tanf, tanf2, vsTan) +FUNC_FF(FUNC_ARCSIN_FF, "arcsin_ff", asinf, asinf2, vsAsin) +FUNC_FF(FUNC_ARCCOS_FF, "arccos_ff", acosf, acosf2, vsAcos) +FUNC_FF(FUNC_ARCTAN_FF, "arctan_ff", atanf, atanf2, vsAtan) +FUNC_FF(FUNC_SINH_FF, "sinh_ff", sinhf, sinhf2, vsSinh) +FUNC_FF(FUNC_COSH_FF, "cosh_ff", coshf, coshf2, vsCosh) +FUNC_FF(FUNC_TANH_FF, "tanh_ff", tanhf, tanhf2, vsTanh) +FUNC_FF(FUNC_ARCSINH_FF, "arcsinh_ff", asinhf, asinhf2, vsAsinh) +FUNC_FF(FUNC_ARCCOSH_FF, "arccosh_ff", acoshf, acoshf2, vsAcosh) +FUNC_FF(FUNC_ARCTANH_FF, "arctanh_ff", atanhf, atanhf2, vsAtanh) +FUNC_FF(FUNC_LOG_FF, "log_ff", logf, logf2, vsLn) +FUNC_FF(FUNC_LOG1P_FF, "log1p_ff", log1pf, log1pf2, vsLog1p) +FUNC_FF(FUNC_LOG10_FF, "log10_ff", log10f, log10f2, vsLog10) +FUNC_FF(FUNC_LOG2_FF, "log2_ff", log2f, log2f2, vsLog2) +FUNC_FF(FUNC_EXP_FF, "exp_ff", expf, expf2, vsExp) +FUNC_FF(FUNC_EXPM1_FF, "expm1_ff", expm1f, expm1f2, vsExpm1) +FUNC_FF(FUNC_ABS_FF, "absolute_ff", fabsf, fabsf2, vsAbs) +FUNC_FF(FUNC_CONJ_FF, "conjugate_ff",fconjf, fconjf2, vsConj) +FUNC_FF(FUNC_CEIL_FF, "ceil_ff", ceilf, ceilf2, vsCeil) +FUNC_FF(FUNC_FLOOR_FF, "floor_ff", floorf, floorf2, vsFloor) +FUNC_FF(FUNC_TRUNC_FF, "trunc_ff", truncf, truncf2, vsTrunc) +FUNC_FF(FUNC_SIGN_FF, "sign_ff", signf, signf2, vsSign) +//rint rounds to nearest even integer, matching NumPy (round doesn't) +FUNC_FF(FUNC_ROUND_FF, "round_ff", rintf, rintf2, vsRint) +FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_FF +#undef ELIDE_FUNC_FF +#undef FUNC_FF +#endif + +#ifndef 
FUNC_FFF +#define ELIDE_FUNC_FFF +#define FUNC_FFF(...) +#endif +FUNC_FFF(FUNC_FMOD_FFF, "fmod_fff", fmodf, fmodf2, vsfmod) +FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) +FUNC_FFF(FUNC_HYPOT_FFF, "hypot_fff", hypotf, hypotf2, vsHypot) +FUNC_FFF(FUNC_NEXTAFTER_FFF, "nextafter_fff", nextafterf, nextafterf2, vsNextAfter) +FUNC_FFF(FUNC_COPYSIGN_FFF, "copysign_fff", copysignf, copysignf2, vsCopySign) +FUNC_FFF(FUNC_MAXIMUM_FFF, "maximum_fff", fmaxf_, fmaxf2, vsFmax_) +FUNC_FFF(FUNC_MINIMUM_FFF, "minimum_fff", fminf_, fminf2, vsFmin_) +FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_FFF +#undef ELIDE_FUNC_FFF +#undef FUNC_FFF +#endif + +#ifndef FUNC_DD +#define ELIDE_FUNC_DD +#define FUNC_DD(...) +#endif +FUNC_DD(FUNC_SQRT_DD, "sqrt_dd", sqrt, vdSqrt) +FUNC_DD(FUNC_SIN_DD, "sin_dd", sin, vdSin) +FUNC_DD(FUNC_COS_DD, "cos_dd", cos, vdCos) +FUNC_DD(FUNC_TAN_DD, "tan_dd", tan, vdTan) +FUNC_DD(FUNC_ARCSIN_DD, "arcsin_dd", asin, vdAsin) +FUNC_DD(FUNC_ARCCOS_DD, "arccos_dd", acos, vdAcos) +FUNC_DD(FUNC_ARCTAN_DD, "arctan_dd", atan, vdAtan) +FUNC_DD(FUNC_SINH_DD, "sinh_dd", sinh, vdSinh) +FUNC_DD(FUNC_COSH_DD, "cosh_dd", cosh, vdCosh) +FUNC_DD(FUNC_TANH_DD, "tanh_dd", tanh, vdTanh) +FUNC_DD(FUNC_ARCSINH_DD, "arcsinh_dd", asinh, vdAsinh) +FUNC_DD(FUNC_ARCCOSH_DD, "arccosh_dd", acosh, vdAcosh) +FUNC_DD(FUNC_ARCTANH_DD, "arctanh_dd", atanh, vdAtanh) +FUNC_DD(FUNC_LOG_DD, "log_dd", log, vdLn) +FUNC_DD(FUNC_LOG1P_DD, "log1p_dd", log1p, vdLog1p) +FUNC_DD(FUNC_LOG10_DD, "log10_dd", log10, vdLog10) +FUNC_DD(FUNC_LOG2_DD, "log2_dd", log2, vdLog2) +FUNC_DD(FUNC_EXP_DD, "exp_dd", exp, vdExp) +FUNC_DD(FUNC_EXPM1_DD, "expm1_dd", expm1, vdExpm1) +FUNC_DD(FUNC_ABS_DD, "absolute_dd", fabs, vdAbs) +FUNC_DD(FUNC_CONJ_DD, "conjugate_dd",fconj, vdConj) +FUNC_DD(FUNC_CEIL_DD, "ceil_dd", ceil, vdCeil) +FUNC_DD(FUNC_FLOOR_DD, "floor_dd", floor, vdFloor) +FUNC_DD(FUNC_TRUNC_DD, "trunc_dd", trunc, vdTrunc) +FUNC_DD(FUNC_SIGN_DD, "sign_dd", sign, vdSign) 
+//rint rounds to nearest even integer, matching NumPy (round doesn't) +FUNC_DD(FUNC_ROUND_DD, "round_dd", rint, vdRint) +FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_DD +#undef ELIDE_FUNC_DD +#undef FUNC_DD +#endif + +// double -> boolean functions +#ifndef FUNC_BD +#define ELIDE_FUNC_BD +#define FUNC_BD(...) +#endif +FUNC_BD(FUNC_ISNAN_BD, "isnan_bd", isnand, vdIsnan) +FUNC_BD(FUNC_ISFINITE_BD, "isfinite_bd", isfinited, vdIsfinite) +FUNC_BD(FUNC_ISINF_BD, "isinf_bd", isinfd, vdIsinf) +FUNC_BD(FUNC_SIGNBIT_BD, "signbit_bd", signbit, vdSignBit) +FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BD +#undef ELIDE_FUNC_BD +#undef FUNC_BD +#endif + +// float -> boolean functions (C99 defines the same function for all types) +#ifndef FUNC_BF +#define ELIDE_FUNC_BF +#define FUNC_BF(...) +#endif // use wrappers as there is name collision with isnanf in std +FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_, isnanf2, vsIsnan) +FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_, isfinitef2, vsIsfinite) +FUNC_BF(FUNC_ISINF_BF, "isinf_bf", isinff_, isinff2, vsIsinf) +FUNC_BF(FUNC_SIGNBIT_BF, "signbit_bf", signbitf, signbitf2, vsSignBit) +FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BF +#undef ELIDE_FUNC_BF +#undef FUNC_BF +#endif + +#ifndef FUNC_DDD +#define ELIDE_FUNC_DDD +#define FUNC_DDD(...) +#endif +FUNC_DDD(FUNC_FMOD_DDD, "fmod_ddd", fmod, vdfmod) +FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) +FUNC_DDD(FUNC_HYPOT_DDD, "hypot_ddd", hypot, vdHypot) +FUNC_DDD(FUNC_NEXTAFTER_DDD, "nextafter_ddd", nextafter, vdNextAfter) +FUNC_DDD(FUNC_COPYSIGN_DDD, "copysign_ddd", copysign, vdCopySign) +FUNC_DDD(FUNC_MAXIMUM_DDD, "maximum_ddd", fmaxd, vdFmax_) +FUNC_DDD(FUNC_MINIMUM_DDD, "minimum_ddd", fmind, vdFmin_) +FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_DDD +#undef ELIDE_FUNC_DDD +#undef FUNC_DDD +#endif + +#ifndef FUNC_CC +#define ELIDE_FUNC_CC +#define FUNC_CC(...) 
+#endif +FUNC_CC(FUNC_SQRT_CC, "sqrt_cc", nc_sqrt, vzSqrt) +FUNC_CC(FUNC_SIN_CC, "sin_cc", nc_sin, vzSin) +FUNC_CC(FUNC_COS_CC, "cos_cc", nc_cos, vzCos) +FUNC_CC(FUNC_TAN_CC, "tan_cc", nc_tan, vzTan) +FUNC_CC(FUNC_ARCSIN_CC, "arcsin_cc", nc_asin, vzAsin) +FUNC_CC(FUNC_ARCCOS_CC, "arccos_cc", nc_acos, vzAcos) +FUNC_CC(FUNC_ARCTAN_CC, "arctan_cc", nc_atan, vzAtan) +FUNC_CC(FUNC_SINH_CC, "sinh_cc", nc_sinh, vzSinh) +FUNC_CC(FUNC_COSH_CC, "cosh_cc", nc_cosh, vzCosh) +FUNC_CC(FUNC_TANH_CC, "tanh_cc", nc_tanh, vzTanh) +FUNC_CC(FUNC_ARCSINH_CC, "arcsinh_cc", nc_asinh, vzAsinh) +FUNC_CC(FUNC_ARCCOSH_CC, "arccosh_cc", nc_acosh, vzAcosh) +FUNC_CC(FUNC_ARCTANH_CC, "arctanh_cc", nc_atanh, vzAtanh) +FUNC_CC(FUNC_LOG_CC, "log_cc", nc_log, vzLn) +FUNC_CC(FUNC_LOG1P_CC, "log1p_cc", nc_log1p, vzLog1p) +FUNC_CC(FUNC_LOG10_CC, "log10_cc", nc_log10, vzLog10) +FUNC_CC(FUNC_LOG2_CC, "log2_cc", nc_log2, vzLog2) +FUNC_CC(FUNC_EXP_CC, "exp_cc", nc_exp, vzExp) +FUNC_CC(FUNC_EXPM1_CC, "expm1_cc", nc_expm1, vzExpm1) +FUNC_CC(FUNC_ABS_CC, "absolute_cc", nc_abs, vzAbs_) +FUNC_CC(FUNC_CONJ_CC, "conjugate_cc",nc_conj, vzConj) +FUNC_CC(FUNC_SIGN_CC, "sign_cc", nc_sign, vzSign) +// rint rounds to nearest even integer, matches NumPy behaviour (round doesn't) +FUNC_CC(FUNC_ROUND_CC, "round_cc", nc_rint, vzRint) +FUNC_CC(FUNC_CC_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_CC +#undef ELIDE_FUNC_CC +#undef FUNC_CC +#endif + +#ifndef FUNC_CCC +#define ELIDE_FUNC_CCC +#define FUNC_CCC(...) +#endif +FUNC_CCC(FUNC_POW_CCC, "pow_ccc", nc_pow) +FUNC_CCC(FUNC_CCC_LAST, NULL, NULL) +#ifdef ELIDE_FUNC_CCC +#undef ELIDE_FUNC_CCC +#undef FUNC_CCC +#endif + +// complex -> boolean functions +#ifndef FUNC_BC +#define ELIDE_FUNC_BC +#define FUNC_BC(...) 
+#endif // use wrappers as there is name collision with isnanf in std +FUNC_BC(FUNC_ISNAN_BC, "isnan_bc", nc_isnan, vzIsnan) +FUNC_BC(FUNC_ISFINITE_BC, "isfinite_bc", nc_isfinite, vzIsfinite) +FUNC_BC(FUNC_ISINF_BC, "isinf_bc", nc_isinf, vzIsinf) +FUNC_BC(FUNC_BC_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BC +#undef ELIDE_FUNC_BC +#undef FUNC_BC +#endif + +// int -> int functions +#ifndef FUNC_II +#define ELIDE_FUNC_II +#define FUNC_II(...) +#endif +FUNC_II(FUNC_SIGN_II, "sign_ii", signi, viSign) +FUNC_II(FUNC_ROUND_II, "round_ii", rinti, viRint) +FUNC_II(FUNC_ABS_II, "absolute_ii", fabsi, viFabs) +FUNC_II(FUNC_II_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_II +#undef ELIDE_FUNC_II +#undef FUNC_II +#endif + +#ifndef FUNC_LL +#define ELIDE_FUNC_LL +#define FUNC_LL(...) +#endif +FUNC_LL(FUNC_SIGN_LL, "sign_ll", signl, vlSign) +FUNC_LL(FUNC_ROUND_LL, "round_ll", rintl, vlRint) +FUNC_LL(FUNC_ABS_LL, "absolute_ll", fabsl, vlFabs) +FUNC_LL(FUNC_LL_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_LL +#undef ELIDE_FUNC_LL +#undef FUNC_LL +#endif diff --git a/venv/Lib/site-packages/numexpr/interp_body.cpp b/venv/Lib/site-packages/numexpr/interp_body.cpp new file mode 100644 index 0000000..743f8ab --- /dev/null +++ b/venv/Lib/site-packages/numexpr/interp_body.cpp @@ -0,0 +1,602 @@ +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +// WARNING: This file is included multiple times in `interpreter.cpp`. It is +// essentially a very macro-heavy jump table. Interpretation is best done by +// the developer by expanding all macros (e.g. adding `'-E'` to the `extra_cflags` +// argument in `setup.py` and looking at the resulting `interpreter.cpp`. 
+// +// Changes made to this file will not be recognized by the compiler, so the developer +// must make a trivial change to `interpreter.cpp` or delete the `build/` +// directory in-between each build. +{ +#define VEC_LOOP(expr) for(j = 0; j < BLOCK_SIZE; j++) { \ + expr; \ + } + +#define VEC_ARG0(expr) \ + BOUNDS_CHECK(store_in); \ + { \ + char *dest = mem[store_in]; \ + VEC_LOOP(expr); \ + } break + +#define VEC_ARG1(expr) \ + BOUNDS_CHECK(store_in); \ + BOUNDS_CHECK(arg1); \ + { \ + char *dest = mem[store_in]; \ + char *x1 = mem[arg1]; \ + npy_intp ss1 = params.memsizes[arg1]; \ + npy_intp sb1 = memsteps[arg1]; \ + /* nowarns is defined and used so as to \ + avoid compiler warnings about unused \ + variables */ \ + npy_intp nowarns = ss1+sb1+*x1; \ + nowarns += 1; \ + VEC_LOOP(expr); \ + } break + +#define VEC_ARG2(expr) \ + BOUNDS_CHECK(store_in); \ + BOUNDS_CHECK(arg1); \ + BOUNDS_CHECK(arg2); \ + { \ + char *dest = mem[store_in]; \ + char *x1 = mem[arg1]; \ + npy_intp ss1 = params.memsizes[arg1]; \ + npy_intp sb1 = memsteps[arg1]; \ + /* nowarns is defined and used so as to \ + avoid compiler warnings about unused \ + variables */ \ + npy_intp nowarns = ss1+sb1+*x1; \ + char *x2 = mem[arg2]; \ + npy_intp ss2 = params.memsizes[arg2]; \ + npy_intp sb2 = memsteps[arg2]; \ + nowarns += ss2+sb2+*x2; \ + VEC_LOOP(expr); \ + } break + +#define VEC_ARG3(expr) \ + BOUNDS_CHECK(store_in); \ + BOUNDS_CHECK(arg1); \ + BOUNDS_CHECK(arg2); \ + BOUNDS_CHECK(arg3); \ + { \ + char *dest = mem[store_in]; \ + char *x1 = mem[arg1]; \ + npy_intp ss1 = params.memsizes[arg1]; \ + npy_intp sb1 = memsteps[arg1]; \ + /* nowarns is defined and used so as to \ + avoid compiler warnings about unused \ + variables */ \ + npy_intp nowarns = ss1+sb1+*x1; \ + char *x2 = mem[arg2]; \ + npy_intp ss2 = params.memsizes[arg2]; \ + npy_intp sb2 = memsteps[arg2]; \ + char *x3 = mem[arg3]; \ + npy_intp ss3 = params.memsizes[arg3]; \ + npy_intp sb3 = memsteps[arg3]; \ + nowarns += 
ss2+sb2+*x2; \ + nowarns += ss3+sb3+*x3; \ + VEC_LOOP(expr); \ + } break + +#define VEC_ARG1_VML(expr) \ + BOUNDS_CHECK(store_in); \ + BOUNDS_CHECK(arg1); \ + { \ + char *dest = mem[store_in]; \ + char *x1 = mem[arg1]; \ + expr; \ + } break + +#define VEC_ARG2_VML(expr) \ + BOUNDS_CHECK(store_in); \ + BOUNDS_CHECK(arg1); \ + BOUNDS_CHECK(arg2); \ + { \ + char *dest = mem[store_in]; \ + char *x1 = mem[arg1]; \ + char *x2 = mem[arg2]; \ + expr; \ + } break + +#define VEC_ARG3_VML(expr) \ + BOUNDS_CHECK(store_in); \ + BOUNDS_CHECK(arg1); \ + BOUNDS_CHECK(arg2); \ + BOUNDS_CHECK(arg3); \ + { \ + char *dest = mem[store_in]; \ + char *x1 = mem[arg1]; \ + char *x2 = mem[arg2]; \ + char *x3 = mem[arg3]; \ + expr; \ + } break + + int pc; + unsigned int j; + + // set up pointers to next block of inputs and outputs +#ifdef SINGLE_ITEM_CONST_LOOP + mem[0] = params.output; +#else // SINGLE_ITEM_CONST_LOOP + // use the iterator's inner loop data + memcpy(mem, iter_dataptr, (1+params.n_inputs)*sizeof(char*)); +# ifndef NO_OUTPUT_BUFFERING + // if output buffering is necessary, first write to the buffer + if(params.out_buffer != NULL) { + mem[0] = params.out_buffer; + } +# endif // NO_OUTPUT_BUFFERING + memcpy(memsteps, iter_strides, (1+params.n_inputs)*sizeof(npy_intp)); +#endif // SINGLE_ITEM_CONST_LOOP + + // WARNING: From now on, only do references to mem[arg[123]] + // & memsteps[arg[123]] inside the VEC_ARG[123] macros, + // or you will risk accessing invalid addresses. 
+ + for (pc = 0; pc < params.prog_len; pc += 4) { + unsigned char op = params.program[pc]; + unsigned int store_in = params.program[pc+1]; + unsigned int arg1 = params.program[pc+2]; + unsigned int arg2 = params.program[pc+3]; + #define arg3 params.program[pc+5] + // Iterator reduce macros +#ifdef REDUCTION_INNER_LOOP // Reduce is the inner loop + #define i_reduce *(int *)dest + #define l_reduce *(long long *)dest + #define f_reduce *(float *)dest + #define d_reduce *(double *)dest + #define cr_reduce *(double *)dest + #define ci_reduce *((double *)dest+1) +#else /* Reduce is the outer loop */ + #define i_reduce i_dest + #define l_reduce l_dest + #define f_reduce f_dest + #define d_reduce d_dest + #define cr_reduce cr_dest + #define ci_reduce ci_dest +#endif + #define b_dest ((char *)dest)[j] + #define i_dest ((int *)dest)[j] + #define l_dest ((long long *)dest)[j] + #define f_dest ((float *)dest)[j] + #define d_dest ((double *)dest)[j] + #define cr_dest ((double *)dest)[2*j] + #define ci_dest ((double *)dest)[2*j+1] + #define s_dest ((char *)dest + j*memsteps[store_in]) + #define b1 ((char *)(x1+j*sb1))[0] + #define i1 ((int *)(x1+j*sb1))[0] + #define l1 ((long long *)(x1+j*sb1))[0] + #define f1 ((float *)(x1+j*sb1))[0] + #define d1 ((double *)(x1+j*sb1))[0] + #define c1r ((double *)(x1+j*sb1))[0] + #define c1i ((double *)(x1+j*sb1))[1] + #define s1 ((char *)x1+j*sb1) + #define b2 ((char *)(x2+j*sb2))[0] + #define i2 ((int *)(x2+j*sb2))[0] + #define l2 ((long long *)(x2+j*sb2))[0] + #define f2 ((float *)(x2+j*sb2))[0] + #define d2 ((double *)(x2+j*sb2))[0] + #define c2r ((double *)(x2+j*sb2))[0] + #define c2i ((double *)(x2+j*sb2))[1] + #define s2 ((char *)x2+j*sb2) + #define b3 ((char *)(x3+j*sb3))[0] + #define i3 ((int *)(x3+j*sb3))[0] + #define l3 ((long long *)(x3+j*sb3))[0] + #define f3 ((float *)(x3+j*sb3))[0] + #define d3 ((double *)(x3+j*sb3))[0] + #define c3r ((double *)(x3+j*sb3))[0] + #define c3i ((double *)(x3+j*sb3))[1] + #define s3 ((char *)x3+j*sb3) 
+ /* Some temporaries */ + double da, db; + std::complex ca, cb; + + switch (op) { + + case OP_NOOP: break; + + case OP_COPY_BB: VEC_ARG1(b_dest = b1); + case OP_COPY_SS: VEC_ARG1(memcpy(s_dest, s1, ss1)); + /* The next versions of copy opcodes can cope with unaligned + data even on platforms that crash while accessing it + (like the Sparc architecture under Solaris). */ + case OP_COPY_II: VEC_ARG1(memcpy(&i_dest, s1, sizeof(int))); + case OP_COPY_LL: VEC_ARG1(memcpy(&l_dest, s1, sizeof(long long))); + case OP_COPY_FF: VEC_ARG1(memcpy(&f_dest, s1, sizeof(float))); + case OP_COPY_DD: VEC_ARG1(memcpy(&d_dest, s1, sizeof(double))); + case OP_COPY_CC: VEC_ARG1(memcpy(&cr_dest, s1, sizeof(double)*2)); + + /* Bool */ + case OP_INVERT_BB: VEC_ARG1(b_dest = !b1); + case OP_AND_BBB: VEC_ARG2(b_dest = (b1 && b2)); + case OP_OR_BBB: VEC_ARG2(b_dest = (b1 || b2)); + case OP_XOR_BBB: VEC_ARG2(b_dest = (b1 || b2) && !(b1 && b2) ); + + case OP_EQ_BBB: VEC_ARG2(b_dest = (b1 == b2)); + case OP_NE_BBB: VEC_ARG2(b_dest = (b1 != b2)); + case OP_WHERE_BBBB: VEC_ARG3(b_dest = b1 ? 
b2 : b3); + + /* Comparisons */ + case OP_GT_BII: VEC_ARG2(b_dest = (i1 > i2)); + case OP_GE_BII: VEC_ARG2(b_dest = (i1 >= i2)); + case OP_EQ_BII: VEC_ARG2(b_dest = (i1 == i2)); + case OP_NE_BII: VEC_ARG2(b_dest = (i1 != i2)); + + case OP_GT_BLL: VEC_ARG2(b_dest = (l1 > l2)); + case OP_GE_BLL: VEC_ARG2(b_dest = (l1 >= l2)); + case OP_EQ_BLL: VEC_ARG2(b_dest = (l1 == l2)); + case OP_NE_BLL: VEC_ARG2(b_dest = (l1 != l2)); + + case OP_GT_BFF: VEC_ARG2(b_dest = (f1 > f2)); + case OP_GE_BFF: VEC_ARG2(b_dest = (f1 >= f2)); + case OP_EQ_BFF: VEC_ARG2(b_dest = (f1 == f2)); + case OP_NE_BFF: VEC_ARG2(b_dest = (f1 != f2)); + + case OP_GT_BDD: VEC_ARG2(b_dest = (d1 > d2)); + case OP_GE_BDD: VEC_ARG2(b_dest = (d1 >= d2)); + case OP_EQ_BDD: VEC_ARG2(b_dest = (d1 == d2)); + case OP_NE_BDD: VEC_ARG2(b_dest = (d1 != d2)); + + case OP_GT_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) > 0)); + case OP_GE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) >= 0)); + case OP_EQ_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) == 0)); + case OP_NE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) != 0)); + + case OP_CONTAINS_BSS: VEC_ARG2(b_dest = stringcontains(s1, s2, ss1, ss2)); + + /* Int */ + case OP_CAST_IB: VEC_ARG1(i_dest = (int)(b1)); + case OP_ONES_LIKE_II: VEC_ARG0(i_dest = 1); + case OP_NEG_II: VEC_ARG1(i_dest = -i1); + + case OP_ADD_III: VEC_ARG2(i_dest = i1 + i2); + case OP_SUB_III: VEC_ARG2(i_dest = i1 - i2); + case OP_MUL_III: VEC_ARG2(i_dest = i1 * i2); + case OP_DIV_III: VEC_ARG2(i_dest = i2 ? (i1 / i2) : 0); + case OP_POW_III: VEC_ARG2(i_dest = (i2 < 0) ? (1 / i1) : (int)pow((double)i1, i2)); + case OP_MOD_III: VEC_ARG2(i_dest = i2 == 0 ? 0 :((i1 % i2) + i2) % i2); + case OP_FLOORDIV_III: VEC_ARG2(i_dest = i2 ? (i1 / i2) - ((i1 % i2 != 0) && (i1 < 0 != i2 < 0)) : 0); + case OP_LSHIFT_III: VEC_ARG2(i_dest = i1 << i2); + case OP_RSHIFT_III: VEC_ARG2(i_dest = i1 >> i2); + + case OP_WHERE_IBII: VEC_ARG3(i_dest = b1 ? 
i2 : i3); + //Bitwise ops + case OP_INVERT_II: VEC_ARG1(i_dest = ~i1); + case OP_AND_III: VEC_ARG2(i_dest = (i1 & i2)); + case OP_OR_III: VEC_ARG2(i_dest = (i1 | i2)); + case OP_XOR_III: VEC_ARG2(i_dest = (i1 ^ i2)); + + /* Long */ + case OP_CAST_LI: VEC_ARG1(l_dest = (long long)(i1)); + case OP_ONES_LIKE_LL: VEC_ARG0(l_dest = 1); + case OP_NEG_LL: VEC_ARG1(l_dest = -l1); + + case OP_ADD_LLL: VEC_ARG2(l_dest = l1 + l2); + case OP_SUB_LLL: VEC_ARG2(l_dest = l1 - l2); + case OP_MUL_LLL: VEC_ARG2(l_dest = l1 * l2); + case OP_DIV_LLL: VEC_ARG2(l_dest = l2 ? (l1 / l2) : 0); +#if defined _MSC_VER && _MSC_VER < 1800 + case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)pow((long double)l1, (long double)l2)); +#else + case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)llround(pow((long double)l1, (long double)l2))); +#endif + case OP_MOD_LLL: VEC_ARG2(l_dest = l2 == 0 ? 0 :((l1 % l2) + l2) % l2); + case OP_FLOORDIV_LLL: VEC_ARG2(l_dest = l2 ? (l1 / l2) - ((l1 % l2 != 0) && (l1 < 0 != l2 < 0)): 0); + case OP_LSHIFT_LLL: VEC_ARG2(l_dest = l1 << l2); + case OP_RSHIFT_LLL: VEC_ARG2(l_dest = l1 >> l2); + + case OP_WHERE_LBLL: VEC_ARG3(l_dest = b1 ? 
l2 : l3); + //Bitwise ops + case OP_INVERT_LL: VEC_ARG1(l_dest = ~l1); + case OP_AND_LLL: VEC_ARG2(l_dest = (l1 & l2)); + case OP_OR_LLL: VEC_ARG2(l_dest = (l1 | l2)); + case OP_XOR_LLL: VEC_ARG2(l_dest = (l1 ^ l2)); + + /* Float */ + case OP_CAST_FI: VEC_ARG1(f_dest = (float)(i1)); + case OP_CAST_FL: VEC_ARG1(f_dest = (float)(l1)); + case OP_ONES_LIKE_FF: VEC_ARG0(f_dest = 1.0); + case OP_NEG_FF: VEC_ARG1(f_dest = -f1); + + case OP_ADD_FFF: VEC_ARG2(f_dest = f1 + f2); + case OP_SUB_FFF: VEC_ARG2(f_dest = f1 - f2); + case OP_MUL_FFF: VEC_ARG2(f_dest = f1 * f2); + case OP_DIV_FFF: +#ifdef USE_VML + VEC_ARG2_VML(vsDiv(BLOCK_SIZE, + (float*)x1, (float*)x2, (float*)dest)); +#else + VEC_ARG2(f_dest = f1 / f2); +#endif + case OP_POW_FFF: +#ifdef USE_VML + VEC_ARG2_VML(vsPow(BLOCK_SIZE, + (float*)x1, (float*)x2, (float*)dest)); +#else + VEC_ARG2(f_dest = powf(f1, f2)); +#endif + case OP_MOD_FFF: VEC_ARG2(f_dest = f1 - floorf(f1/f2) * f2); + case OP_FLOORDIV_FFF: VEC_ARG2(f_dest = floorf(f1/f2)); + + case OP_SQRT_FF: +#ifdef USE_VML + VEC_ARG1_VML(vsSqrt(BLOCK_SIZE, (float*)x1, (float*)dest)); +#else + VEC_ARG1(f_dest = sqrtf(f1)); +#endif + + case OP_WHERE_FBFF: VEC_ARG3(f_dest = b1 ? 
f2 : f3); + + case OP_FUNC_FFN: +#ifdef USE_VML + VEC_ARG1_VML(functions_ff_vml[arg2](BLOCK_SIZE, + (float*)x1, (float*)dest)); +#else + VEC_ARG1(f_dest = functions_ff[arg2](f1)); +#endif + case OP_FUNC_FFFN: +#ifdef USE_VML + VEC_ARG2_VML(functions_fff_vml[arg3](BLOCK_SIZE, + (float*)x1, (float*)x2, + (float*)dest)); +#else + VEC_ARG2(f_dest = functions_fff[arg3](f1, f2)); +#endif + + /* Double */ + case OP_CAST_DI: VEC_ARG1(d_dest = (double)(i1)); + case OP_CAST_DL: VEC_ARG1(d_dest = (double)(l1)); + case OP_CAST_DF: VEC_ARG1(d_dest = (double)(f1)); + case OP_ONES_LIKE_DD: VEC_ARG0(d_dest = 1.0); + case OP_NEG_DD: VEC_ARG1(d_dest = -d1); + + case OP_ADD_DDD: VEC_ARG2(d_dest = d1 + d2); + case OP_SUB_DDD: VEC_ARG2(d_dest = d1 - d2); + case OP_MUL_DDD: VEC_ARG2(d_dest = d1 * d2); + case OP_DIV_DDD: +#ifdef USE_VML + VEC_ARG2_VML(vdDiv(BLOCK_SIZE, + (double*)x1, (double*)x2, (double*)dest)); +#else + VEC_ARG2(d_dest = d1 / d2); +#endif + case OP_POW_DDD: +#ifdef USE_VML + VEC_ARG2_VML(vdPow(BLOCK_SIZE, + (double*)x1, (double*)x2, (double*)dest)); +#else + VEC_ARG2(d_dest = pow(d1, d2)); +#endif + case OP_MOD_DDD: VEC_ARG2(d_dest = d1 - floor(d1/d2) * d2); + case OP_FLOORDIV_DDD: VEC_ARG2(d_dest = floor(d1/d2)); + + case OP_SQRT_DD: +#ifdef USE_VML + VEC_ARG1_VML(vdSqrt(BLOCK_SIZE, (double*)x1, (double*)dest)); +#else + VEC_ARG1(d_dest = sqrt(d1)); +#endif + + case OP_WHERE_DBDD: VEC_ARG3(d_dest = b1 ? 
d2 : d3); + + case OP_FUNC_DDN: +#ifdef USE_VML + VEC_ARG1_VML(functions_dd_vml[arg2](BLOCK_SIZE, + (double*)x1, (double*)dest)); +#else + VEC_ARG1(d_dest = functions_dd[arg2](d1)); +#endif + case OP_FUNC_DDDN: +#ifdef USE_VML + VEC_ARG2_VML(functions_ddd_vml[arg3](BLOCK_SIZE, + (double*)x1, (double*)x2, + (double*)dest)); +#else + VEC_ARG2(d_dest = functions_ddd[arg3](d1, d2)); +#endif + + /* Complex */ + case OP_CAST_CI: VEC_ARG1(cr_dest = (double)(i1); + ci_dest = 0); + case OP_CAST_CL: VEC_ARG1(cr_dest = (double)(l1); + ci_dest = 0); + case OP_CAST_CF: VEC_ARG1(cr_dest = f1; + ci_dest = 0); + case OP_CAST_CD: VEC_ARG1(cr_dest = d1; + ci_dest = 0); + case OP_ONES_LIKE_CC: VEC_ARG0(cr_dest = 1; + ci_dest = 0); + case OP_NEG_CC: VEC_ARG1(cr_dest = -c1r; + ci_dest = -c1i); + + case OP_ADD_CCC: VEC_ARG2(cr_dest = c1r + c2r; + ci_dest = c1i + c2i); + case OP_SUB_CCC: VEC_ARG2(cr_dest = c1r - c2r; + ci_dest = c1i - c2i); + case OP_MUL_CCC: VEC_ARG2(da = c1r*c2r - c1i*c2i; + ci_dest = c1r*c2i + c1i*c2r; + cr_dest = da); + case OP_DIV_CCC: +#ifdef USE_VMLXXX /* VML complex division is slower */ + VEC_ARG2_VML(vzDiv(BLOCK_SIZE, (const MKL_Complex16*)x1, + (const MKL_Complex16*)x2, (MKL_Complex16*)dest)); +#else + VEC_ARG2(da = c2r*c2r + c2i*c2i; + db = (c1r*c2r + c1i*c2i) / da; + ci_dest = (c1i*c2r - c1r*c2i) / da; + cr_dest = db); +#endif + case OP_EQ_BCC: VEC_ARG2(b_dest = (c1r == c2r && c1i == c2i)); + case OP_NE_BCC: VEC_ARG2(b_dest = (c1r != c2r || c1i != c2i)); + + case OP_WHERE_CBCC: VEC_ARG3(cr_dest = b1 ? c2r : c3r; + ci_dest = b1 ? 
c2i : c3i); + case OP_FUNC_CCN: +#ifdef USE_VML + VEC_ARG1_VML(functions_cc_vml[arg2](BLOCK_SIZE, + (const MKL_Complex16*)x1, + (MKL_Complex16*)dest)); +#else + VEC_ARG1(ca.real(c1r); + ca.imag(c1i); + functions_cc[arg2](&ca, &ca); + cr_dest = ca.real(); + ci_dest = ca.imag()); +#endif + case OP_FUNC_CCCN: VEC_ARG2(ca.real(c1r); + ca.imag(c1i); + cb.real(c2r); + cb.imag(c2i); + functions_ccc[arg3](&ca, &cb, &ca); + cr_dest = ca.real(); + ci_dest = ca.imag()); + + case OP_REAL_DC: VEC_ARG1(d_dest = c1r); + case OP_IMAG_DC: VEC_ARG1(d_dest = c1i); + case OP_COMPLEX_CDD: VEC_ARG2(cr_dest = d1; + ci_dest = d2); + + // Boolean return types + case OP_FUNC_BFN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, + (float*)x1, (bool*)dest)); +#else + VEC_ARG1(b_dest = functions_bf[arg2](f1)); +#endif + + + case OP_FUNC_BDN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bd_vml[arg2](BLOCK_SIZE, + (double*)x1, (bool*)dest)); +#else + VEC_ARG1(b_dest = functions_bd[arg2](d1)); +#endif + + case OP_FUNC_BCN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bc_vml[arg2](BLOCK_SIZE, + (const MKL_Complex16*)x1, (bool*)dest)); +#else + VEC_ARG1(ca.real(c1r); + ca.imag(c1i); + b_dest = functions_bc[arg2](&ca)); +#endif + + /* Integer return types */ + case OP_FUNC_IIN: +#ifdef USE_VML + VEC_ARG1_VML(functions_ii_vml[arg2](BLOCK_SIZE, + (int*)x1, (int*)dest)); +#else + VEC_ARG1(i_dest = functions_ii[arg2](i1)); +#endif + case OP_FUNC_LLN: +#ifdef USE_VML + VEC_ARG1_VML(functions_ll_vml[arg2](BLOCK_SIZE, + (long*)x1, (long*)dest)); +#else + VEC_ARG1(l_dest = functions_ll[arg2](l1)); +#endif + + /* Reductions */ + case OP_SUM_IIN: VEC_ARG1(i_reduce += i1); + case OP_SUM_LLN: VEC_ARG1(l_reduce += l1); + case OP_SUM_FFN: VEC_ARG1(f_reduce += f1); + case OP_SUM_DDN: VEC_ARG1(d_reduce += d1); + case OP_SUM_CCN: VEC_ARG1(cr_reduce += c1r; + ci_reduce += c1i); + + case OP_PROD_IIN: VEC_ARG1(i_reduce *= i1); + case OP_PROD_LLN: VEC_ARG1(l_reduce *= l1); + case OP_PROD_FFN: VEC_ARG1(f_reduce 
*= f1); + case OP_PROD_DDN: VEC_ARG1(d_reduce *= d1); + case OP_PROD_CCN: VEC_ARG1(da = cr_reduce*c1r - ci_reduce*c1i; + ci_reduce = cr_reduce*c1i + ci_reduce*c1r; + cr_reduce = da); + + case OP_MIN_IIN: VEC_ARG1(i_reduce = fmin(i_reduce, i1)); + case OP_MIN_LLN: VEC_ARG1(l_reduce = fmin(l_reduce, l1)); + case OP_MIN_FFN: VEC_ARG1(f_reduce = fmin(f_reduce, f1)); + case OP_MIN_DDN: VEC_ARG1(d_reduce = fmin(d_reduce, d1)); + + case OP_MAX_IIN: VEC_ARG1(i_reduce = fmax(i_reduce, i1)); + case OP_MAX_LLN: VEC_ARG1(l_reduce = fmax(l_reduce, l1)); + case OP_MAX_FFN: VEC_ARG1(f_reduce = fmax(f_reduce, f1)); + case OP_MAX_DDN: VEC_ARG1(d_reduce = fmax(d_reduce, d1)); + + default: + *pc_error = pc; + return -3; + break; + } + } + +#ifndef NO_OUTPUT_BUFFERING + // If output buffering was necessary, copy the buffer to the output + if(params.out_buffer != NULL) { + memcpy(iter_dataptr[0], params.out_buffer, params.memsizes[0] * BLOCK_SIZE); + } +#endif // NO_OUTPUT_BUFFERING + +#undef VEC_LOOP +#undef VEC_ARG1 +#undef VEC_ARG2 +#undef VEC_ARG3 + +#undef i_reduce +#undef l_reduce +#undef f_reduce +#undef d_reduce +#undef cr_reduce +#undef ci_reduce +#undef b_dest +#undef i_dest +#undef l_dest +#undef f_dest +#undef d_dest +#undef cr_dest +#undef ci_dest +#undef s_dest +#undef b1 +#undef i1 +#undef l1 +#undef f1 +#undef d1 +#undef c1r +#undef c1i +#undef s1 +#undef b2 +#undef i2 +#undef l2 +#undef f2 +#undef d2 +#undef c2r +#undef c2i +#undef s2 +#undef b3 +#undef i3 +#undef l3 +#undef f3 +#undef d3 +#undef c3r +#undef c3i +#undef s3 +} + +/* +Local Variables: + c-basic-offset: 4 +End: +*/ diff --git a/venv/Lib/site-packages/numexpr/interpreter.cp311-win_amd64.pyd b/venv/Lib/site-packages/numexpr/interpreter.cp311-win_amd64.pyd new file mode 100644 index 0000000..4ffb544 Binary files /dev/null and b/venv/Lib/site-packages/numexpr/interpreter.cp311-win_amd64.pyd differ diff --git a/venv/Lib/site-packages/numexpr/interpreter.cpp b/venv/Lib/site-packages/numexpr/interpreter.cpp new 
file mode 100644 index 0000000..409ad3d --- /dev/null +++ b/venv/Lib/site-packages/numexpr/interpreter.cpp @@ -0,0 +1,1585 @@ +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "module.hpp" +#include +#include +#include +#include +#include + +#include "numexpr_config.hpp" +#include "complex_functions.hpp" +#include "interpreter.hpp" +#include "numexpr_object.hpp" +#include "bespoke_functions.hpp" + +#ifdef _MSC_VER +/* Some missing symbols and functions for Win */ +#define fmax max +#define fmin min +#define NE_INFINITY (DBL_MAX+DBL_MAX) +#define NE_NAN (INFINITY-INFINITY) +#else +#define NE_INFINITY INFINITY +#define NE_NAN NAN +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX ((size_t)-1) +#endif + +#define RETURN_TYPE char* + +// AVAILABLE(Haystack, Haystack_Len, J, Needle_Len) +// A macro that returns nonzero if there are at least Needle_Len +// bytes left starting at Haystack[J]. +// Haystack is 'unsigned char *', Haystack_Len, J, and Needle_Len +// are 'size_t'; Haystack_Len is an lvalue. For NUL-terminated +// searches, Haystack_Len can be modified each iteration to avoid +// having to compute the end of Haystack up front. 
+ +#define AVAILABLE(Haystack, Haystack_Len, J, Needle_Len) \ + ((Haystack_Len) >= (J) + (Needle_Len)) + +// To allow building with NumPy<2 locally define the new NumPy macros: +#if NPY_ABI_VERSION < 0x02000000 + #define PyDataType_ELSIZE(descr) ((descr)->elsize) + #define PyDataType_SET_ELSIZE(descr, size) (descr)->elsize = size +#endif + +#include "str-two-way.hpp" + +#ifdef DEBUG +#define DEBUG_TEST 1 +#else +#define DEBUG_TEST 0 +#endif + + +using namespace std; + +// Global state +thread_data th_params; + +/* This file and interp_body should really be generated from a description of + the opcodes -- there's too much repetition here for manually editing */ + + +/* bit of a misnomer; includes the return value. */ +#define NUMEXPR_MAX_ARGS 4 + +static char op_signature_table[][NUMEXPR_MAX_ARGS] = { +#define Tb 'b' +#define Ti 'i' +#define Tl 'l' +#define Tf 'f' +#define Td 'd' +#define Tc 'c' +#define Ts 's' +#define Tn 'n' +#define T0 0 +#define OPCODE(n, e, ex, rt, a1, a2, a3) {rt, a1, a2, a3}, +#include "opcodes.hpp" +#undef OPCODE +#undef Tb +#undef Ti +#undef Tl +#undef Tf +#undef Td +#undef Tc +#undef Ts +#undef Tn +#undef T0 +}; + +/* returns the sig of the nth op, '\0' if no more ops -1 on failure */ +static int +op_signature(int op, unsigned int n) { + if (n >= NUMEXPR_MAX_ARGS) { + return 0; + } + if (op < 0 || op > OP_END) { + return -1; + } + return op_signature_table[op][n]; +} + + + +/* + To add a function to the lookup table, add to FUNC_CODES (first + group is 1-arg functions, second is 2-arg functions), also to + functions_f or functions_ff as appropriate. Finally, use add_func + down below to add to funccodes. Functions with more arguments + aren't implemented at present, but should be easy; just copy the 1- + or 2-arg case. + + Some functions (for example, sqrt) are repeated in this table that + are opcodes, but there's no problem with that as the compiler + selects opcodes over functions, and this makes it easier to compare + opcode vs. 
/* Pointer to a scalar function taking one float and returning a float. */
typedef float (*FuncFFPtr)(float);

/*
 * functions_ff: table of float -> float function pointers.
 *
 * The entries are produced by including "functions.hpp" while FUNC_FF is
 * defined to expand to exactly one of its arguments followed by a comma
 * (presumably that header holds one FUNC_FF(...) line per function -- confirm
 * there).  On _WIN32 the `f_win32` argument is selected; on every other
 * platform the `f` argument is.  FUNC_FF is #undef'd right after the include
 * so the header can be re-included with a different definition.
 */
#ifdef _WIN32
/* Thin forwarding wrapper around signf(). */
inline float signf2(float x) { // needed to wait for bespoke_functions to be loaded
    return signf(x);
}
FuncFFPtr functions_ff[] = {
#define FUNC_FF(fop, s, f, f_win32, ...) f_win32,
#include "functions.hpp"
#undef FUNC_FF
};
#else
FuncFFPtr functions_ff[] = {
#define FUNC_FF(fop, s, f, ...) f,
#include "functions.hpp"
#undef FUNC_FF
};
#endif
f, +#include "functions.hpp" +#undef FUNC_BF +}; +#endif + +#ifdef USE_VML +typedef void (*FuncBFPtr_vml)(MKL_INT, const float*, bool*); +FuncBFPtr_vml functions_bf_vml[] = { +#define FUNC_BF(fop, s, f, f_win32, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BF +}; +#endif + +typedef bool (*FuncBDPtr)(double); +FuncBDPtr functions_bd[] = { +#define FUNC_BD(fop, s, f, ...) f, +#include "functions.hpp" +#undef FUNC_BD +}; + +#ifdef USE_VML +typedef void (*FuncBDPtr_vml)(MKL_INT, const double*, bool*); +FuncBDPtr_vml functions_bd_vml[] = { +#define FUNC_BD(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BD +}; +#endif + +typedef bool (*FuncBCPtr)(std::complex*); +FuncBCPtr functions_bc[] = { +#define FUNC_BC(fop, s, f, ...) f, +#include "functions.hpp" +#undef FUNC_BC +}; + + +#ifdef USE_VML +typedef void (*FuncBCPtr_vml)(MKL_INT, const MKL_Complex16[], bool*); +FuncBCPtr_vml functions_bc_vml[] = { +#define FUNC_BC(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BC +}; +#endif + +#ifdef USE_VML +typedef void (*FuncDDPtr_vml)(MKL_INT, const double*, double*); +FuncDDPtr_vml functions_dd_vml[] = { +#define FUNC_DD(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_DD +}; +#endif + +typedef double (*FuncDDDPtr)(double, double); + +FuncDDDPtr functions_ddd[] = { +#define FUNC_DDD(fop, s, f, ...) f, +#include "functions.hpp" +#undef FUNC_DDD +}; + +#ifdef USE_VML +typedef void (*FuncDDDPtr_vml)(MKL_INT, const double*, const double*, double*); +FuncDDDPtr_vml functions_ddd_vml[] = { +#define FUNC_DDD(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_DDD +}; +#endif + + + +typedef void (*FuncCCPtr)(std::complex*, std::complex*); + +FuncCCPtr functions_cc[] = { +#define FUNC_CC(fop, s, f, ...) 
f, +#include "functions.hpp" +#undef FUNC_CC +}; + +#ifdef USE_VML +typedef void (*FuncCCPtr_vml)(MKL_INT, const MKL_Complex16[], MKL_Complex16[]); +FuncCCPtr_vml functions_cc_vml[] = { +#define FUNC_CC(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_CC +}; +#endif + + +typedef void (*FuncCCCPtr)(std::complex*, std::complex*, std::complex*); + +FuncCCCPtr functions_ccc[] = { +#define FUNC_CCC(fop, s, f) f, +#include "functions.hpp" +#undef FUNC_CCC +}; + +/* integer return types*/ +typedef int (*FuncIIPtr)(int); +FuncIIPtr functions_ii[] = { +#define FUNC_II(fop, s, f, ...) f, +#include "functions.hpp" +#undef FUNC_II +}; + +#ifdef USE_VML +typedef void (*FuncIIPtr_vml)(MKL_INT, const int*, int*); +FuncIIPtr_vml functions_ii_vml[] = { +#define FUNC_II(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_II +}; +#endif + +typedef long (*FuncLLPtr)(long); +FuncLLPtr functions_ll[] = { +#define FUNC_LL(fop, s, f, ...) f, +#include "functions.hpp" +#undef FUNC_LL +}; + +#ifdef USE_VML +typedef void (*FuncLLPtr_vml)(MKL_INT, const long*, long*); +FuncLLPtr_vml functions_ll_vml[] = { +#define FUNC_LL(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_LL +}; +#endif + + +char +get_return_sig(PyObject* program) +{ // use unsigned chars to match OPCODE table and allow OPCODE > 127 + int sig; + unsigned char last_opcode; + Py_ssize_t end = PyBytes_Size(program); + unsigned char *program_str = (unsigned char *)PyBytes_AS_STRING(program); + + do { + end -= 4; + if (end < 0) return 'X'; + last_opcode = program_str[end]; + } + while (last_opcode == OP_NOOP); + + sig = op_signature(last_opcode, 0); + if (sig <= 0) { + return 'X'; + } else { + return (char)sig; + } +} + +static int +typecode_from_char(char c) +{ + switch (c) { + case 'b': return NPY_BOOL; + case 'i': return NPY_INT; + case 'l': return NPY_LONGLONG; + case 'f': return NPY_FLOAT; + case 'd': return NPY_DOUBLE; + case 'c': return NPY_CDOUBLE; + case 's': return NPY_STRING; + 
default: + PyErr_SetString(PyExc_TypeError, "signature value not in 'bilfdcs'"); + return -1; + } +} + +static int +last_opcode(PyObject *program_object) { + Py_ssize_t n; + unsigned char *program; + PyBytes_AsStringAndSize(program_object, (char **)&program, &n); + return program[n-4]; +} + +static int +get_reduction_axis(PyObject* program) { + Py_ssize_t end = PyBytes_Size(program); + int axis = ((unsigned char *)PyBytes_AS_STRING(program))[end-1]; + if (axis != 255 && axis >= NPY_MAXDIMS) + axis = NPY_MAXDIMS - axis; + return axis; +} + + + +int +check_program(NumExprObject *self) +{ + unsigned char *program; + Py_ssize_t prog_len, n_buffers, n_inputs; + int pc, arg, argloc, argno, sig; + char *fullsig, *signature; + + if (PyBytes_AsStringAndSize(self->program, (char **)&program, + &prog_len) < 0) { + PyErr_Format(PyExc_RuntimeError, "invalid program: can't read program"); + return -1; + } + if (prog_len % 4 != 0) { + PyErr_Format(PyExc_RuntimeError, "invalid program: prog_len mod 4 != 0"); + return -1; + } + if (PyBytes_AsStringAndSize(self->fullsig, (char **)&fullsig, + &n_buffers) < 0) { + PyErr_Format(PyExc_RuntimeError, "invalid program: can't read fullsig"); + return -1; + } + if (PyBytes_AsStringAndSize(self->signature, (char **)&signature, + &n_inputs) < 0) { + PyErr_Format(PyExc_RuntimeError, "invalid program: can't read signature"); + return -1; + } + if (n_buffers > 255) { + PyErr_Format(PyExc_RuntimeError, "invalid program: too many buffers"); + return -1; + } + for (pc = 0; pc < prog_len; pc += 4) { + unsigned int op = program[pc]; + if (op == OP_NOOP) { + continue; + } + if ((op >= OP_REDUCTION) && pc != prog_len-4) { + PyErr_Format(PyExc_RuntimeError, + "invalid program: reduction operations must occur last"); + return -1; + } + for (argno = 0; ; argno++) { + sig = op_signature(op, argno); + if (sig == -1) { + PyErr_Format(PyExc_RuntimeError, "invalid program: illegal opcode at %i (%d)", pc, op); + return -1; + } + if (sig == 0) break; + if (argno 
< 3) { + argloc = pc+argno+1; + } + if (argno >= 3) { + if (pc + 1 >= prog_len) { + PyErr_Format(PyExc_RuntimeError, "invalid program: double opcode (%c) at end (%i)", pc, sig); + return -1; + } + argloc = pc+argno+2; + } + arg = program[argloc]; + + if (sig != 'n' && ((arg >= n_buffers) || (arg < 0))) { + PyErr_Format(PyExc_RuntimeError, "invalid program: buffer out of range (%i) at %i", arg, argloc); + return -1; + } + if (sig == 'n') { + if (op == OP_FUNC_FFN) { + if (arg < 0 || arg >= FUNC_FF_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } else if (op == OP_FUNC_FFFN) { + if (arg < 0 || arg >= FUNC_FFF_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } else if (op == OP_FUNC_DDN) { + if (arg < 0 || arg >= FUNC_DD_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } else if (op == OP_FUNC_DDDN) { + if (arg < 0 || arg >= FUNC_DDD_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } else if (op == OP_FUNC_CCN) { + if (arg < 0 || arg >= FUNC_CC_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } else if (op == OP_FUNC_CCCN) { + if (arg < 0 || arg >= FUNC_CCC_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op == OP_FUNC_BDN) { + if (arg < 0 || arg >= FUNC_BD_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op == OP_FUNC_BFN) { + if (arg < 0 || arg >= FUNC_BF_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + 
else if (op == OP_FUNC_BCN) { + if (arg < 0 || arg >= FUNC_BC_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op == OP_FUNC_IIN) { + if (arg < 0 || arg >= FUNC_II_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op == OP_FUNC_LLN) { + if (arg < 0 || arg >= FUNC_LL_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op >= OP_REDUCTION) { + ; + } else { + PyErr_Format(PyExc_RuntimeError, "invalid program: internal checker error processing %i", argloc); + return -1; + } + /* The next is to avoid problems with the ('i','l') duality, + specially in 64-bit platforms */ + } else if (((sig == 'l') && (fullsig[arg] == 'i')) || + ((sig == 'i') && (fullsig[arg] == 'l'))) { + ; + } else if (sig != fullsig[arg]) { + PyErr_Format(PyExc_RuntimeError, + "invalid : opcode signature doesn't match buffer (%c vs %c) at %i", sig, fullsig[arg], argloc); + return -1; + } + } + } + return 0; +} + + + + +struct index_data { + int count; + int size; + int findex; + npy_intp *shape; + npy_intp *strides; + int *index; + char *buffer; +}; + +// BOUNDS_CHECK is used in interp_body.cpp +#define DO_BOUNDS_CHECK 1 + +#if DO_BOUNDS_CHECK +#define BOUNDS_CHECK(arg) if ((arg) >= params.r_end) { \ + *pc_error = pc; \ + return -2; \ + } +#else +#define BOUNDS_CHECK(arg) +#endif + +int +stringcmp(const char *s1, const char *s2, npy_intp maxlen1, npy_intp maxlen2) +{ + npy_intp maxlen, nextpos; + /* Point to this when the end of a string is found, + to simulate infinte trailing NULL characters. */ + const char null = 0; + + // First check if some of the operands is the empty string and if so, + // just check that the first char of the other is the NULL one. 
+ // Fixes #121 + if (maxlen2 == 0) return *s1 != null; + if (maxlen1 == 0) return *s2 != null; + + maxlen = (maxlen1 > maxlen2) ? maxlen1 : maxlen2; + for (nextpos = 1; nextpos <= maxlen; nextpos++) { + if (*s1 < *s2) + return -1; + if (*s1 > *s2) + return +1; + s1 = (nextpos >= maxlen1) ? &null : s1+1; + s2 = (nextpos >= maxlen2) ? &null : s2+1; + } + return 0; +} + + +/* contains(str1, str2) function for string columns. + + Based on Newlib/strstr.c. */ + +int +stringcontains(const char *haystack_start, const char *needle_start, npy_intp max_haystack_len, npy_intp max_needle_len) +{ + // needle_len - Length of needle. + // haystack_len - Known minimum length of haystack. + size_t needle_len = (size_t)max_needle_len; + size_t haystack_len = (size_t)max_haystack_len; + + const char *haystack = haystack_start; + const char *needle = needle_start; + bool ok = true; /* needle is prefix of haystack. */ + char *res; + + size_t si = 0; + size_t min_len = min(needle_len, haystack_len); + while (si < min_len && *haystack && *needle) + { + ok &= *haystack++ == *needle++; + si++; + } + + /* check needle is prefix of haystack and calc needle length */ + if (si == needle_len || *needle == 0) { + if (ok) + return 1; + needle_len = si; + } else { + /* haystack less needle */ + return 0; + } + + /* calc haystack length */ + while (si < haystack_len && *haystack) { + haystack++; + si++; + } + haystack_len = si; + + if (needle_len < LONG_NEEDLE_THRESHOLD) + { + res = two_way_short_needle((const unsigned char *)haystack_start, haystack_len, + (const unsigned char *)needle_start, needle_len); + } else { + res = two_way_long_needle((const unsigned char *)haystack_start, haystack_len, + (const unsigned char *)needle_start, needle_len); + } + return res != NULL ? 
1 : 0; +} + + +/* Get space for VM temporary registers */ +int get_temps_space(const vm_params& params, char **mem, size_t block_size) +{ + int r, k = 1 + params.n_inputs + params.n_constants; + + for (r = k; r < k + params.n_temps; r++) { + mem[r] = (char *)malloc(block_size * params.memsizes[r]); + if (mem[r] == NULL) { + return -1; + } + } + return 0; +} + +/* Free space for VM temporary registers */ +void free_temps_space(const vm_params& params, char **mem) +{ + int r, k = 1 + params.n_inputs + params.n_constants; + + for (r = k; r < k + params.n_temps; r++) { + free(mem[r]); + } +} + +/* Serial/parallel task iterator version of the VM engine */ +int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, + const vm_params& params, + int *pc_error, char **errmsg) +{ + char **mem = params.mem; + NpyIter_IterNextFunc *iternext; + npy_intp block_size, *size_ptr; + char **iter_dataptr; + npy_intp *iter_strides; + + iternext = NpyIter_GetIterNext(iter, errmsg); + if (iternext == NULL) { + return -1; + } + + size_ptr = NpyIter_GetInnerLoopSizePtr(iter); + iter_dataptr = NpyIter_GetDataPtrArray(iter); + iter_strides = NpyIter_GetInnerStrideArray(iter); + + /* + * First do all the blocks with a compile-time fixed size. + * This makes a big difference (30-50% on some tests). 
/*
 * Iterator-driven VM engine used when the reduction is the outer loop
 * (run_interpreter() calls it once per step of the reduction iterator).
 *
 * The interpreter itself is the textually included "interp_body.cpp",
 * compiled here with NO_OUTPUT_BUFFERING defined.  The locals declared below
 * (mem, block_size, iter_dataptr, iter_strides) together with the parameters
 * are presumably what the included body operates on -- confirm in
 * interp_body.cpp before renaming any of them.
 *
 * Returns -1 if no iternext function can be obtained (NumPy reports the
 * reason through *errmsg), 0 when the iteration completes.  The included body
 * can also return on its own: BOUNDS_CHECK, for instance, stores the failing
 * pc in *pc_error and returns -2.
 */
static int
vm_engine_iter_outer_reduce_task(NpyIter *iter, npy_intp *memsteps,
                const vm_params& params, int *pc_error, char **errmsg)
{
    char **mem = params.mem;
    NpyIter_IterNextFunc *iternext;
    npy_intp block_size, *size_ptr;
    char **iter_dataptr;
    npy_intp *iter_strides;

    iternext = NpyIter_GetIterNext(iter, errmsg);
    if (iternext == NULL) {
        return -1;
    }

    /* These pointers are fetched once; the values they point to are re-read
       after every iternext() call below. */
    size_ptr = NpyIter_GetInnerLoopSizePtr(iter);
    iter_dataptr = NpyIter_GetDataPtrArray(iter);
    iter_strides = NpyIter_GetInnerStrideArray(iter);

    /*
     * First do all the blocks with a compile-time fixed size.
     * This makes a big difference (30-50% on some tests).
     */
    block_size = *size_ptr;
    while (block_size == BLOCK_SIZE1) {
#define BLOCK_SIZE BLOCK_SIZE1
#define NO_OUTPUT_BUFFERING // Because it's a reduction
#include "interp_body.cpp"
#undef NO_OUTPUT_BUFFERING
#undef BLOCK_SIZE
        /* The return value of iternext is not inspected here; the loop ends
           when the inner size stops being BLOCK_SIZE1. */
        iternext(iter);
        block_size = *size_ptr;
    }

    /* Then finish off the rest, with BLOCK_SIZE being the run-time size */
    if (block_size > 0) do {
        block_size = *size_ptr;
#define BLOCK_SIZE block_size
#define NO_OUTPUT_BUFFERING // Because it's a reduction
#include "interp_body.cpp"
#undef NO_OUTPUT_BUFFERING
#undef BLOCK_SIZE
    } while (iternext(iter));

    return 0;
}
+ */ + taskfactor = 16*BLOCK_SIZE1*gs.nthreads; + numblocks = (th_params.vlen - th_params.start + taskfactor - 1) / + taskfactor; + th_params.block_size = numblocks * BLOCK_SIZE1; + + th_params.params = params; + th_params.need_output_buffering = need_output_buffering; + th_params.ret_code = 0; + th_params.pc_error = pc_error; + th_params.errmsg = errmsg; + th_params.iter[0] = iter; + /* Make one copy for each additional thread */ + for (i = 1; i < gs.nthreads; ++i) { + th_params.iter[i] = NpyIter_Copy(iter); + if (th_params.iter[i] == NULL) { + --i; + for (; i > 0; --i) { + NpyIter_Deallocate(th_params.iter[i]); + } + goto end; + } + } + th_params.memsteps[0] = params.memsteps; + /* Make one copy of memsteps for each additional thread */ + for (i = 1; i < gs.nthreads; ++i) { + th_params.memsteps[i] = PyMem_New(npy_intp, + 1 + params.n_inputs + params.n_constants + params.n_temps); + if (th_params.memsteps[i] == NULL) { + --i; + for (; i > 0; --i) { + PyMem_Del(th_params.memsteps[i]); + } + for (i = 0; i < gs.nthreads; ++i) { + NpyIter_Deallocate(th_params.iter[i]); + } + goto end; + } + memcpy(th_params.memsteps[i], th_params.memsteps[0], + sizeof(npy_intp) * + (1 + params.n_inputs + params.n_constants + params.n_temps)); + } + + Py_BEGIN_ALLOW_THREADS; + + /* Synchronization point for all threads (wait for initialization) */ + pthread_mutex_lock(&gs.count_threads_mutex); + if (gs.count_threads < gs.nthreads) { + gs.count_threads++; + /* Beware of spurious wakeups. See issue pydata/numexpr#306. 
*/ + do { + pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); + } while (!gs.barrier_passed); + } + else { + gs.barrier_passed = 1; + pthread_cond_broadcast(&gs.count_threads_cv); + } + pthread_mutex_unlock(&gs.count_threads_mutex); + + /* Synchronization point for all threads (wait for finalization) */ + pthread_mutex_lock(&gs.count_threads_mutex); + if (gs.count_threads > 0) { + gs.count_threads--; + do { + pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); + } while (gs.barrier_passed); + } + else { + gs.barrier_passed = 0; + pthread_cond_broadcast(&gs.count_threads_cv); + } + pthread_mutex_unlock(&gs.count_threads_mutex); + + Py_END_ALLOW_THREADS; + + /* Deallocate all the iterator and memsteps copies */ + for (i = 1; i < gs.nthreads; ++i) { + NpyIter_Deallocate(th_params.iter[i]); + PyMem_Del(th_params.memsteps[i]); + } + + ret = th_params.ret_code; + +end: + pthread_mutex_unlock(&gs.parallel_mutex); + return ret; +} + +static int +run_interpreter(NumExprObject *self, NpyIter *iter, NpyIter *reduce_iter, + bool reduction_outer_loop, bool need_output_buffering, + int *pc_error) +{ + int r; + Py_ssize_t plen; + vm_params params; + char *errmsg = NULL; + + *pc_error = -1; + if (PyBytes_AsStringAndSize(self->program, (char **)&(params.program), + &plen) < 0) { + return -1; + } + + params.prog_len = (int)plen; + params.output = NULL; + params.inputs = NULL; + params.index_data = NULL; + params.n_inputs = self->n_inputs; + params.n_constants = self->n_constants; + params.n_temps = self->n_temps; + params.mem = self->mem; + params.memsteps = self->memsteps; + params.memsizes = self->memsizes; + params.r_end = (int)PyBytes_Size(self->fullsig); + params.out_buffer = NULL; + + if ((gs.nthreads == 1) || gs.force_serial) { + // Can do it as one "task" + if (reduce_iter == NULL) { + // Allocate memory for output buffering if needed + vector out_buffer(need_output_buffering ? 
+ (self->memsizes[0] * BLOCK_SIZE1) : 0); + params.out_buffer = need_output_buffering ? &out_buffer[0] : NULL; + // Reset the iterator to allocate its buffers + if(NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { + return -1; + } + get_temps_space(params, params.mem, BLOCK_SIZE1); + Py_BEGIN_ALLOW_THREADS; + r = vm_engine_iter_task(iter, params.memsteps, + params, pc_error, &errmsg); + Py_END_ALLOW_THREADS; + free_temps_space(params, params.mem); + } + else { + if (reduction_outer_loop) { + char **dataptr; + NpyIter_IterNextFunc *iternext; + + dataptr = NpyIter_GetDataPtrArray(reduce_iter); + iternext = NpyIter_GetIterNext(reduce_iter, NULL); + if (iternext == NULL) { + return -1; + } + + get_temps_space(params, params.mem, BLOCK_SIZE1); + Py_BEGIN_ALLOW_THREADS; + do { + r = NpyIter_ResetBasePointers(iter, dataptr, &errmsg); + if (r >= 0) { + r = vm_engine_iter_outer_reduce_task(iter, + params.memsteps, params, + pc_error, &errmsg); + } + if (r < 0) { + break; + } + } while (iternext(reduce_iter)); + Py_END_ALLOW_THREADS; + free_temps_space(params, params.mem); + } + else { + char **dataptr; + NpyIter_IterNextFunc *iternext; + + dataptr = NpyIter_GetDataPtrArray(iter); + iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + return -1; + } + + get_temps_space(params, params.mem, BLOCK_SIZE1); + Py_BEGIN_ALLOW_THREADS; + do { + r = NpyIter_ResetBasePointers(reduce_iter, dataptr, + &errmsg); + if (r >= 0) { + r = vm_engine_iter_task(reduce_iter, params.memsteps, + params, pc_error, &errmsg); + } + if (r < 0) { + break; + } + } while (iternext(iter)); + Py_END_ALLOW_THREADS; + free_temps_space(params, params.mem); + } + } + } + else { + if (reduce_iter == NULL) { + r = vm_engine_iter_parallel(iter, params, need_output_buffering, + pc_error, &errmsg); + } + else { + errmsg = (char *) "Parallel engine doesn't support reduction yet"; + r = -1; + } + } + + if (r < 0 && errmsg != NULL) { + PyErr_SetString(PyExc_RuntimeError, errmsg); + } + + return 0; +} + 
/*
 * Evaluate a program that has no inputs (a constant expression), writing the
 * single resulting item to `output`.
 *
 * The interpreter is the textually included "interp_body.cpp", compiled here
 * with SINGLE_ITEM_CONST_LOOP and NO_OUTPUT_BUFFERING defined and BLOCK_SIZE
 * fixed at 1.  The locals `params`, `mem` and `memsteps` are presumably what
 * the included body reads -- confirm in interp_body.cpp before renaming them.
 *
 * *pc_error is set to -1 on entry.  Returns -1 if the program bytes cannot be
 * read or if the expression declares any inputs, 0 after the body has run to
 * completion.  The included body can also return on its own (BOUNDS_CHECK,
 * for instance, returns -2).
 */
static int
run_interpreter_const(NumExprObject *self, char *output, int *pc_error)
{
    vm_params params;
    Py_ssize_t plen;
    char **mem;
    npy_intp *memsteps;

    *pc_error = -1;
    if (PyBytes_AsStringAndSize(self->program, (char **)&(params.program),
                                &plen) < 0) {
        return -1;
    }
    /* This entry point only handles expressions without input operands. */
    if (self->n_inputs != 0) {
        return -1;
    }
    params.prog_len = (int)plen;
    params.output = output;
    params.inputs = NULL;
    params.index_data = NULL;
    params.n_inputs = self->n_inputs;
    params.n_constants = self->n_constants;
    params.n_temps = self->n_temps;
    params.mem = self->mem;
    memsteps = self->memsteps;
    params.memsizes = self->memsizes;
    /* r_end is the number of buffers; BOUNDS_CHECK compares operands to it. */
    params.r_end = (int)PyBytes_Size(self->fullsig);

    mem = params.mem;
    /* One item per temporary.
       NOTE(review): the return value is not checked, so an allocation failure
       goes unnoticed here. */
    get_temps_space(params, mem, 1);
#define SINGLE_ITEM_CONST_LOOP
#define BLOCK_SIZE 1
#define NO_OUTPUT_BUFFERING // Because it's constant
#include "interp_body.cpp"
#undef NO_OUTPUT_BUFFERING
#undef BLOCK_SIZE
#undef SINGLE_ITEM_CONST_LOOP
    /* NOTE(review): an early return from inside the included body would skip
       this call -- confirm against interp_body.cpp. */
    free_temps_space(params, mem);

    return 0;
}
gs.pid != getpid()) { + numexpr_set_nthreads(gs.nthreads); + } + + // Don't force serial mode by default + gs.force_serial = 0; + + // Check whether there's a reduction as the final step + is_reduction = last_opcode(self->program) > OP_REDUCTION; + + n_inputs = (int)PyTuple_Size(args); + if (PyBytes_Size(self->signature) != n_inputs) { + return PyErr_Format(PyExc_ValueError, + "number of inputs doesn't match program"); + } + else if (n_inputs+1 > NPY_MAXARGS) { + return PyErr_Format(PyExc_ValueError, + "too many inputs"); + } + + memset(operands, 0, sizeof(operands)); + memset(dtypes, 0, sizeof(dtypes)); + + if (kwds && PyDict_Size(kwds) > 0) { + tmp = PyDict_GetItemString(kwds, "casting"); // borrowed ref + if (tmp != NULL && !PyArray_CastingConverter(tmp, &casting)) { + return NULL; + } + tmp = PyDict_GetItemString(kwds, "order"); // borrowed ref + if (tmp != NULL && !PyArray_OrderConverter(tmp, &order)) { + return NULL; + } + tmp = PyDict_GetItemString(kwds, "ex_uses_vml"); // borrowed ref + if (tmp == NULL) { + return PyErr_Format(PyExc_ValueError, + "ex_uses_vml parameter is required"); + } +#ifdef USE_VML + if (tmp == Py_True) { + ex_uses_vml = 1; + } +#endif + // borrowed ref + operands[0] = (PyArrayObject *)PyDict_GetItemString(kwds, "out"); + if (operands[0] != NULL) { + if ((PyObject *)operands[0] == Py_None) { + operands[0] = NULL; + } + else if (!PyArray_Check(operands[0])) { + return PyErr_Format(PyExc_ValueError, + "out keyword parameter is not an array"); + } + else { + Py_INCREF(operands[0]); + } + } + } + + for (i = 0; i < n_inputs; i++) { + PyObject *o = PyTuple_GET_ITEM(args, i); // borrowed ref + PyObject *a; + char c = PyBytes_AS_STRING(self->signature)[i]; + int typecode = typecode_from_char(c); + // Convert it if it's not an array + if (!PyArray_Check(o)) { + if (typecode == -1) goto fail; + a = PyArray_FROM_OTF(o, typecode, NPY_ARRAY_NOTSWAPPED); + } + else { + Py_INCREF(o); + a = o; + } + operands[i+1] = (PyArrayObject *)a; + dtypes[i+1] = 
PyArray_DescrFromType(typecode); + + if (operands[0] != NULL) { + // Check for the case where "out" is one of the inputs + // TODO: Probably should deal with the general overlap case, + // but NumPy ufuncs don't do that yet either. + if (PyArray_DATA(operands[0]) == PyArray_DATA(operands[i+1])) { + need_output_buffering = true; + } + } + + if (operands[i+1] == NULL || dtypes[i+1] == NULL) { + goto fail; + } + op_flags[i+1] = NPY_ITER_READONLY| +#ifdef USE_VML + (ex_uses_vml ? (NPY_ITER_CONTIG|NPY_ITER_ALIGNED) : 0)| +#endif +#ifndef USE_UNALIGNED_ACCESS + NPY_ITER_ALIGNED| +#endif + NPY_ITER_NBO + ; + } + + if (is_reduction) { + // A reduction can not result in a string, + // so we don't need to worry about item sizes here. + char retsig = get_return_sig(self->program); + reduction_axis = get_reduction_axis(self->program); + + // Need to set up op_axes for the non-reduction part + if (reduction_axis != 255) { + // Get the number of broadcast dimensions + for (i = 0; i < n_inputs; ++i) { + int ndim = PyArray_NDIM(operands[i+1]); + if (ndim > oa_ndim) { + oa_ndim = ndim; + } + } + if (reduction_axis < 0 || reduction_axis >= oa_ndim) { + PyErr_Format(PyExc_ValueError, + "reduction axis is out of bounds"); + goto fail; + } + // Fill in the op_axes + op_axes_ptrs[0] = NULL; + op_axes_reduction_values[0] = -1; + for (i = 0; i < n_inputs; ++i) { + int j = 0, idim, ndim = PyArray_NDIM(operands[i+1]); + for (idim = 0; idim < oa_ndim-ndim; ++idim) { + if (idim != reduction_axis) { + op_axes_values[i+1][j++] = -1; + } + else { + op_axes_reduction_values[i+1] = -1; + } + } + for (idim = oa_ndim-ndim; idim < oa_ndim; ++idim) { + if (idim != reduction_axis) { + op_axes_values[i+1][j++] = idim-(oa_ndim-ndim); + } + else { + npy_intp size = PyArray_DIM(operands[i+1], + idim-(oa_ndim-ndim)); + if (size > reduction_size) { + reduction_size = size; + } + op_axes_reduction_values[i+1] = idim-(oa_ndim-ndim); + } + } + op_axes_ptrs[i+1] = op_axes_values[i+1]; + } + // op_axes has one 
less than the broadcast dimensions + --oa_ndim; + if (oa_ndim > 0) { + op_axes = op_axes_ptrs; + } + else { + reduction_size = 1; + } + } + // A full reduction can be done without nested iteration + if (oa_ndim == 0) { + full_reduction = true; + if (operands[0] == NULL) { + npy_intp dim = 1; + operands[0] = (PyArrayObject *)PyArray_SimpleNew(0, &dim, + typecode_from_char(retsig)); + if (!operands[0]) + goto fail; + } else if (PyArray_SIZE(operands[0]) != 1) { + PyErr_Format(PyExc_ValueError, + "out argument must have size 1 for a full reduction"); + goto fail; + } + } + + dtypes[0] = PyArray_DescrFromType(typecode_from_char(retsig)); + + op_flags[0] = NPY_ITER_READWRITE| + NPY_ITER_ALLOCATE| + // Copy, because it can't buffer the reduction + NPY_ITER_UPDATEIFCOPY| + NPY_ITER_NBO| +#ifndef USE_UNALIGNED_ACCESS + NPY_ITER_ALIGNED| +#endif + (oa_ndim == 0 ? 0 : NPY_ITER_NO_BROADCAST); + } + else { + char retsig = get_return_sig(self->program); + if (retsig != 's') { + dtypes[0] = PyArray_DescrFromType(typecode_from_char(retsig)); + } else { + /* Since the *only* supported operation returning a string + * is a copy, the size of returned strings + * can be directly gotten from the first (and only) + * input/constant/temporary. 
*/ + if (n_inputs > 0) { // input, like in 'a' where a -> 'foo' + dtypes[0] = PyArray_DESCR(operands[1]); + Py_INCREF(dtypes[0]); + } else { // constant, like in '"foo"' + dtypes[0] = PyArray_DescrNewFromType(NPY_STRING); + PyDataType_SET_ELSIZE(dtypes[0], (npy_intp)self->memsizes[1]); + } // no string temporaries, so no third case + } + if (dtypes[0] == NULL) { + goto fail; + } + op_flags[0] = NPY_ITER_WRITEONLY| + NPY_ITER_ALLOCATE| + NPY_ITER_CONTIG| + NPY_ITER_NBO| +#ifndef USE_UNALIGNED_ACCESS + NPY_ITER_ALIGNED| +#endif + NPY_ITER_NO_BROADCAST; + } + + // Check for empty arrays in expression + if (n_inputs > 0) { + char retsig = get_return_sig(self->program); + + // Check length for all inputs + int zeroi, zerolen = 0; + for (i=0; i < n_inputs; i++) { + if (PyArray_SIZE(operands[i+1]) == 0) { + zerolen = 1; + zeroi = i+1; + break; + } + } + + if (zerolen != 0) { + // Allocate the output + int ndim = PyArray_NDIM(operands[zeroi]); + npy_intp *dims = PyArray_DIMS(operands[zeroi]); + operands[0] = (PyArrayObject *)PyArray_SimpleNew(ndim, dims, + typecode_from_char(retsig)); + if (operands[0] == NULL) { + goto fail; + } + + ret = (PyObject *)operands[0]; + Py_INCREF(ret); + goto cleanup_and_exit; + } + } + + + /* A case with a single constant output */ + PyArrayObject *singleton; + bool writeback; + // NOTE: cannot assign on declaration due to `goto` statements + singleton = NULL; + writeback = false; + if (n_inputs == 0) { + char retsig = get_return_sig(self->program); + + /* Allocate the output */ + if (operands[0] == NULL) { + npy_intp dim = 1; + operands[0] = (PyArrayObject *)PyArray_SimpleNew(0, &dim, + typecode_from_char(retsig)); + if (operands[0] == NULL) { + goto fail; + } + } + else { // Use the provided output array + if (PyArray_SIZE(operands[0]) != 1) { + PyErr_SetString(PyExc_ValueError, + "output for a constant expression must have size 1"); + goto fail; + } + else if (!PyArray_ISWRITEABLE(operands[0])) { + PyErr_SetString(PyExc_ValueError, + 
"output is not writeable"); + goto fail; + } + Py_INCREF(dtypes[0]); + + // NumPy folks suggested using WRITEBACKIFCOPY to resolve issue #397 + singleton = (PyArrayObject *)PyArray_FromArray(operands[0], dtypes[0], + NPY_ARRAY_ALIGNED|NPY_ARRAY_WRITEBACKIFCOPY); + if (singleton == NULL) { + goto fail; + } + writeback = true; + Py_DECREF(operands[0]); + operands[0] = singleton; + } + + r = run_interpreter_const(self, PyArray_BYTES(operands[0]), &pc_error); + + if (writeback) { + // Write-back our copy to the passed in output array if we had to make a copy + // (which only happens if the input was not aligned) + int retval = PyArray_ResolveWritebackIfCopy(singleton); + if (retval < 0) { + // 1 means it copied the value, 0 means no copy, only -1 is an error. + PyErr_Format(PyExc_ValueError, "Writeback to singleton failed with error code: %d", retval); + goto fail; + } + } + ret = (PyObject *)operands[0]; + Py_INCREF(ret); + goto cleanup_and_exit; + } + + + /* Allocate the iterator or nested iterators */ + if (reduction_size < 0 || full_reduction) { + /* When there's no reduction, reduction_size is 1 as well */ + // RAM: in issue #277 this was also the case for reductions on arrays + // with axis=0 having singleton dimension, i.e. such ops were interpreted + // as full_reductions when they weren't in Numpy. As such, the default + // reduction_size is now -1 and we add the flag for full_reduction, + // e.g. 
ne.evaluate("sum(a)")" + iter = NpyIter_AdvancedNew(n_inputs+1, operands, + NPY_ITER_BUFFERED| + NPY_ITER_REDUCE_OK| + NPY_ITER_RANGED| + NPY_ITER_DELAY_BUFALLOC| + NPY_ITER_EXTERNAL_LOOP, + order, casting, + op_flags, dtypes, + -1, NULL, NULL, + BLOCK_SIZE1); + if (iter == NULL) { + goto fail; + } + } else { + npy_uint32 op_flags_outer[NPY_MAXDIMS]; + /* The outer loop is unbuffered */ + op_flags_outer[0] = NPY_ITER_READWRITE| + NPY_ITER_ALLOCATE| + NPY_ITER_NO_BROADCAST; + for (i = 0; i < n_inputs; ++i) { + op_flags_outer[i+1] = NPY_ITER_READONLY; + } + /* Arbitrary threshold for which is the inner loop...benchmark? */ + if (reduction_size < 64) { + reduction_outer_loop = true; + iter = NpyIter_AdvancedNew(n_inputs+1, operands, + NPY_ITER_BUFFERED| + NPY_ITER_RANGED| + NPY_ITER_DELAY_BUFALLOC| + NPY_ITER_EXTERNAL_LOOP, + order, casting, + op_flags, dtypes, + oa_ndim, op_axes, NULL, + BLOCK_SIZE1); + if (iter == NULL) { + goto fail; + } + + /* If the output was allocated, get it for the second iterator */ + if (operands[0] == NULL) { + operands[0] = NpyIter_GetOperandArray(iter)[0]; + Py_INCREF(operands[0]); + } + + op_axes[0] = &op_axes_reduction_values[0]; + for (i = 0; i < n_inputs; ++i) { + op_axes[i+1] = &op_axes_reduction_values[i+1]; + } + op_flags_outer[0] &= ~NPY_ITER_NO_BROADCAST; + reduce_iter = NpyIter_AdvancedNew(n_inputs+1, operands, + NPY_ITER_REDUCE_OK, + order, casting, + op_flags_outer, NULL, + 1, op_axes, NULL, + 0); + if (reduce_iter == NULL) { + goto fail; + } + } + else { + PyArray_Descr *dtypes_outer[NPY_MAXDIMS]; + + /* If the output is being allocated, need to specify its dtype */ + dtypes_outer[0] = dtypes[0]; + for (i = 0; i < n_inputs; ++i) { + dtypes_outer[i+1] = NULL; + } + iter = NpyIter_AdvancedNew(n_inputs+1, operands, + NPY_ITER_RANGED, + order, casting, + op_flags_outer, dtypes_outer, + oa_ndim, op_axes, NULL, + 0); + if (iter == NULL) { + goto fail; + } + + /* If the output was allocated, get it for the second iterator */ + if 
(operands[0] == NULL) { + operands[0] = NpyIter_GetOperandArray(iter)[0]; + Py_INCREF(operands[0]); + } + + op_axes[0] = &op_axes_reduction_values[0]; + for (i = 0; i < n_inputs; ++i) { + op_axes[i+1] = &op_axes_reduction_values[i+1]; + } + op_flags[0] &= ~NPY_ITER_NO_BROADCAST; + reduce_iter = NpyIter_AdvancedNew(n_inputs+1, operands, + NPY_ITER_BUFFERED| + NPY_ITER_REDUCE_OK| + NPY_ITER_DELAY_BUFALLOC| + NPY_ITER_EXTERNAL_LOOP, + order, casting, + op_flags, dtypes, + 1, op_axes, NULL, + BLOCK_SIZE1); + if (reduce_iter == NULL) { + goto fail; + } + } + } + + /* Initialize the output to the reduction unit */ + if (is_reduction) { + PyArrayObject *a = NpyIter_GetOperandArray(iter)[0]; + PyObject *fill; + int op = last_opcode(self->program); + if (op < OP_PROD) { + /* sum identity is 0 */ + fill = PyLong_FromLong(0); + } else if (op >= OP_PROD && op < OP_MIN) { + /* product identity is 1 */ + fill = PyLong_FromLong(1); + } else if (PyArray_DESCR(a)->kind == 'f') { + /* floating point min/max identity is NaN */ + fill = PyFloat_FromDouble(NE_NAN); + } else if (op >= OP_MIN && op < OP_MAX) { + /* integer min identity */ + fill = PyLong_FromLong(LONG_MAX); + } else { + /* integer max identity */ + fill = PyLong_FromLong(LONG_MIN); + } + PyArray_FillWithScalar(a, fill); + Py_DECREF(fill); + } + + /* Get the sizes of all the operands */ + dtypes_tmp = NpyIter_GetDescrArray(iter); + for (i = 0; i < n_inputs+1; ++i) { + self->memsizes[i] = PyDataType_ELSIZE(dtypes_tmp[i]); + } + + /* For small calculations, just use 1 thread */ + if (NpyIter_GetIterSize(iter) < 2*BLOCK_SIZE1) { + gs.force_serial = 1; + } + + /* Reductions do not support parallel execution yet */ + if (is_reduction) { + gs.force_serial = 1; + } + + r = run_interpreter(self, iter, reduce_iter, + reduction_outer_loop, need_output_buffering, + &pc_error); + + if (r < 0) { + if (r == -1) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "an error occurred while running the program"); + } + } 
else if (r == -2) { + PyErr_Format(PyExc_RuntimeError, + "bad argument at pc=%d", pc_error); + } else if (r == -3) { + PyErr_Format(PyExc_RuntimeError, + "bad opcode at pc=%d", pc_error); + } else { + PyErr_SetString(PyExc_RuntimeError, + "unknown error occurred while running the program"); + } + goto fail; + } + + /* Get the output from the iterator */ + ret = (PyObject *)NpyIter_GetOperandArray(iter)[0]; + Py_INCREF(ret); + + NpyIter_Deallocate(iter); + if (reduce_iter != NULL) { + NpyIter_Deallocate(reduce_iter); + } +cleanup_and_exit: + for (i = 0; i < n_inputs+1; i++) { + Py_XDECREF(operands[i]); + Py_XDECREF(dtypes[i]); + } + + return ret; +fail: + for (i = 0; i < n_inputs+1; i++) { + Py_XDECREF(operands[i]); + Py_XDECREF(dtypes[i]); + } + if (iter != NULL) { + NpyIter_Deallocate(iter); + } + if (reduce_iter != NULL) { + NpyIter_Deallocate(reduce_iter); + } + + return NULL; +} + +/* +Local Variables: + c-basic-offset: 4 +End: +*/ diff --git a/venv/Lib/site-packages/numexpr/interpreter.hpp b/venv/Lib/site-packages/numexpr/interpreter.hpp new file mode 100644 index 0000000..004bddd --- /dev/null +++ b/venv/Lib/site-packages/numexpr/interpreter.hpp @@ -0,0 +1,137 @@ +#ifndef NUMEXPR_INTERPRETER_HPP +#define NUMEXPR_INTERPRETER_HPP + +#include "numexpr_config.hpp" + +// Forward declaration +struct NumExprObject; + +enum OpCodes { +#define OPCODE(n, e, ...) e = n, +#include "opcodes.hpp" +#undef OPCODE +}; + +enum FuncFFCodes { +#define FUNC_FF(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_FF +}; + +enum FuncBFCodes { +#define FUNC_BF(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_BF +}; + +enum FuncFFFCodes { +#define FUNC_FFF(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_FFF +}; + +enum FuncDDCodes { +#define FUNC_DD(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_DD +}; + +enum FuncBDCodes { +#define FUNC_BD(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_BD +}; + +enum FuncBCCodes { +#define FUNC_BC(fop, ...) 
fop, +#include "functions.hpp" +#undef FUNC_BC +}; + +enum FuncIICodes { +#define FUNC_II(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_II +}; + +enum FuncLLCodes { +#define FUNC_LL(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_LL +}; + +enum FuncDDDCodes { +#define FUNC_DDD(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_DDD +}; + +enum FuncCCCodes { +#define FUNC_CC(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_CC +}; + +enum FuncCCCCodes { +#define FUNC_CCC(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_CCC +}; + +struct vm_params { + int prog_len; + unsigned char *program; + int n_inputs; + int n_constants; + int n_temps; + unsigned int r_end; + char *output; + char **inputs; + char **mem; + npy_intp *memsteps; + npy_intp *memsizes; + struct index_data *index_data; + // Memory for output buffering. If output buffering is unneeded, + // it contains NULL. + char *out_buffer; +}; + +// Structure for parameters in worker threads +struct thread_data { + npy_intp start; + npy_intp vlen; + npy_intp block_size; + vm_params params; + int ret_code; + int *pc_error; + char **errmsg; + // NOTE: memsteps, iter, and reduce_iter are arrays, they MUST be allocated + // to length `global_max_threads` before module load. 
+ // One memsteps array per thread + // npy_intp *memsteps[MAX_THREADS]; + npy_intp **memsteps; + // One iterator per thread */ + // NpyIter *iter[MAX_THREADS]; + NpyIter **iter; + // When doing nested iteration for a reduction + // NpyIter *reduce_iter[MAX_THREADS] + NpyIter **reduce_iter; + // Flag indicating reduction is the outer loop instead of the inner + bool reduction_outer_loop; + // Flag indicating whether output buffering is needed + bool need_output_buffering; +}; + +// Global state which holds thread parameters +extern thread_data th_params; + +PyObject *NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds); + +char get_return_sig(PyObject* program); +int check_program(NumExprObject *self); +int get_temps_space(const vm_params& params, char **mem, size_t block_size); +void free_temps_space(const vm_params& params, char **mem); +int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, + const vm_params& params, int *pc_error, char **errmsg); + +#endif // NUMEXPR_INTERPRETER_HPP diff --git a/venv/Lib/site-packages/numexpr/missing_posix_functions.hpp b/venv/Lib/site-packages/numexpr/missing_posix_functions.hpp new file mode 100644 index 0000000..bcbcb87 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/missing_posix_functions.hpp @@ -0,0 +1,102 @@ +#ifndef NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP +#define NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP + +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. 
+**********************************************************************/ + +/* These functions are not included in some non-POSIX compilers, + like MSVC 7.1 */ + + +/* Double precision versions */ + +inline double log1p(double x) +{ + double u = 1.0 + x; + if (u == 1.0) { + return x; + } else { + return log(u) * x / (u-1.0); + } +} + +inline double expm1(double x) +{ + double u = exp(x); + if (u == 1.0) { + return x; + } else if (u-1.0 == -1.0) { + return -1; + } else { + return (u-1.0) * x/log(u); + } +} + +inline double asinh(double xx) +{ + double x, d; + int sign; + if (xx < 0.0) { + sign = -1; + x = -xx; + } + else { + sign = 1; + x = xx; + } + if (x > 1e8) { + d = x; + } else { + d = sqrt(x*x + 1.0); + } + return sign*log1p(x*(1.0 + x/(d+1.0))); +} + +inline double acosh(double x) +{ + return 2*log(sqrt((x+1.0)/2)+sqrt((x-1.0)/2)); +} + +inline double atanh(double x) +{ + /* This definition is different from that in NumPy 1.3 and follows + the convention of MatLab. This will allow for double checking both + approaches. */ + return 0.5*log((1.0+x)/(1.0-x)); +} + + +/* Single precision versions */ + +inline float log1pf(float x) +{ + return (float) log1p((double)x); +} + +inline float expm1f(float x) +{ + return (float) expm1((double)x); +} + +inline float asinhf(float x) +{ + return (float) asinh((double)x); +} + +inline float acoshf(float x) +{ + return (float) acosh((double)x); +} + +inline float atanhf(float x) +{ + return (float) atanh((double)x); +} + +#endif // NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP diff --git a/venv/Lib/site-packages/numexpr/module.cpp b/venv/Lib/site-packages/numexpr/module.cpp new file mode 100644 index 0000000..67629bd --- /dev/null +++ b/venv/Lib/site-packages/numexpr/module.cpp @@ -0,0 +1,552 @@ +// Numexpr - Fast numerical array expression evaluator for NumPy. +// +// License: MIT +// Author: See AUTHORS.txt +// +// See LICENSE.txt for details about copyright and rights to use. 
+// +// module.cpp contains the CPython-specific module exposure. + +#define DO_NUMPY_IMPORT_ARRAY + +#include "module.hpp" +#include +#include + +#include + +#include "interpreter.hpp" +#include "numexpr_object.hpp" + +using namespace std; + +// Global state. The file interpreter.hpp also has some global state +// in its 'th_params' variable +global_state gs; +long global_max_threads=DEFAULT_MAX_THREADS; + +/* Do the worker job for a certain thread */ +void *th_worker(void *tidptr) +{ + int tid = *(int *)tidptr; + /* Parameters for threads */ + npy_intp start; + npy_intp vlen; + npy_intp block_size; + NpyIter *iter; + vm_params params; + int *pc_error; + int ret; + int n_inputs; + int n_constants; + int n_temps; + size_t memsize; + char **mem; + npy_intp *memsteps; + npy_intp istart, iend; + char **errmsg; + // For output buffering if needed + vector out_buffer; + + while (1) { + + /* Sentinels have to be initialised yet */ + if (tid == 0) { + gs.init_sentinels_done = 0; + } + + /* Meeting point for all threads (wait for initialization) */ + pthread_mutex_lock(&gs.count_threads_mutex); + if (gs.count_threads < gs.nthreads) { + gs.count_threads++; + /* Beware of spurious wakeups. See issue pydata/numexpr#306. 
*/ + do { + pthread_cond_wait(&gs.count_threads_cv, + &gs.count_threads_mutex); + } while (!gs.barrier_passed); + } + else { + gs.barrier_passed = 1; + pthread_cond_broadcast(&gs.count_threads_cv); + } + pthread_mutex_unlock(&gs.count_threads_mutex); + + /* Check if thread has been asked to return */ + if (gs.end_threads) { + return(0); + } + + /* Get parameters for this thread before entering the main loop */ + start = th_params.start; + vlen = th_params.vlen; + block_size = th_params.block_size; + params = th_params.params; + pc_error = th_params.pc_error; + + // If output buffering is needed, allocate it + if (th_params.need_output_buffering) { + out_buffer.resize(params.memsizes[0] * BLOCK_SIZE1); + params.out_buffer = &out_buffer[0]; + } else { + params.out_buffer = NULL; + } + + /* Populate private data for each thread */ + n_inputs = params.n_inputs; + n_constants = params.n_constants; + n_temps = params.n_temps; + memsize = (1+n_inputs+n_constants+n_temps) * sizeof(char *); + /* XXX malloc seems thread safe for POSIX, but for Win? 
*/ + mem = (char **)malloc(memsize); + memcpy(mem, params.mem, memsize); + + errmsg = th_params.errmsg; + + params.mem = mem; + + /* Loop over blocks */ + pthread_mutex_lock(&gs.count_mutex); + if (!gs.init_sentinels_done) { + /* Set sentinels and other global variables */ + gs.gindex = start; + istart = gs.gindex; + iend = istart + block_size; + if (iend > vlen) { + iend = vlen; + } + gs.init_sentinels_done = 1; /* sentinels have been initialised */ + gs.giveup = 0; /* no giveup initially */ + } else { + gs.gindex += block_size; + istart = gs.gindex; + iend = istart + block_size; + if (iend > vlen) { + iend = vlen; + } + } + /* Grab one of the iterators */ + iter = th_params.iter[tid]; + if (iter == NULL) { + th_params.ret_code = -1; + gs.giveup = 1; + } + memsteps = th_params.memsteps[tid]; + /* Get temporary space for each thread */ + ret = get_temps_space(params, mem, BLOCK_SIZE1); + if (ret < 0) { + /* Propagate error to main thread */ + th_params.ret_code = ret; + gs.giveup = 1; + } + pthread_mutex_unlock(&gs.count_mutex); + + while (istart < vlen && !gs.giveup) { + /* Reset the iterator to the range for this task */ + ret = NpyIter_ResetToIterIndexRange(iter, istart, iend, + errmsg); + /* Execute the task */ + if (ret >= 0) { + ret = vm_engine_iter_task(iter, memsteps, params, pc_error, errmsg); + } + + if (ret < 0) { + pthread_mutex_lock(&gs.count_mutex); + gs.giveup = 1; + /* Propagate error to main thread */ + th_params.ret_code = ret; + pthread_mutex_unlock(&gs.count_mutex); + break; + } + + pthread_mutex_lock(&gs.count_mutex); + gs.gindex += block_size; + istart = gs.gindex; + iend = istart + block_size; + if (iend > vlen) { + iend = vlen; + } + pthread_mutex_unlock(&gs.count_mutex); + } + + /* Meeting point for all threads (wait for finalization) */ + pthread_mutex_lock(&gs.count_threads_mutex); + if (gs.count_threads > 0) { + gs.count_threads--; + do { + pthread_cond_wait(&gs.count_threads_cv, + &gs.count_threads_mutex); + } while (gs.barrier_passed); 
+ } + else { + gs.barrier_passed = 0; + pthread_cond_broadcast(&gs.count_threads_cv); + } + pthread_mutex_unlock(&gs.count_threads_mutex); + + /* Release resources */ + free_temps_space(params, mem); + free(mem); + + } /* closes while(1) */ + + /* This should never be reached, but anyway */ + return(0); +} + +/* Initialize threads */ +int init_threads(void) +{ + int tid, rc; + + if ( !(gs.nthreads > 1 && (!gs.init_threads_done || gs.pid != getpid())) ) { + /* Thread pool must always be initialized once and once only. */ + return(0); + } + + /* Initialize mutex and condition variable objects */ + pthread_mutex_init(&gs.count_mutex, NULL); + pthread_mutex_init(&gs.parallel_mutex, NULL); + + /* Barrier initialization */ + pthread_mutex_init(&gs.count_threads_mutex, NULL); + pthread_cond_init(&gs.count_threads_cv, NULL); + gs.count_threads = 0; /* Reset threads counter */ + gs.barrier_passed = 0; + + /* + * Our worker threads should not deal with signals from the rest of the + * application - mask everything temporarily in this thread, so our workers + * can inherit that mask + */ + sigset_t sigset_block_all, sigset_restore; + rc = sigfillset(&sigset_block_all); + if (rc != 0) { + fprintf(stderr, "ERROR; failed to block signals: sigfillset: %s", + strerror(rc)); + exit(-1); + } + rc = pthread_sigmask( SIG_BLOCK, &sigset_block_all, &sigset_restore); + if (rc != 0) { + fprintf(stderr, "ERROR; failed to block signals: pthread_sigmask: %s", + strerror(rc)); + exit(-1); + } + + /* Now create the threads */ + for (tid = 0; tid < gs.nthreads; tid++) { + gs.tids[tid] = tid; + rc = pthread_create(&gs.threads[tid], NULL, th_worker, + (void *)&gs.tids[tid]); + if (rc) { + fprintf(stderr, + "ERROR; return code from pthread_create() is %d\n", rc); + fprintf(stderr, "\tError detail: %s\n", strerror(rc)); + exit(-1); + } + } + + /* + * Restore the signal mask so the main thread can process signals as + * expected + */ + rc = pthread_sigmask( SIG_SETMASK, &sigset_restore, NULL); + if 
(rc != 0) { + fprintf(stderr, + "ERROR: failed to restore signal mask: pthread_sigmask: %s", + strerror(rc)); + exit(-1); + } + + gs.init_threads_done = 1; /* Initialization done! */ + gs.pid = (int)getpid(); /* save the PID for this process */ + + return(0); +} + +/* Set the number of threads in numexpr's VM */ +int numexpr_set_nthreads(int nthreads_new) +{ + int nthreads_old = gs.nthreads; + int t, rc; + void *status; + + // if (nthreads_new > MAX_THREADS) { + // fprintf(stderr, + // "Error. nthreads cannot be larger than MAX_THREADS (%d)", + // MAX_THREADS); + // return -1; + // } + if (nthreads_new > global_max_threads) { + fprintf(stderr, + "Error. nthreads cannot be larger than environment variable \"NUMEXPR_MAX_THREADS\" (%ld)", + global_max_threads); + return -1; + } + else if (nthreads_new <= 0) { + fprintf(stderr, "Error. nthreads must be a positive integer"); + return -1; + } + + /* Only join threads if they are not initialized or if our PID is + different from that in pid var (probably means that we are a + subprocess, and thus threads are non-existent). */ + if (gs.nthreads > 1 && gs.init_threads_done && gs.pid == getpid()) { + /* Tell all existing threads to finish */ + gs.end_threads = 1; + pthread_mutex_lock(&gs.count_threads_mutex); + if (gs.count_threads < gs.nthreads) { + gs.count_threads++; + do { + pthread_cond_wait(&gs.count_threads_cv, + &gs.count_threads_mutex); + } while (!gs.barrier_passed); + } + else { + gs.barrier_passed = 1; + pthread_cond_broadcast(&gs.count_threads_cv); + } + pthread_mutex_unlock(&gs.count_threads_mutex); + + /* Join exiting threads */ + for (t=0; t +#include +#include + +#include "numexpr_config.hpp" + +struct global_state { + /* Global variables for threads */ + int nthreads; /* number of desired threads in pool */ + int init_threads_done; /* pool of threads initialized? */ + int end_threads; /* should exisiting threads end? 
*/ + // pthread_t threads[MAX_THREADS]; /* opaque structure for threads */ + // int tids[MAX_THREADS]; /* ID per each thread */ + /* NOTE: threads and tids are arrays, they MUST be allocated to length + `global_max_threads` before module load. */ + pthread_t *threads; /* opaque structure for threads */ + int *tids; /* ID per each thread */ + npy_intp gindex; /* global index for all threads */ + int init_sentinels_done; /* sentinels initialized? */ + int giveup; /* should parallel code giveup? */ + int force_serial; /* force serial code instead of parallel? */ + int pid; /* the PID for this process */ + + /* Synchronization variables for threadpool state */ + pthread_mutex_t count_mutex; + int count_threads; + int barrier_passed; /* indicates if the thread pool's thread barrier + is unlocked and ready for the VM to process.*/ + pthread_mutex_t count_threads_mutex; + pthread_cond_t count_threads_cv; + + /* Mutual exclusion for access to global thread params (th_params) */ + pthread_mutex_t parallel_mutex; + + global_state() { + nthreads = 1; + init_threads_done = 0; + barrier_passed = 0; + end_threads = 0; + pid = 0; + } +}; + +extern global_state gs; + +int numexpr_set_nthreads(int nthreads_new); + +#endif // NUMEXPR_MODULE_HPP diff --git a/venv/Lib/site-packages/numexpr/msvc_function_stubs.hpp b/venv/Lib/site-packages/numexpr/msvc_function_stubs.hpp new file mode 100644 index 0000000..8e4d722 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/msvc_function_stubs.hpp @@ -0,0 +1,231 @@ +#include // for _finite, _isnan on MSVC + +#ifndef NUMEXPR_MSVC_FUNCTION_STUBS_HPP +#define NUMEXPR_MSVC_FUNCTION_STUBS_HPP + +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* Declare stub functions for MSVC. 
It turns out that single precision + definitions in are actually #define'd and are not usable + as function pointers :-/ */ + +/* Due to casting problems (normally return ints not bools, easiest to define +non-overloaded wrappers for these functions) */ +// MSVC version: use global ::isfinite / ::isnan +inline bool isfinitef_(float x) { return !!::_finite(x); } // MSVC has _finite +inline bool isnanf_(float x) { return !!::_isnan(x); } // MSVC has _isnan +inline bool isfinited(double x) { return !!::_finite(x); } +inline bool isnand(double x) { return !!::_isnan(x); } +inline bool isinfd(double x) { return !!::isinf(x); } +inline bool isinff_(float x) { return !!::isinf(x); } + +// To handle overloading of fmax/fmin in cmath and match NumPy behaviour for NaNs +inline double fmaxd(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmax(x, y); } +inline double fmind(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmin(x, y); } + + +#if _MSC_VER < 1400 // 1310 == MSVC 7.1 + /* Apparently, single precision functions are not included in MSVC 7.1 */ + + #define sqrtf(x) ((float)sqrt((double)(x))) + #define sinf(x) ((float)sin((double)(x))) + #define cosf(x) ((float)cos((double)(x))) + #define tanf(x) ((float)tan((double)(x))) + #define asinf(x) ((float)asin((double)(x))) + #define acosf(x) ((float)acos((double)(x))) + #define atanf(x) ((float)atan((double)(x))) + #define sinhf(x) ((float)sinh((double)(x))) + #define coshf(x) ((float)cosh((double)(x))) + #define tanhf(x) ((float)tanh((double)(x))) + #define asinhf(x) ((float)asinh((double)(x))) + #define acoshf(x) ((float)acosh((double)(x))) + #define atanhf(x) ((float)atanh((double)(x))) + #define logf(x) ((float)log((double)(x))) + #define log1pf(x) ((float)log1p((double)(x))) + #define log10f(x) ((float)log10((double)(x))) + #define log2f(x) ((float)log2((double)(x))) + #define expf(x) ((float)exp((double)(x))) + #define expm1f(x) ((float)expm1((double)(x))) + #define fabsf(x) 
((float)fabs((double)(x))) + #define fmodf(x, y) ((float)fmod((double)(x), (double)(y))) + #define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) + #define hypotf(x, y) ((float)hypot((double)(x), (double)(y))) + #define copysignf(x, y) ((float)copysign((double)(x), (double)(y))) + #define nextafterf(x, y) ((float)nextafter((double)(x), (double)(y))) + #define ceilf(x) ((float)ceil((double)(x))) + #define hypotf(x) ((float)hypot((double)(x))) + #define rintf(x) ((float)rint((double)(x))) + #define truncf(x) ((float)trunc((double)(x))) + + + /* The next are directly called from interp_body.cpp */ + #define powf(x, y) ((float)pow((double)(x), (double)(y))) + #define floorf(x) ((float)floor((double)(x))) + + #define fmaxf_(x, y) ((float)fmaxd((double)(x), (double)(y))) // define fmaxf_ since fmaxf doesn't exist for early MSVC + #define fminf_(x, y) ((float)fmind((double)(x), (double)(y))) +#else + inline float fmaxf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? NAN : fmaxf(x, y); } + inline float fminf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? 
NAN : fminf(x, y); } +#endif // _MSC_VER < 1400 + + +/* Now the actual stubs */ + +inline float sqrtf2(float x) { + return sqrtf(x); +} + +inline float sinf2(float x) { + return sinf(x); +} + +inline float cosf2(float x) { + return cosf(x); +} + +inline float tanf2(float x) { + return tanf(x); +} + +inline float asinf2(float x) { + return asinf(x); +} + +inline float acosf2(float x) { + return acosf(x); +} + +inline float atanf2(float x) { + return atanf(x); +} + +inline float sinhf2(float x) { + return sinhf(x); +} + +inline float coshf2(float x) { + return coshf(x); +} + +inline float tanhf2(float x) { + return tanhf(x); +} + +inline float asinhf2(float x) { + return asinhf(x); +} + +inline float acoshf2(float x) { + return acoshf(x); +} + +inline float atanhf2(float x) { + return atanhf(x); +} + +inline float logf2(float x) { + return logf(x); +} + +inline float log1pf2(float x) { + return log1pf(x); +} + +inline float log10f2(float x) { + return log10f(x); +} + +inline float log2f2(float x) { + return log2f(x); +} + +inline float expf2(float x) { + return expf(x); +} + +inline float expm1f2(float x) { + return expm1f(x); +} + +inline float fabsf2(float x) { + return fabsf(x); +} + +inline float fmodf2(float x, float y) { + return fmodf(x, y); +} + +inline float atan2f2(float x, float y) { + return atan2f(x, y); +} + +inline float hypotf2(float x, float y) { + return hypotf(x, y); +} + +inline float nextafterf2(float x, float y) { + return nextafterf(x, y); +} + +inline float copysignf2(float x, float y) { + return copysignf(x, y); +} + +inline float fmaxf2(float x, float y) { + return fmaxf_(x, y); +} + +inline float fminf2(float x, float y) { + return fminf_(x, y); +} + + +// Boolean output functions +inline bool isnanf2(float x) { + return isnanf_(x); +} + +inline bool isfinitef2(float x) { + return isfinitef_(x); +} + +inline bool isinff2(float x) { + return isinff_(x); +} + + +// Needed for allowing the internal casting in numexpr machinery for +// 
conjugate operations +inline float fconjf2(float x) { + return x; +} + +inline float ceilf2(float x) { + return ceilf(x); +} + +inline float floorf2(float x) { + return floorf(x); +} + +inline float rintf2(float x) { + return rintf(x); +} + +inline float truncf2(float x) { + return truncf(x); +} + +inline bool signbitf2(float x) { + return signbitf(x); +} + +#endif // NUMEXPR_MSVC_FUNCTION_STUBS_HPP diff --git a/venv/Lib/site-packages/numexpr/necompiler.py b/venv/Lib/site-packages/numexpr/necompiler.py new file mode 100644 index 0000000..8b80737 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/necompiler.py @@ -0,0 +1,1022 @@ +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. +#################################################################### + +import __future__ + +import os +import re +import sys +import threading +from typing import Dict, Optional + +import numpy + +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE +from numexpr import expressions, interpreter, use_vml +from numexpr.utils import CacheDict, ContextDict + +# Declare a double type that does not exist in Python space +double = numpy.double +double = numpy.double + +int_ = numpy.int32 +long_ = numpy.int64 + +typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float', 'd': 'double', + 'c': 'complex', 'n': 'none', 's': 'str'} +kind_to_typecode = {'bool': 'b', 'int': 'i', 'long': 'l', 'float': 'f', 'double': 'd', + 'complex': 'c', 'bytes': 's', 'str': 's', 'none': 'n'} +type_to_typecode = {bool: 'b', int_: 'i', long_: 'l', float: 'f', + double: 'd', complex: 'c', bytes: 's', str: 's'} +type_to_kind = expressions.type_to_kind +kind_to_type = expressions.kind_to_type +default_type = kind_to_type[expressions.default_kind] 
+scalar_constant_kinds = list(kind_to_typecode.keys()) + +# VML functions that are implemented in numexpr +vml_functions = [ + "div", # interp_body.cpp + "inv", # interp_body.cpp + "pow", # interp_body.cpp + # Keep the rest of this list in sync with the ones listed in functions.hpp + "sqrt", + "sin", + "cos", + "tan", + "arcsin", + "arccos", + "arctan", + "sinh", + "cosh", + "tanh", + "arcsinh", + "arccosh", + "arctanh", + "log", + "log1p", + "log10", + "log2", + "exp", + "expm1", + "absolute", + "conjugate", + "arctan2", + "fmod", + "ceil", + "floor", + "isnan", + "isfinite", + "isinf", + "hypot", + "round", + "trunc", + "nextafter", + "copysign", + "signbit", + "sign", + "minimum", + "maximum", + ] + + +class ASTNode(): + """Abstract Syntax Tree node. + + Members: + + astType -- type of node (op, constant, variable, raw, or alias) + astKind -- the type of the result (bool, float, etc.) + value -- value associated with this node. + An opcode, numerical value, a variable name, etc. + children -- the children below this node + reg -- the register assigned to the result for this node. + """ + cmpnames = ['astType', 'astKind', 'value', 'children'] + + def __init__(self, astType='generic', astKind='unknown', value=None, children=()): + self.astType = astType + self.astKind = astKind + self.value = value + self.children = tuple(children) + self.reg = None + + def __eq__(self, other): + if self.astType == 'alias': + self = self.value + if other.astType == 'alias': + other = other.value + if not isinstance(other, ASTNode): + return False + for name in self.cmpnames: + if getattr(self, name) != getattr(other, name): + return False + return True + + def __lt__(self,other): + # RAM: this is a fix for issue #88 whereby sorting on constants + # that may be of astKind == 'complex' but type(self.value) == int or float + # Here we let NumPy sort as it will cast data properly for comparison + # when the Python built-ins will raise an error. 
+ if self.astType == 'constant': + if self.astKind == other.astKind: + return numpy.array(self.value) < numpy.array(other.value) + return self.astKind < other.astKind + else: + raise TypeError('Sorting not implemented for astType: %s'%self.astType) + + def __hash__(self): + if self.astType == 'alias': + self = self.value + return hash((self.astType, self.astKind, self.value, self.children)) + + def __str__(self): + return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind, + self.value, self.children, self.reg) + + def __repr__(self): + return '' % id(self) + + def key(self): + return (self.astType, self.astKind, self.value, self.children) + + def typecode(self): + return kind_to_typecode[self.astKind] + + def postorderWalk(self): + for c in self.children: + for w in c.postorderWalk(): + yield w + yield self + + def allOf(self, *astTypes): + astTypes = set(astTypes) + for w in self.postorderWalk(): + if w.astType in astTypes: + yield w + + +def expressionToAST(ex): + """Take an expression tree made out of expressions.ExpressionNode, + and convert to an AST tree. + + This is necessary as ExpressionNode overrides many methods to act + like a number. + """ + return ASTNode(ex.astType, ex.astKind, ex.value, + [expressionToAST(c) for c in ex.children]) + + +def sigPerms(s): + """Generate all possible signatures derived by upcasting the given + signature. + """ + codes = 'bilfdc' + if not s: + yield '' + elif s[0] in codes: + start = codes.index(s[0]) + for x in codes[start:]: + for y in sigPerms(s[1:]): + yield x + y + elif s[0] == 's': # numbers shall not be cast to strings + for y in sigPerms(s[1:]): + yield 's' + y + else: + yield s + + +def typeCompileAst(ast): + """Assign appropriate types to each node in the AST. + + Will convert opcodes and functions to appropriate upcast version, + and add "cast" ops if needed. 
+ """ + children = list(ast.children) + if ast.astType == 'op': + retsig = ast.typecode() + basesig = ''.join(x.typecode() for x in list(ast.children)) + # Find some operation that will work on an acceptable casting of args. + for sig in sigPerms(basesig): + value = (ast.value + '_' + retsig + sig).encode('ascii') + if value in interpreter.opcodes: + break + else: + for sig in sigPerms(basesig): + funcname = (ast.value + '_' + retsig + sig).encode('ascii') + if funcname in interpreter.funccodes: + value = ('func_%sn' % (retsig + sig)).encode('ascii') + children += [ASTNode('raw', 'none', + interpreter.funccodes[funcname])] + break + else: + raise NotImplementedError( + "couldn't find matching opcode for '%s'" + % (ast.value + '_' + retsig + basesig)) + # First just cast constants, then cast variables if necessary: + for i, (have, want) in enumerate(zip(basesig, sig)): + if have != want: + kind = typecode_to_kind[want] + if children[i].astType == 'constant': + children[i] = ASTNode('constant', kind, children[i].value) + else: + opname = "cast" + children[i] = ASTNode('op', kind, opname, [children[i]]) + else: + value = ast.value + children = ast.children + return ASTNode(ast.astType, ast.astKind, value, + [typeCompileAst(c) for c in children]) + + +class Register(): + """Abstraction for a register in the VM. + + Members: + node -- the AST node this corresponds to + temporary -- True if this isn't an input or output + immediate -- not a register, but an immediate value + n -- the physical register number. + None if no number assigned yet. 
+ """ + + def __init__(self, astnode, temporary=False): + self.node = astnode + self.temporary = temporary + self.immediate = False + self.n = None + + def __str__(self): + if self.temporary: + name = 'Temporary' + else: + name = 'Register' + return '%s(%s, %s, %s)' % (name, self.node.astType, + self.node.astKind, self.n,) + + def __repr__(self): + return self.__str__() + + +class Immediate(Register): + """Representation of an immediate (integer) operand, instead of + a register. + """ + + def __init__(self, astnode): + Register.__init__(self, astnode) + self.immediate = True + + def __str__(self): + return 'Immediate(%d)' % (self.node.value,) + + +_flow_pat = r'[\;\[\:]' +_dunder_pat = r'(^|[^\w])__[\w]+__($|[^\w])' +_attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|(\d*j))\b)' +_blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}') + +def stringToExpression(s, types, context, sanitize: bool=True): + """Given a string, convert it to a tree of ExpressionNode's. + """ + # sanitize the string for obvious attack vectors that NumExpr cannot + # parse into its homebrew AST. This is to protect the call to `eval` below. + # We forbid `;`, `:`. `[` and `__`, and attribute access via '.'. + # We cannot ban `.real` or `.imag` however... + # We also cannot ban `.\d*j`, where `\d*` is some digits (or none), e.g. 
1.5j, 1.j + if sanitize: + no_whitespace = re.sub(r'\s+', '', s) + skip_quotes = re.sub(r'(\'[^\']*\')', '', no_whitespace) + if _blacklist_re.search(skip_quotes) is not None: + raise ValueError(f'Expression {s} has forbidden control characters.') + + old_ctx = expressions._context.get_current_context() + try: + expressions._context.set_new_context(context) + # first compile to a code object to determine the names + if context.get('truediv', False): + flags = __future__.division.compiler_flag + else: + flags = 0 + c = compile(s, '', 'eval', flags) + # make VariableNode's for the names + names = {} + for name in c.co_names: + if name == "None": + names[name] = None + elif name == "True": + names[name] = True + elif name == "False": + names[name] = False + else: + t = types.get(name, default_type) + names[name] = expressions.VariableNode(name, type_to_kind[t]) + names.update(expressions.functions) + + # now build the expression + ex = eval(c, names) + + if expressions.isConstant(ex): + ex = expressions.ConstantNode(ex, expressions.getKind(ex)) + elif not isinstance(ex, expressions.ExpressionNode): + raise TypeError("unsupported expression type: %s" % type(ex)) + finally: + expressions._context.set_new_context(old_ctx) + return ex + + +def isReduction(ast): + prefixes = (b'sum_', b'prod_', b'min_', b'max_') + return any(ast.value.startswith(p) for p in prefixes) + + +def getInputOrder(ast, input_order=None): + """ + Derive the input order of the variables in an expression. 
+ """ + variables = {} + for a in ast.allOf('variable'): + variables[a.value] = a + variable_names = set(variables.keys()) + + if input_order: + if variable_names != set(input_order): + raise ValueError( + "input names (%s) don't match those found in expression (%s)" + % (input_order, variable_names)) + + ordered_names = input_order + else: + ordered_names = list(variable_names) + ordered_names.sort() + ordered_variables = [variables[v] for v in ordered_names] + return ordered_variables + + +def convertConstantToKind(x, kind): + # Exception for 'float' types that will return the NumPy float32 type + if kind == 'float': + return numpy.float32(x) + elif isinstance(x,str): + return x.encode('ascii') + return kind_to_type[kind](x) + + +def getConstants(ast): + """ + RAM: implemented magic method __lt__ for ASTNode to fix issues + #88 and #209. The following test code works now, as does the test suite. + + import numexpr as ne + a = 1 + 3j; b = 5.0 + ne.evaluate('a*2 + 15j - b') + """ + constant_registers = set([node.reg for node in ast.allOf("constant")]) + constants_order = sorted([r.node for r in constant_registers]) + constants = [convertConstantToKind(a.value, a.astKind) + for a in constants_order] + return constants_order, constants + + +def sortNodesByOrder(nodes, order): + order_map = {} + for i, (_, v, _) in enumerate(order): + order_map[v] = i + dec_nodes = [(order_map[n.value], n) for n in nodes] + dec_nodes.sort() + return [a[1] for a in dec_nodes] + + +def assignLeafRegisters(inodes, registerMaker): + """ + Assign new registers to each of the leaf nodes. + """ + leafRegisters = {} + for node in inodes: + key = node.key() + if key in leafRegisters: + node.reg = leafRegisters[key] + else: + node.reg = leafRegisters[key] = registerMaker(node) + + +def assignBranchRegisters(inodes, registerMaker): + """ + Assign temporary registers to each of the branch nodes. 
+ """ + for node in inodes: + node.reg = registerMaker(node, temporary=True) + + +def collapseDuplicateSubtrees(ast): + """ + Common subexpression elimination. + """ + seen = {} + aliases = [] + for a in ast.allOf('op'): + if a in seen: + target = seen[a] + a.astType = 'alias' + a.value = target + a.children = () + aliases.append(a) + else: + seen[a] = a + # Set values and registers so optimizeTemporariesAllocation + # doesn't get confused + for a in aliases: + while a.value.astType == 'alias': + a.value = a.value.value + return aliases + + +def optimizeTemporariesAllocation(ast): + """ + Attempt to minimize the number of temporaries needed, by reusing old ones. + """ + nodes = [n for n in ast.postorderWalk() if n.reg.temporary] + users_of = dict((n.reg, set()) for n in nodes) + + node_regs = dict((n, set(c.reg for c in n.children if c.reg.temporary)) + for n in nodes) + if nodes and nodes[-1] is not ast: + nodes_to_check = nodes + [ast] + else: + nodes_to_check = nodes + for n in nodes_to_check: + for c in n.children: + if c.reg.temporary: + users_of[c.reg].add(n) + + unused = dict([(tc, set()) for tc in scalar_constant_kinds]) + for n in nodes: + for c in n.children: + reg = c.reg + if reg.temporary: + users = users_of[reg] + users.discard(n) + if not users: + unused[reg.node.astKind].add(reg) + if unused[n.astKind]: + reg = unused[n.astKind].pop() + users_of[reg] = users_of[n.reg] + n.reg = reg + + +def setOrderedRegisterNumbers(order, start): + """ + Given an order of nodes, assign register numbers. + """ + for i, node in enumerate(order): + node.reg.n = start + i + return start + len(order) + + +def setRegisterNumbersForTemporaries(ast, start): + """ + Assign register numbers for temporary registers, keeping track of + aliases and handling immediate operands. 
+ """ + seen = 0 + signature = '' + aliases = [] + for node in ast.postorderWalk(): + if node.astType == 'alias': + aliases.append(node) + node = node.value + if node.reg.immediate: + node.reg.n = node.value + continue + reg = node.reg + if reg.n is None: + reg.n = start + seen + seen += 1 + signature += reg.node.typecode() + for node in aliases: + node.reg = node.value.reg + return start + seen, signature + + +def convertASTtoThreeAddrForm(ast): + """ + Convert an AST to a three address form. + + Three address form is (op, reg1, reg2, reg3), where reg1 is the + destination of the result of the instruction. + + I suppose this should be called three register form, but three + address form is found in compiler theory. + """ + return [(node.value, node.reg) + tuple([c.reg for c in node.children]) + for node in ast.allOf('op')] + + +def compileThreeAddrForm(program): + """ + Given a three address form of the program, compile it a string that + the VM understands. + """ + + def nToChr(reg): + if reg is None: + return b'\xff' + elif reg.n < 0: + raise ValueError("negative value for register number %s" % reg.n) + else: + return bytes([reg.n]) + + def quadrupleToString(opcode, store, a1=None, a2=None): + cop = chr(interpreter.opcodes[opcode]).encode('latin_1') + cs = nToChr(store) + ca1 = nToChr(a1) + ca2 = nToChr(a2) + return cop + cs + ca1 + ca2 + + def toString(args): + while len(args) < 4: + args += (None,) + opcode, store, a1, a2 = args[:4] + s = quadrupleToString(opcode, store, a1, a2) + l = [s] + args = args[4:] + while args: + s = quadrupleToString(b'noop', *args[:3]) + l.append(s) + args = args[3:] + return b''.join(l) + + prog_str = b''.join([toString(t) for t in program]) + return prog_str + + +context_info = [ + ('optimization', ('none', 'moderate', 'aggressive'), 'aggressive'), + ('truediv', (False, True, 'auto'), 'auto') +] + + +def getContext(kwargs, _frame_depth=1): + d = kwargs.copy() + context = {} + for name, allowed, default in context_info: + value = 
d.pop(name, default) + if value in allowed: + context[name] = value + else: + raise ValueError("'%s' must be one of %s" % (name, allowed)) + + if d: + raise ValueError("Unknown keyword argument '%s'" % d.popitem()[0]) + if context['truediv'] == 'auto': + caller_globals = sys._getframe(_frame_depth + 1).f_globals + context['truediv'] = caller_globals.get('division', None) == __future__.division + + return context + + +def precompile(ex, signature=(), context={}, sanitize: bool=True): + """ + Compile the expression to an intermediate form. + """ + types = dict(signature) + input_order = [name for (name, type_) in signature] + + if isinstance(ex, str): + ex = stringToExpression(ex, types, context, sanitize) + + # the AST is like the expression, but the node objects don't have + # any odd interpretations + + ast = expressionToAST(ex) + + if ex.astType != 'op': + ast = ASTNode('op', value='copy', astKind=ex.astKind, children=(ast,)) + + ast = typeCompileAst(ast) + + aliases = collapseDuplicateSubtrees(ast) + + assignLeafRegisters(ast.allOf('raw'), Immediate) + assignLeafRegisters(ast.allOf('variable', 'constant'), Register) + assignBranchRegisters(ast.allOf('op'), Register) + + # assign registers for aliases + for a in aliases: + a.reg = a.value.reg + + input_order = getInputOrder(ast, input_order) + constants_order, constants = getConstants(ast) + + if isReduction(ast): + ast.reg.temporary = False + + optimizeTemporariesAllocation(ast) + + ast.reg.temporary = False + r_output = 0 + ast.reg.n = 0 + + r_inputs = r_output + 1 + r_constants = setOrderedRegisterNumbers(input_order, r_inputs) + r_temps = setOrderedRegisterNumbers(constants_order, r_constants) + r_end, tempsig = setRegisterNumbersForTemporaries(ast, r_temps) + + threeAddrProgram = convertASTtoThreeAddrForm(ast) + input_names = tuple([a.value for a in input_order]) + signature = ''.join(type_to_typecode[types.get(x, default_type)] + for x in input_names) + return threeAddrProgram, signature, tempsig, 
constants, input_names + + +def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs): + """ + Compile an expression built using E. variables to a function. + + ex can also be specified as a string "2*a+3*b". + + The order of the input variables and their types can be specified using the + signature parameter, which is a list of (name, type) pairs. + + Returns a `NumExpr` object containing the compiled function. + """ + + # In that case _frame_depth is wrong (it should be 2) but it doesn't matter + # since it will not be used (because truediv='auto' has already been + # translated to either True or False). + _frame_depth = 1 + context = getContext(kwargs, _frame_depth=_frame_depth) + threeAddrProgram, inputsig, tempsig, constants, input_names = precompile(ex, signature, context, sanitize=sanitize) + program = compileThreeAddrForm(threeAddrProgram) + return interpreter.NumExpr(inputsig.encode('ascii'), + tempsig.encode('ascii'), + program, constants, input_names) + + +def disassemble(nex): + """ + Given a NumExpr object, return a list which is the program disassembled. 
+ """ + rev_opcodes = {} + for op in interpreter.opcodes: + rev_opcodes[interpreter.opcodes[op]] = op + r_constants = 1 + len(nex.signature) + r_temps = r_constants + len(nex.constants) + + def parseOp(op): + name, sig = [*op.rsplit(b'_', 1), ''][:2] + return name, sig + + def getArg(pc, offset): + arg = nex.program[pc + (offset if offset < 4 else offset+1)] + _, sig = parseOp(rev_opcodes.get(nex.program[pc])) + try: + code = sig[offset - 1] + except IndexError: + return None + + code = bytes([code]) + + if arg == 255: + return None + if code != b'n': + if arg == 0: + return b'r0' + elif arg < r_constants: + return ('r%d[%s]' % (arg, nex.input_names[arg - 1])).encode('ascii') + elif arg < r_temps: + return ('c%d[%s]' % (arg, nex.constants[arg - r_constants])).encode('ascii') + else: + return ('t%d' % (arg,)).encode('ascii') + else: + return arg + + source = [] + for pc in range(0, len(nex.program), 4): + op = rev_opcodes.get(nex.program[pc]) + _, sig = parseOp(op) + parsed = [op] + for i in range(len(sig)): + parsed.append(getArg(pc, 1 + i)) + while len(parsed) < 4: + parsed.append(None) + source.append(parsed) + return source + + +def getType(a): + kind = a.dtype.kind + if kind == 'b': + return bool + if kind in 'iu': + if a.dtype.itemsize > 4: + return long_ # ``long`` is for integers of more than 32 bits + if kind == 'u' and a.dtype.itemsize == 4: + return long_ # use ``long`` here as an ``int`` is not enough + return int_ + if kind == 'f': + if a.dtype.itemsize > 4: + return double # ``double`` is for floats of more than 32 bits + return float + if kind == 'c': + return complex + if kind == 'S': + return bytes + if kind == 'U': + raise ValueError('NumExpr 2 does not support Unicode as a dtype.') + raise ValueError("unknown type %s" % a.dtype.name) + + +def getExprNames(text, context, sanitize: bool=True): + ex = stringToExpression(text, {}, context, sanitize) + ast = expressionToAST(ex) + input_order = getInputOrder(ast, None) + #try to figure out if vml 
operations are used by expression + if not use_vml: + ex_uses_vml = False + else: + for node in ast.postorderWalk(): + if node.astType == 'op' and node.value in vml_functions: + ex_uses_vml = True + break + else: + ex_uses_vml = False + + return [a.value for a in input_order], ex_uses_vml + + +def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): + """ + Get the arguments based on the names. + """ + call_frame = sys._getframe(_frame_depth) + + clear_local_dict = False + if local_dict is None: + local_dict = call_frame.f_locals + clear_local_dict = True + try: + frame_globals = call_frame.f_globals + if global_dict is None: + global_dict = frame_globals + + # If `call_frame` is the top frame of the interpreter we can't clear its + # `local_dict`, because it is actually the `global_dict`. + clear_local_dict = clear_local_dict and not frame_globals is local_dict + + arguments = [] + for name in names: + try: + a = local_dict[name] + except KeyError: + a = global_dict[name] + arguments.append(numpy.asarray(a)) + finally: + # If we generated local_dict via an explicit reference to f_locals, + # clear the dict to prevent creating extra ref counts in the caller's scope + # See https://github.com/pydata/numexpr/issues/310 + if clear_local_dict and hasattr(local_dict, 'clear'): + local_dict.clear() + + return arguments + + +# Dictionaries for caching variable names and compiled expressions +_names_cache = threading.local() +_numexpr_cache = threading.local() +_numexpr_last = threading.local() +evaluate_lock = threading.Lock() + + +def validate(ex: str, + local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + out: numpy.ndarray = None, + order: str = 'K', + casting: str = 'safe', + _frame_depth: int = 2, + sanitize: Optional[bool] = None, + **kwargs) -> Optional[Exception]: + r""" + Validate a NumExpr expression with the given `local_dict` or `locals()`. + Returns `None` on success and the Exception object if one occurs. 
Note that + you can proceed directly to call `re_evaluate()` if you use `validate()` + to sanitize your expressions and variables in advance. + + Parameters + ---------- + ex: str + a string forming an expression, like "2*a+3*b". The values for "a" + and "b" will by default be taken from the calling function's frame + (through use of sys._getframe()). Alternatively, they can be specified + using the 'local_dict' or 'global_dict' arguments. + + local_dict: dictionary, optional + A dictionary that replaces the local operands in current frame. + + global_dict: dictionary, optional + A dictionary that replaces the global operands in current frame. + + out: NumPy array, optional + An existing array where the outcome is going to be stored. Care is + required so that this array has the same shape and type than the + actual outcome of the computation. Useful for avoiding unnecessary + new array allocations. + + order: {'C', 'F', 'A', or 'K'}, optional + Controls the iteration order for operands. 'C' means C order, 'F' + means Fortran order, 'A' means 'F' order if all the arrays are + Fortran contiguous, 'C' order otherwise, and 'K' means as close to + the order the array elements appear in memory as possible. For + efficient computations, typically 'K'eep order (the default) is + desired. + + casting: {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur when making a copy or + buffering. Setting this to 'unsafe' is not recommended, as it can + adversely affect accumulations. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. 
+ + sanitize: Optional[bool] + Both `validate` and by extension `evaluate` call `eval(ex)`, which is + potentially dangerous on unsanitized inputs. As such, NumExpr by default + performs simple sanitization, banning the character ':;[', the + dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'. + + Using `None` defaults to `True` unless the environment variable + `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`. + Nominally this can be set via `os.environ` before `import numexpr`. + + _frame_depth: int + The calling frame depth. Unless you are a NumExpr developer you should + not set this value. + + Note + ---- + + """ + if not hasattr(_numexpr_last, 'l'): + _numexpr_last.l = ContextDict() + + if not hasattr(_names_cache, 'c'): + _names_cache.c = CacheDict(256) + + if not hasattr(_numexpr_cache, 'c'): + _numexpr_cache.c = CacheDict(256) + + try: + + if not isinstance(ex, str): + raise ValueError("must specify expression as a string") + + if sanitize is None: + if 'NUMEXPR_SANITIZE' in os.environ: + sanitize = bool(int(os.environ['NUMEXPR_SANITIZE'])) + else: + sanitize = True + + # Get the names for this expression + context = getContext(kwargs) + expr_key = (ex, tuple(sorted(context.items()))) + if expr_key not in _names_cache.c: + _names_cache.c[expr_key] = getExprNames(ex, context, sanitize=sanitize) + names, ex_uses_vml = _names_cache.c[expr_key] + arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth) + + # Create a signature + signature = [(name, getType(arg)) for (name, arg) in + zip(names, arguments)] + + # Look up numexpr if possible. 
+ numexpr_key = expr_key + (tuple(signature),) + try: + compiled_ex = _numexpr_cache.c[numexpr_key] + except KeyError: + compiled_ex = _numexpr_cache.c[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context) + kwargs = {'out': out, 'order': order, 'casting': casting, + 'ex_uses_vml': ex_uses_vml} + _numexpr_last.l.set(ex=compiled_ex, argnames=names, kwargs=kwargs) + except Exception as e: + return e + return None + +def evaluate(ex: str, + local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + out: numpy.ndarray = None, + order: str = 'K', + casting: str = 'same_kind', + sanitize: Optional[bool] = None, + _frame_depth: int = 3, + **kwargs) -> numpy.ndarray: + r""" + Evaluate a simple array expression element-wise using the virtual machine. + + Parameters + ---------- + ex: str + a string forming an expression, like "2*a+3*b". The values for "a" + and "b" will by default be taken from the calling function's frame + (through use of sys._getframe()). Alternatively, they can be specified + using the 'local_dict' or 'global_dict' arguments. + + local_dict: dictionary, optional + A dictionary that replaces the local operands in current frame. + + global_dict: dictionary, optional + A dictionary that replaces the global operands in current frame. + + out: NumPy array, optional + An existing array where the outcome is going to be stored. Care is + required so that this array has the same shape and type than the + actual outcome of the computation. Useful for avoiding unnecessary + new array allocations. + + order: {'C', 'F', 'A', or 'K'}, optional + Controls the iteration order for operands. 'C' means C order, 'F' + means Fortran order, 'A' means 'F' order if all the arrays are + Fortran contiguous, 'C' order otherwise, and 'K' means as close to + the order the array elements appear in memory as possible. For + efficient computations, typically 'K'eep order (the default) is + desired. 
+ + casting: {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur when making a copy or + buffering. Setting this to 'unsafe' is not recommended, as it can + adversely affect accumulations. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + + sanitize: bool + `validate` (and by extension `evaluate`) call `eval(ex)`, which is + potentially dangerous on non-sanitized inputs. As such, NumExpr by default + performs simple sanitization, banning the characters ':;[', the + dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'. + + Using `None` defaults to `True` unless the environment variable + `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`. + Nominally this can be set via `os.environ` before `import numexpr`. + + _frame_depth: int + The calling frame depth. Unless you are a NumExpr developer you should + not set this value. + + """ + # We could avoid code duplication if we called validate and then re_evaluate + # here, but we have difficulties with the `sys.getframe(2)` call in + # `getArguments` + e = validate(ex, local_dict=local_dict, global_dict=global_dict, + out=out, order=order, casting=casting, + _frame_depth=_frame_depth, sanitize=sanitize, **kwargs) + if e is None: + return re_evaluate(local_dict=local_dict, global_dict=global_dict, _frame_depth=_frame_depth) + else: + raise e + +def re_evaluate(local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + _frame_depth: int=2) -> numpy.ndarray: + """ + Re-evaluate the previous executed array expression without any check. 
+ + This is meant for accelerating loops that are re-evaluating the same + expression repeatedly without changing anything else than the operands. + If unsure, use evaluate() which is safer. + + Parameters + ---------- + local_dict: dictionary, optional + A dictionary that replaces the local operands in current frame. + _frame_depth: int + The calling frame depth. Unless you are a NumExpr developer you should + not set this value. + """ + if not hasattr(_numexpr_last, 'l'): + _numexpr_last.l = ContextDict() + + try: + compiled_ex = _numexpr_last.l['ex'] + except KeyError: + raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`") + argnames = _numexpr_last.l['argnames'] + args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) + kwargs = _numexpr_last.l['kwargs'] + # with evaluate_lock: + return compiled_ex(*args, **kwargs) diff --git a/venv/Lib/site-packages/numexpr/numexpr_config.hpp b/venv/Lib/site-packages/numexpr/numexpr_config.hpp new file mode 100644 index 0000000..99d5231 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/numexpr_config.hpp @@ -0,0 +1,71 @@ +#ifndef NUMEXPR_CONFIG_HPP +#define NUMEXPR_CONFIG_HPP + +// x86 platform works with unaligned reads and writes +// MW: I have seen exceptions to this when the compiler chooses to use aligned SSE +#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)) +# define USE_UNALIGNED_ACCESS 1 +#endif + +// #ifdef SCIPY_MKL_H +// #define USE_VML +// #endif + +#ifdef USE_VML +/* The values below have been tuned for a Skylake processor (E3-1245 v5 @ 3.50GHz) */ +#define BLOCK_SIZE1 1024 +#else +/* The values below have been tuned for a Skylake processor (E3-1245 v5 @ 3.50GHz) */ +#define BLOCK_SIZE1 1024 +#endif + +// The default threadpool size. 
It's prefer that the user set this via an +// environment variable, "NUMEXPR_MAX_THREADS" +#define DEFAULT_MAX_THREADS 64 + +// Remove dependence on NPY_MAXARGS, which would be a runtime constant instead of compiletime +// constant. If numpy raises NPY_MAXARGS, we should notice and raise this as well +#define NE_MAXARGS 64 + +#if defined(_WIN32) + #include "win32/pthread.h" + #include + #define getpid _getpid +#else + #include + #include "unistd.h" +#endif + +#ifdef USE_VML +#include "mkl_vml.h" +#include "mkl_service.h" +#endif +#include +//no single precision version of signbit in C++ standard +inline bool signbitf(float x) { return signbit((double)x); } + +#ifdef _WIN32 + #ifndef __MINGW32__ + #include "missing_posix_functions.hpp" + #endif + #include "msvc_function_stubs.hpp" +#else +/* GCC/Clang version: use std:: (can't use it for windows) + msvc_function_stubs contains windows alternatives */ +/* Due to casting problems (normally return ints not bools, easiest to define + non-overloaded wrappers for these functions) */ +inline bool isfinitef_(float x) { return !!std::isfinite(x); } +inline bool isnanf_(float x) { return !!std::isnan(x); } +inline bool isfinited(double x) { return !!std::isfinite(x); } +inline bool isnand(double x) { return !!std::isnan(x); } +inline bool isinff_(float x) { return !!std::isinf(x); } +inline bool isinfd(double x) { return !!std::isinf(x); } + +// To handle overloading of fmax/fmin in cmath and match NumPy behaviour for NaNs +inline double fmaxd(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmax(x, y); } +inline double fmind(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmin(x, y); } +inline float fmaxf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? NAN : fmaxf(x, y); } +inline float fminf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? 
NAN : fminf(x, y); } +#endif + +#endif // NUMEXPR_CONFIG_HPP diff --git a/venv/Lib/site-packages/numexpr/numexpr_object.cpp b/venv/Lib/site-packages/numexpr/numexpr_object.cpp new file mode 100644 index 0000000..b20aef0 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/numexpr_object.cpp @@ -0,0 +1,407 @@ +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "module.hpp" +#include + +#include "numexpr_config.hpp" +#include "interpreter.hpp" +#include "numexpr_object.hpp" + +static int +size_from_char(char c) +{ + switch (c) { + case 'b': return sizeof(char); + case 'i': return sizeof(int); + case 'l': return sizeof(long long); + case 'f': return sizeof(float); + case 'd': return sizeof(double); + case 'c': return 2*sizeof(double); + case 's': return 0; /* strings are ok but size must be computed */ + default: + PyErr_SetString(PyExc_TypeError, "signature value not in 'bilfdcs'"); + return -1; + } +} + +static void +NumExpr_dealloc(NumExprObject *self) +{ + Py_XDECREF(self->signature); + Py_XDECREF(self->tempsig); + Py_XDECREF(self->constsig); + Py_XDECREF(self->fullsig); + Py_XDECREF(self->program); + Py_XDECREF(self->constants); + Py_XDECREF(self->input_names); + PyMem_Del(self->mem); + PyMem_Del(self->rawmem); + PyMem_Del(self->memsteps); + PyMem_Del(self->memsizes); + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject * +NumExpr_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + NumExprObject *self = (NumExprObject *)type->tp_alloc(type, 0); + if (self != NULL) { +#define INIT_WITH(name, object) \ + self->name = object; \ + if (!self->name) { \ + Py_DECREF(self); \ + return NULL; \ + } + + INIT_WITH(signature, PyBytes_FromString("")); + INIT_WITH(tempsig, 
PyBytes_FromString("")); + INIT_WITH(constsig, PyBytes_FromString("")); + INIT_WITH(fullsig, PyBytes_FromString("")); + INIT_WITH(program, PyBytes_FromString("")); + INIT_WITH(constants, PyTuple_New(0)); + Py_INCREF(Py_None); + self->input_names = Py_None; + self->mem = NULL; + self->rawmem = NULL; + self->memsteps = NULL; + self->memsizes = NULL; + self->rawmemsize = 0; + self->n_inputs = 0; + self->n_constants = 0; + self->n_temps = 0; +#undef INIT_WITH + } + return (PyObject *)self; +} + +#define CHARP(s) ((char *)(s)) + +static int +NumExpr_init(NumExprObject *self, PyObject *args, PyObject *kwds) +{ + int i, j, mem_offset; + int n_inputs, n_constants, n_temps; + PyObject *signature = NULL, *tempsig = NULL, *constsig = NULL; + PyObject *fullsig = NULL, *program = NULL, *constants = NULL; + PyObject *input_names = NULL, *o_constants = NULL; + int *itemsizes = NULL; + char **mem = NULL, *rawmem = NULL; + npy_intp *memsteps; + npy_intp *memsizes; + int rawmemsize; + static char *kwlist[] = {CHARP("signature"), CHARP("tempsig"), + CHARP("program"), CHARP("constants"), + CHARP("input_names"), NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "SSS|OO", kwlist, + &signature, + &tempsig, + &program, &o_constants, + &input_names)) { + return -1; + } + + n_inputs = (int)PyBytes_Size(signature); + n_temps = (int)PyBytes_Size(tempsig); + + if (o_constants) { + if (!PySequence_Check(o_constants) ) { + PyErr_SetString(PyExc_TypeError, "constants must be a sequence"); + return -1; + } + n_constants = (int)PySequence_Length(o_constants); + if (!(constants = PyTuple_New(n_constants))) + return -1; + if (!(constsig = PyBytes_FromStringAndSize(NULL, n_constants))) { + Py_DECREF(constants); + return -1; + } + if (!(itemsizes = PyMem_New(int, n_constants))) { + Py_DECREF(constants); + return -1; + } + for (i = 0; i < n_constants; i++) { + PyObject *o; + if (!(o = PySequence_GetItem(o_constants, i))) { /* new reference */ + Py_DECREF(constants); + Py_DECREF(constsig); + 
PyMem_Del(itemsizes); + return -1; + } + PyTuple_SET_ITEM(constants, i, o); /* steals reference */ + if (PyBool_Check(o)) { + PyBytes_AS_STRING(constsig)[i] = 'b'; + itemsizes[i] = size_from_char('b'); + continue; + } + + if (PyArray_IsScalar(o, Int32)) { + PyBytes_AS_STRING(constsig)[i] = 'i'; + itemsizes[i] = size_from_char('i'); + continue; + } + + if (PyArray_IsScalar(o, Int64)) { + PyBytes_AS_STRING(constsig)[i] = 'l'; + itemsizes[i] = size_from_char('l'); + continue; + } + /* The Float32 scalars are the only ones that should reach here */ + if (PyArray_IsScalar(o, Float32)) { + PyBytes_AS_STRING(constsig)[i] = 'f'; + itemsizes[i] = size_from_char('f'); + continue; + } + if (PyFloat_Check(o)) { + /* Python float constants are double precision by default */ + PyBytes_AS_STRING(constsig)[i] = 'd'; + itemsizes[i] = size_from_char('d'); + continue; + } + if (PyComplex_Check(o)) { + PyBytes_AS_STRING(constsig)[i] = 'c'; + itemsizes[i] = size_from_char('c'); + continue; + } + if (PyBytes_Check(o)) { + PyBytes_AS_STRING(constsig)[i] = 's'; + itemsizes[i] = (int)PyBytes_GET_SIZE(o); + continue; + } + PyErr_SetString(PyExc_TypeError, "constants must be of type bool/int/long/float/double/complex/bytes"); + Py_DECREF(constsig); + Py_DECREF(constants); + PyMem_Del(itemsizes); + return -1; + } + } else { + n_constants = 0; + if (!(constants = PyTuple_New(0))) + return -1; + if (!(constsig = PyBytes_FromString(""))) { + Py_DECREF(constants); + return -1; + } + } + + fullsig = PyBytes_FromFormat("%c%s%s%s", get_return_sig(program), + PyBytes_AS_STRING(signature), PyBytes_AS_STRING(constsig), + PyBytes_AS_STRING(tempsig)); + if (!fullsig) { + Py_DECREF(constants); + Py_DECREF(constsig); + PyMem_Del(itemsizes); + return -1; + } + + if (!input_names) { + input_names = Py_None; + } + + /* Compute the size of registers. We leave temps out (will be + malloc'ed later on). 
*/ + rawmemsize = 0; + for (i = 0; i < n_constants; i++) + rawmemsize += itemsizes[i]; + rawmemsize *= BLOCK_SIZE1; + + mem = PyMem_New(char *, 1 + n_inputs + n_constants + n_temps); + rawmem = PyMem_New(char, rawmemsize); + memsteps = PyMem_New(npy_intp, 1 + n_inputs + n_constants + n_temps); + memsizes = PyMem_New(npy_intp, 1 + n_inputs + n_constants + n_temps); + if (!mem || !rawmem || !memsteps || !memsizes) { + Py_DECREF(constants); + Py_DECREF(constsig); + Py_DECREF(fullsig); + PyMem_Del(itemsizes); + PyMem_Del(mem); + PyMem_Del(rawmem); + PyMem_Del(memsteps); + PyMem_Del(memsizes); + return -1; + } + /* + 0 -> output + [1, n_inputs+1) -> inputs + [n_inputs+1, n_inputs+n_consts+1) -> constants + [n_inputs+n_consts+1, n_inputs+n_consts+n_temps+1) -> temps + */ + /* Fill in 'mem' and 'rawmem' for constants */ + mem_offset = 0; + for (i = 0; i < n_constants; i++) { + char c = PyBytes_AS_STRING(constsig)[i]; + int size = itemsizes[i]; + mem[i+n_inputs+1] = rawmem + mem_offset; + mem_offset += BLOCK_SIZE1 * size; + memsteps[i+n_inputs+1] = memsizes[i+n_inputs+1] = size; + /* fill in the constants */ + if (c == 'b') { + char *bmem = (char*)mem[i+n_inputs+1]; + char value = (char)PyLong_AsLong(PyTuple_GET_ITEM(constants, i)); + for (j = 0; j < BLOCK_SIZE1; j++) { + bmem[j] = value; + } + } else if (c == 'i') { + int *imem = (int*)mem[i+n_inputs+1]; + int value = (int)PyLong_AsLong(PyTuple_GET_ITEM(constants, i)); + for (j = 0; j < BLOCK_SIZE1; j++) { + imem[j] = value; + } + } else if (c == 'l') { + long long *lmem = (long long*)mem[i+n_inputs+1]; + long long value = PyLong_AsLongLong(PyTuple_GET_ITEM(constants, i)); + for (j = 0; j < BLOCK_SIZE1; j++) { + lmem[j] = value; + } + } else if (c == 'f') { + /* In this particular case the constant is in a NumPy scalar + and in a regular Python object */ + float *fmem = (float*)mem[i+n_inputs+1]; + float value = PyArrayScalar_VAL(PyTuple_GET_ITEM(constants, i), + Float); + for (j = 0; j < BLOCK_SIZE1; j++) { + fmem[j] = 
value; + } + } else if (c == 'd') { + double *dmem = (double*)mem[i+n_inputs+1]; + double value = PyFloat_AS_DOUBLE(PyTuple_GET_ITEM(constants, i)); + for (j = 0; j < BLOCK_SIZE1; j++) { + dmem[j] = value; + } + } else if (c == 'c') { + double *cmem = (double*)mem[i+n_inputs+1]; + Py_complex value = PyComplex_AsCComplex(PyTuple_GET_ITEM(constants, i)); + for (j = 0; j < 2*BLOCK_SIZE1; j+=2) { + cmem[j] = value.real; + cmem[j+1] = value.imag; + } + } else if (c == 's') { + char *smem = (char*)mem[i+n_inputs+1]; + char *value = PyBytes_AS_STRING(PyTuple_GET_ITEM(constants, i)); + for (j = 0; j < size*BLOCK_SIZE1; j+=size) { + memcpy(smem + j, value, size); + } + } + } + /* This is no longer needed since no unusual item sizes appear + in temporaries (there are no string temporaries). */ + PyMem_Del(itemsizes); + + /* Fill in 'memsteps' and 'memsizes' for temps */ + for (i = 0; i < n_temps; i++) { + char c = PyBytes_AS_STRING(tempsig)[i]; + int size = size_from_char(c); + memsteps[i+n_inputs+n_constants+1] = size; + memsizes[i+n_inputs+n_constants+1] = size; + } + /* See if any errors occured (e.g., in size_from_char) or if mem_offset is wrong */ + if (PyErr_Occurred() || mem_offset != rawmemsize) { + if (mem_offset != rawmemsize) { + PyErr_Format(PyExc_RuntimeError, "mem_offset does not match rawmemsize"); + } + Py_DECREF(constants); + Py_DECREF(constsig); + Py_DECREF(fullsig); + PyMem_Del(mem); + PyMem_Del(rawmem); + PyMem_Del(memsteps); + PyMem_Del(memsizes); + return -1; + } + + + #define REPLACE_OBJ(arg) \ + {PyObject *tmp = self->arg; \ + self->arg = arg; \ + Py_XDECREF(tmp);} + #define INCREF_REPLACE_OBJ(arg) {Py_INCREF(arg); REPLACE_OBJ(arg);} + #define REPLACE_MEM(arg) {PyMem_Del(self->arg); self->arg=arg;} + + INCREF_REPLACE_OBJ(signature); + INCREF_REPLACE_OBJ(tempsig); + REPLACE_OBJ(constsig); + REPLACE_OBJ(fullsig); + INCREF_REPLACE_OBJ(program); + REPLACE_OBJ(constants); + INCREF_REPLACE_OBJ(input_names); + REPLACE_MEM(mem); + REPLACE_MEM(rawmem); + 
REPLACE_MEM(memsteps); + REPLACE_MEM(memsizes); + self->rawmemsize = rawmemsize; + self->n_inputs = n_inputs; + self->n_constants = n_constants; + self->n_temps = n_temps; + + #undef REPLACE_OBJ + #undef INCREF_REPLACE_OBJ + #undef REPLACE_MEM + + return check_program(self); +} + +static PyMethodDef NumExpr_methods[] = { + {"run", (PyCFunction) NumExpr_run, METH_VARARGS|METH_KEYWORDS, NULL}, + {NULL, NULL} +}; + +static PyMemberDef NumExpr_members[] = { + {CHARP("signature"), T_OBJECT_EX, offsetof(NumExprObject, signature), READONLY, NULL}, + {CHARP("constsig"), T_OBJECT_EX, offsetof(NumExprObject, constsig), READONLY, NULL}, + {CHARP("tempsig"), T_OBJECT_EX, offsetof(NumExprObject, tempsig), READONLY, NULL}, + {CHARP("fullsig"), T_OBJECT_EX, offsetof(NumExprObject, fullsig), READONLY, NULL}, + + {CHARP("program"), T_OBJECT_EX, offsetof(NumExprObject, program), READONLY, NULL}, + {CHARP("constants"), T_OBJECT_EX, offsetof(NumExprObject, constants), + READONLY, NULL}, + {CHARP("input_names"), T_OBJECT, offsetof(NumExprObject, input_names), 0, NULL}, + {NULL}, +}; + +PyTypeObject NumExprType = { + PyVarObject_HEAD_INIT(NULL, 0) + "numexpr.NumExpr", /*tp_name*/ + sizeof(NumExprObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)NumExpr_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + (ternaryfunc)NumExpr_run, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "NumExpr objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + NumExpr_methods, /* tp_methods */ + NumExpr_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 
0, /* tp_dictoffset */ + (initproc)NumExpr_init, /* tp_init */ + 0, /* tp_alloc */ + NumExpr_new, /* tp_new */ +}; diff --git a/venv/Lib/site-packages/numexpr/numexpr_object.hpp b/venv/Lib/site-packages/numexpr/numexpr_object.hpp new file mode 100644 index 0000000..55eda1a --- /dev/null +++ b/venv/Lib/site-packages/numexpr/numexpr_object.hpp @@ -0,0 +1,34 @@ +#ifndef NUMEXPR_OBJECT_HPP +#define NUMEXPR_OBJECT_HPP +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +struct NumExprObject +{ + PyObject_HEAD + PyObject *signature; /* a python string */ + PyObject *tempsig; + PyObject *constsig; + PyObject *fullsig; + PyObject *program; /* a python string */ + PyObject *constants; /* a tuple of int/float/complex */ + PyObject *input_names; /* tuple of strings */ + char **mem; /* pointers to registers */ + char *rawmem; /* a chunks of raw memory for storing registers */ + npy_intp *memsteps; + npy_intp *memsizes; + int rawmemsize; + int n_inputs; + int n_constants; + int n_temps; +}; + +extern PyTypeObject NumExprType; + +#endif // NUMEXPR_OBJECT_HPP diff --git a/venv/Lib/site-packages/numexpr/opcodes.hpp b/venv/Lib/site-packages/numexpr/opcodes.hpp new file mode 100644 index 0000000..5b1c46f --- /dev/null +++ b/venv/Lib/site-packages/numexpr/opcodes.hpp @@ -0,0 +1,214 @@ +/********************************************************************* + Numexpr - Fast numerical array expression evaluator for NumPy. + + License: MIT + Author: See AUTHORS.txt + + See LICENSE.txt for details about copyright and rights to use. 
+**********************************************************************/ + +/* +OPCODE(n, enum_name, exported, return_type, arg1_type, arg2_type, arg3_type) + +`exported` is NULL if the opcode shouldn't exported by the Python module. + +Types are Tb, Ti, Tl, Tf, Td, Tc, Ts, Tn, and T0; these symbols should be +#defined to whatever is needed. (T0 is the no-such-arg type.) + +When adding new OPCODES, one has to respect the order of the numeration, as +there are parts of the code (iterations) which assume that the OPCODES are ordered. + +*/ +OPCODE(0, OP_NOOP, "noop", T0, T0, T0, T0) + +OPCODE(1, OP_COPY_BB, "copy_bb", Tb, Tb, T0, T0) + +OPCODE(2, OP_INVERT_BB, "invert_bb", Tb, Tb, T0, T0) +OPCODE(3, OP_AND_BBB, "and_bbb", Tb, Tb, Tb, T0) +OPCODE(4, OP_OR_BBB, "or_bbb", Tb, Tb, Tb, T0) +OPCODE(5, OP_XOR_BBB, "xor_bbb", Tb, Tb, Tb, T0) + +OPCODE(6, OP_EQ_BBB, "eq_bbb", Tb, Tb, Tb, T0) +OPCODE(7, OP_NE_BBB, "ne_bbb", Tb, Tb, Tb, T0) + +OPCODE(8, OP_GT_BII, "gt_bii", Tb, Ti, Ti, T0) +OPCODE(9, OP_GE_BII, "ge_bii", Tb, Ti, Ti, T0) +OPCODE(10, OP_EQ_BII, "eq_bii", Tb, Ti, Ti, T0) +OPCODE(11, OP_NE_BII, "ne_bii", Tb, Ti, Ti, T0) + +OPCODE(12, OP_GT_BLL, "gt_bll", Tb, Tl, Tl, T0) +OPCODE(13, OP_GE_BLL, "ge_bll", Tb, Tl, Tl, T0) +OPCODE(14, OP_EQ_BLL, "eq_bll", Tb, Tl, Tl, T0) +OPCODE(15, OP_NE_BLL, "ne_bll", Tb, Tl, Tl, T0) + +OPCODE(16, OP_GT_BFF, "gt_bff", Tb, Tf, Tf, T0) +OPCODE(17, OP_GE_BFF, "ge_bff", Tb, Tf, Tf, T0) +OPCODE(18, OP_EQ_BFF, "eq_bff", Tb, Tf, Tf, T0) +OPCODE(19, OP_NE_BFF, "ne_bff", Tb, Tf, Tf, T0) + +OPCODE(20, OP_GT_BDD, "gt_bdd", Tb, Td, Td, T0) +OPCODE(21, OP_GE_BDD, "ge_bdd", Tb, Td, Td, T0) +OPCODE(22, OP_EQ_BDD, "eq_bdd", Tb, Td, Td, T0) +OPCODE(23, OP_NE_BDD, "ne_bdd", Tb, Td, Td, T0) + +OPCODE(24, OP_GT_BSS, "gt_bss", Tb, Ts, Ts, T0) +OPCODE(25, OP_GE_BSS, "ge_bss", Tb, Ts, Ts, T0) +OPCODE(26, OP_EQ_BSS, "eq_bss", Tb, Ts, Ts, T0) +OPCODE(27, OP_NE_BSS, "ne_bss", Tb, Ts, Ts, T0) + +OPCODE(28, OP_CAST_IB, "cast_ib", Ti, Tb, T0, T0) +OPCODE(29, 
OP_COPY_II, "copy_ii", Ti, Ti, T0, T0) +OPCODE(30, OP_ONES_LIKE_II, "ones_like_ii", Ti, T0, T0, T0) +OPCODE(31, OP_NEG_II, "neg_ii", Ti, Ti, T0, T0) +OPCODE(32, OP_ADD_III, "add_iii", Ti, Ti, Ti, T0) +OPCODE(33, OP_SUB_III, "sub_iii", Ti, Ti, Ti, T0) +OPCODE(34, OP_MUL_III, "mul_iii", Ti, Ti, Ti, T0) +OPCODE(35, OP_DIV_III, "div_iii", Ti, Ti, Ti, T0) +OPCODE(36, OP_POW_III, "pow_iii", Ti, Ti, Ti, T0) +OPCODE(37, OP_MOD_III, "mod_iii", Ti, Ti, Ti, T0) +OPCODE(38, OP_FLOORDIV_III, "floordiv_iii", Ti, Ti, Ti, T0) + + +OPCODE(39, OP_LSHIFT_III, "lshift_iii", Ti, Ti, Ti, T0) +OPCODE(40, OP_RSHIFT_III, "rshift_iii", Ti, Ti, Ti, T0) + +OPCODE(41, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) +// Bitwise ops +OPCODE(42, OP_INVERT_II, "invert_ii", Ti, Ti, T0, T0) +OPCODE(43, OP_AND_III, "and_iii", Ti, Ti, Ti, T0) +OPCODE(44, OP_OR_III, "or_iii", Ti, Ti, Ti, T0) +OPCODE(45, OP_XOR_III, "xor_iii", Ti, Ti, Ti, T0) + +OPCODE(46, OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) +OPCODE(47, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) +OPCODE(48, OP_ONES_LIKE_LL, "ones_like_ll", Tl, T0, T0, T0) +OPCODE(49, OP_NEG_LL, "neg_ll", Tl, Tl, T0, T0) +OPCODE(50, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) +OPCODE(51, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) +OPCODE(52, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) +OPCODE(53, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) +OPCODE(54, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) +OPCODE(55, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) +OPCODE(56, OP_FLOORDIV_LLL, "floordiv_lll", Tl, Tl, Tl, T0) + +OPCODE(57, OP_LSHIFT_LLL, "lshift_lll", Tl, Tl, Tl, T0) +OPCODE(58, OP_RSHIFT_LLL, "rshift_lll", Tl, Tl, Tl, T0) + +OPCODE(59, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) +// Bitwise ops +OPCODE(60, OP_INVERT_LL, "invert_ll", Tl, Tl, T0, T0) +OPCODE(61, OP_AND_LLL, "and_lll", Tl, Tl, Tl, T0) +OPCODE(62, OP_OR_LLL, "or_lll", Tl, Tl, Tl, T0) +OPCODE(63, OP_XOR_LLL, "xor_lll", Tl, Tl, Tl, T0) + +OPCODE(64, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) +OPCODE(65, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) 
+OPCODE(66, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) +OPCODE(67, OP_ONES_LIKE_FF, "ones_like_ff", Tf, T0, T0, T0) +OPCODE(68, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) +OPCODE(69, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) +OPCODE(70, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) +OPCODE(71, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) +OPCODE(72, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) +OPCODE(73, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) +OPCODE(74, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) +OPCODE(75, OP_FLOORDIV_FFF, "floordiv_fff", Tf, Tf, Tf, T0) +OPCODE(76, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) +OPCODE(77, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) + +OPCODE(78, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) +OPCODE(79, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) + +OPCODE(80, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) +OPCODE(81, OP_CAST_DL, "cast_dl", Td, Tl, T0, T0) +OPCODE(82, OP_CAST_DF, "cast_df", Td, Tf, T0, T0) +OPCODE(83, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) +OPCODE(84, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) +OPCODE(85, OP_NEG_DD, "neg_dd", Td, Td, T0, T0) +OPCODE(86, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) +OPCODE(87, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) +OPCODE(88, OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) +OPCODE(89, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) +OPCODE(90, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) +OPCODE(91, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) +OPCODE(92, OP_FLOORDIV_DDD, "floordiv_ddd", Td, Td, Td, T0) + +OPCODE(93, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) +OPCODE(94, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) +OPCODE(95, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) +OPCODE(96, OP_FUNC_DDDN, "func_dddn", Td, Td, Td, Tn) + +OPCODE(97, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) +OPCODE(98, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) + +OPCODE(99, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) +OPCODE(100, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) +OPCODE(101, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) +OPCODE(102, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) +OPCODE(103, OP_ONES_LIKE_CC, 
"ones_like_cc", Tc, T0, T0, T0) +OPCODE(104, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) +OPCODE(105, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) +OPCODE(106, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) +OPCODE(107, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) +OPCODE(108, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) +OPCODE(109, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) +OPCODE(110, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) +OPCODE(111, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) +OPCODE(112, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) + +OPCODE(113, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) +OPCODE(114, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) +OPCODE(115, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) + +OPCODE(116, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) + +OPCODE(117, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) + +OPCODE(118, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) +//Boolean outputs +OPCODE(119, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) +OPCODE(120, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) +OPCODE(121, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) +//Integer funcs +OPCODE(122, OP_FUNC_IIN, "func_iin", Ti, Ti, Tn, T0) +OPCODE(123, OP_FUNC_LLN, "func_lln", Tl, Tl, Tn, T0) + +// Reductions always have to be at the end - parts of the code +// use > OP_REDUCTION to decide whether operation is a reduction +OPCODE(124, OP_REDUCTION, NULL, T0, T0, T0, T0) + +/* Last argument in a reduction is the axis of the array the + reduction should be applied along. 
*/ + +OPCODE(125, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) +OPCODE(126, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) +OPCODE(127, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) +OPCODE(128, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) +OPCODE(129, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) + +OPCODE(130, OP_PROD, NULL, T0, T0, T0, T0) +OPCODE(131, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) +OPCODE(132, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) +OPCODE(133, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) +OPCODE(134, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) +OPCODE(135, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) + +OPCODE(136, OP_MIN, NULL, T0, T0, T0, T0) +OPCODE(137, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) +OPCODE(138, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) +OPCODE(139, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) +OPCODE(140, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) + +OPCODE(141, OP_MAX, NULL, T0, T0, T0, T0) +OPCODE(142, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) +OPCODE(143, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) +OPCODE(144, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) +OPCODE(145, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) + +/* +When we get to 255, will maybe have to change code again +(change latin_1 encoding in necompiler.py, use something +other than unsigned char for OPCODE table) +*/ +/* Should be the last opcode */ +OPCODE(146, OP_END, NULL, T0, T0, T0, T0) diff --git a/venv/Lib/site-packages/numexpr/str-two-way.hpp b/venv/Lib/site-packages/numexpr/str-two-way.hpp new file mode 100644 index 0000000..3aceea6 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/str-two-way.hpp @@ -0,0 +1,435 @@ +/* Byte-wise substring search, using the Two-Way algorithm. + * Copyright (C) 2008, 2010 Eric Blake + * Permission to use, copy, modify, and distribute this software + * is freely granted, provided that this notice is preserved. + */ + + +/* Before including this file, you need to include , and define: + RETURN_TYPE A macro that expands to the return type. 
+ AVAILABLE(h, h_l, j, n_l) A macro that returns nonzero if there are + at least N_L bytes left starting at + H[J]. H is 'unsigned char *', H_L, J, + and N_L are 'size_t'; H_L is an + lvalue. For NUL-terminated searches, + H_L can be modified each iteration to + avoid having to compute the end of H + up front. + + For case-insensitivity, you may optionally define: + CMP_FUNC(p1, p2, l) A macro that returns 0 iff the first L + characters of P1 and P2 are equal. + CANON_ELEMENT(c) A macro that canonicalizes an element + right after it has been fetched from + one of the two strings. The argument + is an 'unsigned char'; the result must + be an 'unsigned char' as well. + + This file undefines the macros documented above, and defines + LONG_NEEDLE_THRESHOLD. +*/ + +#include + +/* + Python 2.7 (the only Python 2.x version supported as of now and until 2020) + is built on windows with Visual Studio 2008 C compiler. That dictates that + the compiler which must be used by authors of third party Python modules. + See https://mail.python.org/pipermail/distutils-sig/2014-September/024885.html + + Unfortunately this version of Visual Studio doesn't claim to be C99 compatible + and in particular it lacks the stdint.h header. So we have to replace it with + a public domain version. + + Visual Studio 2010 and later have stdint.h. +*/ + +#ifdef _MSC_VER + #if _MSC_VER <= 1500 + #include "win32/stdint.h" + #endif +#else + #include +#endif + +/* We use the Two-Way string matching algorithm, which guarantees + linear complexity with constant space. Additionally, for long + needles, we also use a bad character shift table similar to the + Boyer-Moore algorithm to achieve improved (potentially sub-linear) + performance. + + See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 + and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm +*/ + +/* Point at which computing a bad-byte shift table is likely to be + worthwhile. 
Small needles should not compute a table, since it + adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a + speedup no greater than a factor of NEEDLE_LEN. The larger the + needle, the better the potential performance gain. On the other + hand, on non-POSIX systems with CHAR_BIT larger than eight, the + memory required for the table is prohibitive. */ +#if CHAR_BIT < 10 +# define LONG_NEEDLE_THRESHOLD 32U +#else +# define LONG_NEEDLE_THRESHOLD SIZE_MAX +#endif + +#define MAX(a, b) ((a < b) ? (b) : (a)) + +#ifndef CANON_ELEMENT +# define CANON_ELEMENT(c) c +#endif +#ifndef CMP_FUNC +# define CMP_FUNC memcmp +#endif + +/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. + Return the index of the first byte in the right half, and set + *PERIOD to the global period of the right half. + + The global period of a string is the smallest index (possibly its + length) at which all remaining bytes in the string are repetitions + of the prefix (the last repetition may be a subset of the prefix). + + When NEEDLE is factored into two halves, a local period is the + length of the smallest word that shares a suffix with the left half + and shares a prefix with the right half. All factorizations of a + non-empty NEEDLE have a local period of at least 1 and no greater + than NEEDLE_LEN. + + A critical factorization has the property that the local period + equals the global period. All strings have at least one critical + factorization with the left half smaller than the global period. + + Given an ordered alphabet, a critical factorization can be computed + in linear time, with 2 * NEEDLE_LEN comparisons, by computing the + larger of two ordered maximal suffixes. The ordered maximal + suffixes are determined by lexicographic comparison of + periodicity. */ +static size_t +critical_factorization (const unsigned char *needle, size_t needle_len, + size_t *period) +{ + /* Index of last byte of left half, or SIZE_MAX. 
*/ + size_t max_suffix, max_suffix_rev; + size_t j; /* Index into NEEDLE for current candidate suffix. */ + size_t k; /* Offset into current period. */ + size_t p; /* Intermediate period. */ + unsigned char a, b; /* Current comparison bytes. */ + + /* Invariants: + 0 <= j < NEEDLE_LEN - 1 + -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) + min(max_suffix, max_suffix_rev) < global period of NEEDLE + 1 <= p <= global period of NEEDLE + p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] + 1 <= k <= p + */ + + /* Perform lexicographic search. */ + max_suffix = SIZE_MAX; + j = 0; + k = p = 1; + while (j + k < needle_len) + { + a = CANON_ELEMENT (needle[j + k]); + b = CANON_ELEMENT (needle[(size_t)(max_suffix + k)]); + if (a < b) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* b < a */ + { + /* Suffix is larger, start over from current location. */ + max_suffix = j++; + k = p = 1; + } + } + *period = p; + + /* Perform reverse lexicographic search. */ + max_suffix_rev = SIZE_MAX; + j = 0; + k = p = 1; + while (j + k < needle_len) + { + a = CANON_ELEMENT (needle[j + k]); + b = CANON_ELEMENT (needle[max_suffix_rev + k]); + if (b < a) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix_rev; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* a < b */ + { + /* Suffix is larger, start over from current location. */ + max_suffix_rev = j++; + k = p = 1; + } + } + + /* Choose the longer suffix. Return the first byte of the right + half, rather than the last byte of the left half. 
*/ + if (max_suffix_rev + 1 < max_suffix + 1) + return max_suffix + 1; + *period = p; + return max_suffix_rev + 1; +} + +/* Return the first location of non-empty NEEDLE within HAYSTACK, or + NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This + method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. + Performance is guaranteed to be linear, with an initialization cost + of 2 * NEEDLE_LEN comparisons. + + If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at + most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. + If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * + HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ +static RETURN_TYPE +two_way_short_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) +{ + size_t i; /* Index into current byte of NEEDLE. */ + size_t j; /* Index into current window of HAYSTACK. */ + size_t period; /* The period of the right half of needle. */ + size_t suffix; /* The index of the right half of needle. */ + + /* Factor the needle into two halves, such that the left half is + smaller than the global period, and the right half is + periodic (with a period as large as NEEDLE_LEN - suffix). */ + suffix = critical_factorization (needle, needle_len, &period); + + /* Perform the search. Each iteration compares the right half + first. */ + if (CMP_FUNC (needle, needle + period, suffix) == 0) + { + /* Entire needle is periodic; a mismatch can only advance by the + period, so use memory to avoid rescanning known occurrences + of the period. */ + size_t memory = 0; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Scan for matches in right half. */ + i = MAX (suffix, memory); + while (i < needle_len && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. 
*/ + i = suffix - 1; + while (memory < i + 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i + 1 < memory + 1) + return (RETURN_TYPE) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } + } + else + { + /* The two halves of needle are distinct; no extra memory is + required, and any mismatch results in a maximal shift. */ + period = MAX (suffix, needle_len - suffix) + 1; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Scan for matches in right half. */ + i = suffix; + while (i < needle_len && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i == SIZE_MAX) + return (RETURN_TYPE) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } + } + return NULL; +} + +/* Return the first location of non-empty NEEDLE within HAYSTACK, or + NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This + method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. + Performance is guaranteed to be linear, with an initialization cost + of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations. + + If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at + most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, + and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. + If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * + HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and + sublinear performance is not possible. 
*/ +static RETURN_TYPE +two_way_long_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) +{ + size_t i; /* Index into current byte of NEEDLE. */ + size_t j; /* Index into current window of HAYSTACK. */ + size_t period; /* The period of the right half of needle. */ + size_t suffix; /* The index of the right half of needle. */ + size_t shift_table[1U << CHAR_BIT]; /* See below. */ + + /* Factor the needle into two halves, such that the left half is + smaller than the global period, and the right half is + periodic (with a period as large as NEEDLE_LEN - suffix). */ + suffix = critical_factorization (needle, needle_len, &period); + + /* Populate shift_table. For each possible byte value c, + shift_table[c] is the distance from the last occurrence of c to + the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE. + shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */ + for (i = 0; i < 1U << CHAR_BIT; i++) + shift_table[i] = needle_len; + for (i = 0; i < needle_len; i++) + shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1; + + /* Perform the search. Each iteration compares the right half + first. */ + if (CMP_FUNC (needle, needle + period, suffix) == 0) + { + /* Entire needle is periodic; a mismatch can only advance by the + period, so use memory to avoid rescanning known occurrences + of the period. */ + size_t memory = 0; + size_t shift; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; + if (0 < shift) + { + if (memory && shift < period) + { + /* Since needle is periodic, but the last period has + a byte out of place, there can be no match until + after the mismatch. */ + shift = needle_len - period; + } + memory = 0; + j += shift; + continue; + } + /* Scan for matches in right half. 
The last byte has + already been matched, by virtue of the shift table. */ + i = MAX (suffix, memory); + while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i + 1 < memory + 1) + return (RETURN_TYPE) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } + } + else + { + /* The two halves of needle are distinct; no extra memory is + required, and any mismatch results in a maximal shift. */ + size_t shift; + period = MAX (suffix, needle_len - suffix) + 1; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; + if (0 < shift) + { + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = suffix; + while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. 
*/ + i = suffix - 1; + while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i == SIZE_MAX) + return (RETURN_TYPE) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } + } + return NULL; +} + +#undef AVAILABLE +#undef CANON_ELEMENT +#undef CMP_FUNC +#undef MAX +#undef RETURN_TYPE diff --git a/venv/Lib/site-packages/numexpr/tests/__init__.py b/venv/Lib/site-packages/numexpr/tests/__init__.py new file mode 100644 index 0000000..f47c8cc --- /dev/null +++ b/venv/Lib/site-packages/numexpr/tests/__init__.py @@ -0,0 +1,14 @@ +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. +#################################################################### + +from numexpr.tests.test_numexpr import print_versions, test + +if __name__ == '__main__': + test() diff --git a/venv/Lib/site-packages/numexpr/tests/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/numexpr/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..8b9e018 Binary files /dev/null and b/venv/Lib/site-packages/numexpr/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/tests/__pycache__/conftest.cpython-311.pyc b/venv/Lib/site-packages/numexpr/tests/__pycache__/conftest.cpython-311.pyc new file mode 100644 index 0000000..64ce5ee Binary files /dev/null and b/venv/Lib/site-packages/numexpr/tests/__pycache__/conftest.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/numexpr/tests/__pycache__/test_numexpr.cpython-311.pyc b/venv/Lib/site-packages/numexpr/tests/__pycache__/test_numexpr.cpython-311.pyc new file mode 100644 index 0000000..ca4213a Binary files /dev/null and b/venv/Lib/site-packages/numexpr/tests/__pycache__/test_numexpr.cpython-311.pyc differ 
diff --git a/venv/Lib/site-packages/numexpr/tests/conftest.py b/venv/Lib/site-packages/numexpr/tests/conftest.py new file mode 100644 index 0000000..3d32260 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/tests/conftest.py @@ -0,0 +1,17 @@ +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. +#################################################################### + +import pytest + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "thread_unsafe: mark test as unsafe for parallel execution" + ) diff --git a/venv/Lib/site-packages/numexpr/tests/test_numexpr.py b/venv/Lib/site-packages/numexpr/tests/test_numexpr.py new file mode 100644 index 0000000..9e98ff1 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/tests/test_numexpr.py @@ -0,0 +1,1553 @@ + +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. 
+#################################################################### + + +import os +import platform +import subprocess +import sys +import unittest +import warnings +from contextlib import contextmanager +from unittest.mock import MagicMock + +import numpy +import numpy as np +from numpy import all as alltrue +from numpy import (allclose, arange, arccos, arccosh, arcsin, arcsinh, arctan, + arctan2, arctanh, array, array_equal, cdouble, ceil, conj, + copy, copysign, cos, cosh, empty, exp, expm1, float64, + floor, fmod, hypot, int32, int64, isfinite, isinf, isnan, + linspace, log, log1p, log2, log10, maximum, minimum, + nextafter, ones_like, prod, ravel, rec, round, shape, sign, + signbit, sin, sinh, sqrt, sum, tan, tanh, trunc, uint16, + where, zeros) +from numpy.testing import (assert_allclose, assert_array_almost_equal, + assert_array_equal, assert_equal) + +import numexpr +from numexpr import (E, NumExpr, disassemble, evaluate, re_evaluate, use_vml, + validate) +from numexpr.expressions import ConstantNode +from numexpr.utils import detect_number_of_cores + +try: + import pytest + pytest_available = True +except ImportError: + pytest_available = False + +TestCase = unittest.TestCase + +double = np.double +long = int +MAX_THREADS = 16 + + +if not pytest_available: + def identity(f): + return f + + pytest = MagicMock() + pytest.mark = MagicMock() + pytest.mark.thread_unsafe = identity + + +class test_numexpr(TestCase): + """Testing with 1 thread""" + nthreads = 1 + + def setUp(self): + numexpr.set_num_threads(self.nthreads) + + def test_simple(self): + ex = 2.0 * E.a + 3.0 * E.b * E.c + sig = [('a', double), ('b', double), ('c', double)] + func = NumExpr(ex, signature=sig) + x = func(array([1., 2, 3]), array([4., 5, 6]), array([7., 8, 9])) + assert_array_equal(x, array([86., 124., 168.])) + + def test_simple_expr_small_array(self): + func = NumExpr(E.a) + x = arange(100.0) + y = func(x) + assert_array_equal(x, y) + + def test_simple_expr(self): + func = 
NumExpr(E.a) + x = arange(1e6) + y = func(x) + assert_array_equal(x, y) + + def test_rational_expr(self): + func = NumExpr((E.a + 2.0 * E.b) / (1 + E.a + 4 * E.b * E.b)) + a = arange(1e6) + b = arange(1e6) * 0.1 + x = (a + 2 * b) / (1 + a + 4 * b * b) + y = func(a, b) + assert_array_almost_equal(x, y) + + def test_reductions(self): + # Check that they compile OK. + assert_equal(disassemble( + NumExpr("sum(x**2+2, axis=None)", [('x', double)])), + [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), + (b'add_ddd', b't3', b't3', b'c2[2.0]'), + (b'sum_ddn', b'r0', b't3', None)]) + assert_equal(disassemble( + NumExpr("sum(x**2+2, axis=1)", [('x', double)])), + [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), + (b'add_ddd', b't3', b't3', b'c2[2.0]'), + (b'sum_ddn', b'r0', b't3', 1)]) + assert_equal(disassemble( + NumExpr("prod(x**2+2, axis=2)", [('x', double)])), + [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), + (b'add_ddd', b't3', b't3', b'c2[2.0]'), + (b'prod_ddn', b'r0', b't3', 2)]) + # Check that full reductions work. 
+ x = zeros(100000) + .01 # checks issue #41 + assert_allclose(evaluate("sum(x+2,axis=None)"), sum(x + 2, axis=None)) + assert_allclose(evaluate("sum(x+2,axis=0)"), sum(x + 2, axis=0)) + assert_allclose(evaluate("prod(x,axis=0)"), prod(x, axis=0)) + assert_allclose(evaluate("min(x)"), np.min(x)) + assert_allclose(evaluate("max(x,axis=0)"), np.max(x, axis=0)) + + # Fix for #277, array with leading singleton dimension + x = np.arange(10).reshape(1,10) + assert_allclose(evaluate("sum(x,axis=None)"), sum(x, axis=None) ) + assert_allclose(evaluate("sum(x,axis=0)"), sum(x, axis=0) ) + assert_allclose(evaluate("sum(x,axis=1)"), sum(x, axis=1) ) + + x = arange(10.0) + assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0)) + + x = arange(100.0) + assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0)) + assert_allclose(evaluate("min(x-1,axis=0)"), np.min(x - 1, axis=0)) + assert_allclose(evaluate("max(x-1,axis=0)"), np.max(x - 1, axis=0)) + x = linspace(0.1, 1.0, 2000) + assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0)) + assert_allclose(evaluate("min(x-1,axis=0)"), np.min(x - 1, axis=0)) + assert_allclose(evaluate("max(x-1,axis=0)"), np.max(x - 1, axis=0)) + + # Check that reductions along an axis work + y = arange(9.0).reshape(3, 3) + assert_allclose(evaluate("sum(y**2, axis=1)"), sum(y ** 2, axis=1)) + assert_allclose(evaluate("sum(y**2, axis=0)"), sum(y ** 2, axis=0)) + assert_allclose(evaluate("sum(y**2, axis=None)"), sum(y ** 2, axis=None)) + assert_allclose(evaluate("prod(y**2, axis=1)"), prod(y ** 2, axis=1)) + 
assert_allclose(evaluate("prod(y**2, axis=0)"), prod(y ** 2, axis=0)) + assert_allclose(evaluate("prod(y**2, axis=None)"), prod(y ** 2, axis=None)) + assert_allclose(evaluate("min(y**2, axis=1)"), np.min(y ** 2, axis=1)) + assert_allclose(evaluate("min(y**2, axis=0)"), np.min(y ** 2, axis=0)) + assert_allclose(evaluate("min(y**2, axis=None)"), np.min(y ** 2, axis=None)) + assert_allclose(evaluate("max(y**2, axis=1)"), np.max(y ** 2, axis=1)) + assert_allclose(evaluate("max(y**2, axis=0)"), np.max(y ** 2, axis=0)) + assert_allclose(evaluate("max(y**2, axis=None)"), np.max(y ** 2, axis=None)) + # Check integers + x = arange(10.) + x = x.astype(int) + assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0)) + # Check longs + x = x.astype(int) + assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0)) + # Check complex + x = x + .1j + assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) + assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0)) + + def test_in_place(self): + x = arange(10000.).reshape(1000, 10) + evaluate("x + 3", out=x) + assert_equal(x, arange(10000.).reshape(1000, 10) + 3) + y = arange(10) + evaluate("(x - 3) * y + (x - 3)", out=x) + assert_equal(x, arange(10000.).reshape(1000, 10) * (arange(10) + 1)) + + def test_axis(self): + y = arange(9.0).reshape(3, 3) + try: + evaluate("sum(y, axis=2)") + except ValueError: + pass + else: + raise ValueError("should raise exception!") + try: + evaluate("sum(y, axis=-3)") + except ValueError: + 
pass + else: + raise ValueError("should raise exception!") + try: + # Negative axis are not supported + evaluate("sum(y, axis=-1)") + except ValueError: + pass + else: + raise ValueError("should raise exception!") + + def test_r0_reuse(self): + assert_equal(disassemble(NumExpr("x * x + 2", [('x', double)])), + [(b'mul_ddd', b'r0', b'r1[x]', b'r1[x]'), + (b'add_ddd', b'r0', b'r0', b'c2[2.0]')]) + + def test_str_contains_basic0(self): + res = evaluate('contains(b"abc", b"ab")') + assert_equal(res, True) + + def test_str_contains_basic1(self): + haystack = array([b'abc', b'def', b'xyz', b'x11', b'za']) + res = evaluate('contains(haystack, b"ab")') + assert_equal(res, [True, False, False, False, False]) + + def test_str_contains_basic2(self): + haystack = array([b'abc', b'def', b'xyz', b'x11', b'za']) + res = evaluate('contains(b"abcd", haystack)') + assert_equal(res, [True, False, False, False, False]) + + def test_str_contains_basic3(self): + haystacks = array( + [b'abckkk', b'adef', b'xyz', b'x11abcp', b'za', b'abc']) + needles = array( + [b'abc', b'def', b'aterr', b'oot', b'zu', b'ab']) + res = evaluate('contains(haystacks, needles)') + assert_equal(res, [True, True, False, False, False, True]) + + def test_str_contains_basic4(self): + needles = array( + [b'abc', b'def', b'aterr', b'oot', b'zu', b'ab c', b' abc', + b'abc ']) + res = evaluate('contains(b"test abc here", needles)') + assert_equal(res, [True, False, False, False, False, False, True, True]) + + def test_str_contains_basic5(self): + needles = array( + [b'abc', b'ab c', b' abc', b' abc ', b'\tabc', b'c h']) + res = evaluate('contains(b"test abc here", needles)') + assert_equal(res, [True, False, True, True, False, True]) + + # Compare operation of Python 'in' operator with 'contains' using a + # product of two lists of strings. 
+ + def test_str_contains_listproduct(self): + from itertools import product + + small = [ + 'It w', 'as th', 'e Whit', 'e Rab', 'bit,', ' tro', 'tting', + ' sl', 'owly', ' back ', 'again,', ' and', ' lo', 'okin', 'g a', + 'nxious', 'ly a', 'bou', 't a', 's it w', 'ent,', ' as i', 'f it', + ' had l', 'ost', ' some', 'thi', 'ng; a', 'nd ', 'she ', 'heard ', + 'it mut', 'terin', 'g to ', 'its', 'elf ', "'The", + ' Duch', 'ess! T', 'he ', 'Duches', 's! Oh ', 'my dea', 'r paws', + '! Oh ', 'my f', 'ur ', 'and ', 'whiske', 'rs! ', 'She', "'ll g", + 'et me', ' ex', 'ecu', 'ted, ', 'as su', 're a', 's f', 'errets', + ' are f', 'errets', '! Wh', 'ere ', 'CAN', ' I hav', 'e d', + 'roppe', 'd t', 'hem,', ' I wo', 'nder?', "' A", 'lice', + ' gu', 'essed', ' in a', ' mom', 'ent ', 'tha', 't it w', 'as ', + 'looki', 'ng f', 'or ', 'the fa', 'n and ', 'the', ' pai', + 'r of w', 'hit', 'e kid', ' glo', 'ves', ', and ', 'she ', + 'very g', 'ood', '-na', 'turedl', 'y be', 'gan h', 'unt', 'ing', + ' about', ' for t', 'hem', ', but', ' they ', 'wer', 'e nowh', + 'ere to', ' be', ' se', 'en--', 'ever', 'ythin', 'g seem', 'ed ', + 'to ', 'have c', 'hang', 'ed ', 'since', ' he', 'r swim', ' in', + ' the', ' pool,', ' and', ' the g', 'reat ', 'hal', 'l, w', 'ith', + ' th', 'e gl', 'ass t', 'abl', 'e and ', 'the', ' li', 'ttle', + ' doo', 'r, ha', 'd v', 'ani', 'shed c', 'omp', 'lete', 'ly.'] + big = [ + 'It wa', 's the', ' W', 'hit', 'e ', 'Ra', 'bb', 'it, t', 'ro', + 'tting s', 'lowly', ' back ', 'agai', 'n, and', ' l', 'ookin', + 'g ', 'an', 'xiously', ' about ', 'as it w', 'ent, as', ' if ', + 'it had', ' los', 't ', 'so', 'mething', '; and', ' she h', + 'eard ', 'it ', 'mutteri', 'ng to', ' itself', " 'The ", + 'Duchess', '! ', 'Th', 'e ', 'Duchess', '! Oh m', 'y de', + 'ar paws', '! ', 'Oh my ', 'fu', 'r and w', 'hiskers', "! 
She'", + 'll ', 'get', ' me ', 'execute', 'd,', ' a', 's ', 'su', 're as ', + 'fe', 'rrets', ' are f', 'errets!', ' Wher', 'e CAN', ' I ha', + 've dro', 'pped t', 'hem', ', I ', 'won', "der?' A", + 'lice g', 'uess', 'ed ', 'in a m', 'omen', 't that', ' i', + 't was l', 'ook', 'ing f', 'or th', 'e ', 'fan and', ' th', 'e p', + 'air o', 'f whit', 'e ki', 'd glove', 's, and ', 'she v', 'ery ', + 'good-na', 'tu', 'redl', 'y be', 'gan hun', 'ti', 'ng abou', + 't for t', 'he', 'm, bu', 't t', 'hey ', 'were n', 'owhere', + ' to b', 'e s', 'een-', '-eve', 'rythi', 'ng see', 'me', 'd ', + 'to ha', 've', ' c', 'hanged', ' sinc', 'e her s', 'wim ', + 'in the ', 'pool,', ' an', 'd the g', 'rea', 't h', 'all, wi', + 'th the ', 'glas', 's t', 'able an', 'd th', 'e littl', 'e door,', + ' had va', 'ni', 'shed co', 'mpletel', 'y.'] + p = list(product(small, big)) + python_in = [x[0] in x[1] for x in p] + a = [x[0].encode() for x in p] + b = [x[1].encode() for x in p] + res = [bool(x) for x in evaluate('contains(b, a)')] + assert_equal(res, python_in) + + def test_str_contains_withemptystr1(self): + withemptystr = array([b'abc', b'def', b'']) + res = evaluate('contains(b"abcd", withemptystr)') + assert_equal(res, [True, False, True]) + + def test_str_contains_withemptystr2(self): + withemptystr = array([b'abc', b'def', b'']) + res = evaluate('contains(withemptystr, b"")') + assert_equal(res, [True, True, True]) + + def test_str_contains_long_needle(self): + a = b'1' + b'a' * 40 + b = b'a' * 40 + res = evaluate('contains(a, b)') + assert_equal(res, True) + + def test_where_scalar_bool(self): + a = True + b = array([1, 2]) + c = array([3, 4]) + res = evaluate('where(a, b, c)') + assert_array_equal(res, b) + a = False + res = evaluate('where(a, b, c)') + assert_array_equal(res, c) + + # Comment out this test completely, as modern Python optimizes handling refcounts. + # See #511 for more info. 
+ @unittest.skipIf(hasattr(sys, "pypy_version_info"), + "PyPy does not have sys.getrefcount()") + def _test_refcount(self): + # Regression test for issue #310 + a = array([1]) + assert sys.getrefcount(a) == 2 + evaluate('1') + assert sys.getrefcount(a) == 2 + + @pytest.mark.thread_unsafe + def test_locals_clears_globals(self): + # Check for issue #313, whereby clearing f_locals also clear f_globals + # if in the top-frame. This cannot be done inside `unittest` as it is always + # executing code in a child frame. + script = r';'.join([ + r"import numexpr as ne", + r"a=10", + r"ne.evaluate('1')", + r"a += 1", + r"ne.evaluate('2', local_dict={})", + r"a += 1", + r"ne.evaluate('3', global_dict={})", + r"a += 1", + r"ne.evaluate('4', local_dict={}, global_dict={})", + r"a += 1", + ]) + # Raises CalledProcessError on a non-normal exit + check = subprocess.check_call([sys.executable, '-c', script]) + # Ideally this test should also be done against ipython but it's not + # a requirement. + + + +@pytest.mark.thread_unsafe +class test_numexpr2(test_numexpr): + """Testing with 2 threads""" + nthreads = 2 + + +class test_evaluate(TestCase): + def test_simple(self): + a = array([1., 2., 3.]) + b = array([4., 5., 6.]) + c = array([7., 8., 9.]) + x = evaluate("2*a + 3*b*c") + assert_array_equal(x, array([86., 124., 168.])) + + def test_simple_expr_small_array(self): + x = arange(100.0) + y = evaluate("x") + assert_array_equal(x, y) + + def test_simple_expr(self): + x = arange(1e6) + y = evaluate("x") + assert_array_equal(x, y) + + def test_re_evaluate(self): + a = array([1., 2., 3.]) + b = array([4., 5., 6.]) + c = array([7., 8., 9.]) + x = evaluate("2*a + 3*b*c") + x = re_evaluate() + assert_array_equal(x, array([86., 124., 168.])) + + def test_re_evaluate_dict(self): + a1 = array([1., 2., 3.]) + b1 = array([4., 5., 6.]) + c1 = array([7., 8., 9.]) + local_dict={'a': a1, 'b': b1, 'c': c1} + x = evaluate("2*a + 3*b*c", local_dict=local_dict) + x = 
re_evaluate(local_dict=local_dict) + assert_array_equal(x, array([86., 124., 168.])) + + def test_validate(self): + a = array([1., 2., 3.]) + b = array([4., 5., 6.]) + c = array([7., 8., 9.]) + retval = validate("2*a + 3*b*c") + assert(retval is None) + x = re_evaluate() + assert_array_equal(x, array([86., 124., 168.])) + + def test_validate_missing_var(self): + a = array([1., 2., 3.]) + b = array([4., 5., 6.]) + retval = validate("2*a + 3*b*c") + assert(isinstance(retval, KeyError)) + + def test_validate_syntax(self): + retval = validate("2+") + assert(isinstance(retval, SyntaxError)) + + def test_validate_dict(self): + a1 = array([1., 2., 3.]) + b1 = array([4., 5., 6.]) + c1 = array([7., 8., 9.]) + local_dict={'a': a1, 'b': b1, 'c': c1} + retval = validate("2*a + 3*b*c", local_dict=local_dict) + assert(retval is None) + x = re_evaluate(local_dict=local_dict) + assert_array_equal(x, array([86., 124., 168.])) + + # Test for issue #22 + def test_true_div(self): + x = arange(10, dtype='i4') + assert_array_equal(evaluate("x/2"), x / 2) + assert_array_equal(evaluate("x/2", truediv=False), x / 2) + assert_array_equal(evaluate("x/2", truediv='auto'), x / 2) + assert_array_equal(evaluate("x/2", truediv=True), x / 2.0) + + def test_left_shift(self): + x = arange(10, dtype='i4') + assert_array_equal(evaluate("x<<2"), x << 2) + + def test_right_shift(self): + x = arange(10, dtype='i4') + assert_array_equal(evaluate("x>>2"), x >> 2) + + # PyTables uses __nonzero__ among ExpressionNode objects internally + # so this should be commented out for the moment. See #24. 
+ def test_boolean_operator(self): + x = arange(10, dtype='i4') + try: + evaluate("(x > 1) and (x < 9)") + except TypeError: + pass + else: + raise ValueError("should raise exception!") + + x = np.ones(10, dtype='bool') + y = np.zeros(10, dtype='bool') + assert_array_equal(evaluate("x & y"), x & y) # and + assert_array_equal(evaluate("x ^ y"), x ^ y) # xor + assert_array_equal(evaluate("x | y"), x | y) # or + assert_array_equal(evaluate("~x"), ~x) # invert + + def test_bitwise_operators(self): + x = arange(10, dtype='i4') + y = arange(10, dtype='i4') + assert_array_equal(evaluate("x & y"), x & y) # and + assert_array_equal(evaluate("x ^ y"), x ^ y) # xor + assert_array_equal(evaluate("x | y"), x | y) # or + assert_array_equal(evaluate("~x"), ~x) # invert + + x = arange(10, dtype='i8') + y = arange(10, dtype='i8') + assert_array_equal(evaluate("x & y"), x & y) # and + assert_array_equal(evaluate("x ^ y"), x ^ y) # xor + assert_array_equal(evaluate("x | y"), x | y) # or + assert_array_equal(evaluate("~x"), ~x) # invert + + def test_complex_tan(self): + # old version of NumExpr had overflow problems + x = np.arange(1, 400., step=16., dtype=np.complex128) + y = 1j*np.arange(1, 400., step=16., dtype=np.complex128) + assert_array_almost_equal(evaluate("tan(x + y)"), tan(x + y)) + assert_array_almost_equal(evaluate("tanh(x + y)"), tanh(x + y)) + + def test_maximum_minimum(self): + for dtype in [float, double, int, np.int64]: + x = arange(10, dtype=dtype) + y = 2 * arange(10, dtype=dtype)[::-1] + if dtype in (float, double): + y[5] = np.nan + x[2] = np.nan + assert_array_equal(evaluate("maximum(x,y)"), maximum(x,y)) + assert_array_equal(evaluate("minimum(x,y)"), minimum(x,y)) + + def test_addmult_booleans(self): + x = np.asarray([0, 1, 0, 0, 1], dtype=bool) + y = x[::-1] + res_ne = evaluate("x * y") + res_np = x * y + assert_array_equal(res_ne, res_np) + assert res_ne.dtype == res_np.dtype + res_ne = evaluate("x + y") + res_np = x + y + assert_array_equal(res_ne, res_np) + 
assert res_ne.dtype == res_np.dtype + + def test_sign_round(self): + for dtype in [float, double, np.int32, np.int64, complex]: + x = arange(10, dtype=dtype) + y = 2 * arange(10, dtype=dtype)[::-1] + r = x-y + if not np.issubdtype(dtype, np.integer): + r[-1] = np.nan + assert evaluate("round(r)").dtype == round(r).dtype + assert evaluate("sign(r)").dtype == sign(r).dtype + assert_array_equal(evaluate("sign(r)"), sign(r)) + assert_array_equal(evaluate("round(r)"), round(r)) + + def test_rational_expr(self): + a = arange(1e6) + b = arange(1e6) * 0.1 + x = (a + 2 * b) / (1 + a + 4 * b * b) + y = evaluate("(a + 2*b) / (1 + a + 4*b*b)") + assert_array_almost_equal(x, y) + + def test_complex_expr(self): + def complex(a, b): + c = zeros(a.shape, dtype=cdouble) + c.real = a + c.imag = b + return c + + a = arange(1e4) + b = arange(1e4) ** 1e-5 + z = a + 1j * b + x = z.imag + x = sin(complex(a, b)).real + z.imag + y = evaluate("sin(complex(a, b)).real + z.imag") + assert_array_almost_equal(x, y) + + def test_complex_strides(self): + a = arange(100).reshape(10, 10)[::2] + b = arange(50).reshape(5, 10) + assert_array_equal(evaluate("a+b"), a + b) + c = empty([10], dtype=[('c1', int32), ('c2', uint16)]) + c['c1'] = arange(10) + c['c2'].fill(0xaaaa) + c1 = c['c1'] + a0 = a[0] + assert_array_equal(evaluate("c1"), c1) + assert_array_equal(evaluate("a0+c1"), a0 + c1) + + def test_recarray_strides(self): + a = arange(100) + b = arange(100,200) + recarr = np.rec.array(None, formats='f4,f4', shape=(100,)) + recarr['f0'] = a + recarr['f1'] = b + c = recarr['f1'] + assert_array_almost_equal(evaluate("sqrt(c) > 1."), sqrt(c) > 1.) 
+ assert_array_almost_equal(evaluate("log10(c)"), log10(c)) + + def test_broadcasting(self): + a = arange(100).reshape(10, 10)[::2] + c = arange(10) + d = arange(5).reshape(5, 1) + assert_array_equal(evaluate("a+c"), a + c) + assert_array_equal(evaluate("a+d"), a + d) + expr = NumExpr("2.0*a+3.0*c", [('a', double), ('c', double)]) + assert_array_equal(expr(a, c), 2.0 * a + 3.0 * c) + + def test_all_scalar(self): + a = 3. + b = 4. + assert_allclose(evaluate("a+b"), a + b) + expr = NumExpr("2*a+3*b", [('a', double), ('b', double)]) + assert_equal(expr(a, b), 2 * a + 3 * b) + + def test_run(self): + a = arange(100).reshape(10, 10)[::2] + b = arange(10) + expr = NumExpr("2*a+3*b", [('a', double), ('b', double)]) + assert_array_equal(expr(a, b), expr.run(a, b)) + + def test_illegal_value(self): + a = arange(3) + try: + evaluate("a < [0, 0, 0]") + except (ValueError, TypeError): + pass + else: + self.fail() + + @pytest.mark.thread_unsafe + def test_sanitize(self): + with _environment('NUMEXPR_SANITIZE', '1'): + # Forbid dunder + try: + evaluate('__builtins__') + except ValueError: + pass + else: + self.fail() + + # Forbid colon for lambda funcs + try: + evaluate('lambda x: x') + except ValueError: + pass + else: + self.fail() + + # Forbid indexing + try: + evaluate('locals()["evaluate"]') + except ValueError: + pass + else: + self.fail() + + # Forbid semicolon + try: + evaluate('import os;') + except ValueError: + pass + else: + self.fail() + + # Attribute access with spaces + try: + evaluate('os. cpu_count()') + except ValueError: + pass + else: + self.fail() + + # Attribute access with funny unicode characters that eval translates + # into ASCII. 
+ try: + evaluate("(3+1).ᵇit_length()") + except ValueError: + pass + else: + self.fail() + + # Pass decimal points including scientific notation + a = 3.0 + evaluate('a*2.e-5') + evaluate('a*2.e+5') + evaluate('a*2e-5') + evaluate('a*2e+5') + evaluate('a*2E-5') + evaluate('a*2.0e5') + evaluate('a*2.2e5') + evaluate('2.+a') + + # pass .real and .imag + c = 2.5 + 1.5j + evaluate('c.real') + evaluate('c.imag') + + # pass imaginary unit j + evaluate('1.5j') + evaluate('3.j') + + #pass imaginary with scientific notation + evaluate('1.2e3+4.5e6j') + + # pass forbidden characters within quotes + x = np.array(['a', 'b'], dtype=bytes) + evaluate("x == 'b:'") + + @pytest.mark.thread_unsafe + def test_no_sanitize(self): + try: # Errors on compile() after eval() + evaluate('import os;', sanitize=False) + except SyntaxError: + pass + else: + self.fail() + + with _environment('NUMEXPR_SANITIZE', '0'): + try: # Errors on compile() after eval() + evaluate('import os;', sanitize=None) + except SyntaxError: + pass + else: + self.fail() + + def test_disassemble(self): + assert_equal(disassemble(NumExpr( + "where(m, a, -1)", [('m', bool), ('a', float)])), + [[b'where_fbff', b'r0', b'r1[m]', b'r2[a]', b'c3[-1.0]'], + [b'noop', None, None, None]]) + + def test_constant_deduplication(self): + assert_equal(NumExpr("(a + 1)*(a - 1)", [('a', np.int32)]).constants, (1,)) + + def test_nan_constant(self): + assert_equal(str(ConstantNode(float("nan")).value), 'nan') + + # check de-duplication works for nan + _nan = ConstantNode(float("nan")) + expr = (E.a + _nan)*(E.b + _nan) + assert_equal(NumExpr(expr, [('a', double), ('b', double)]).constants, (float("nan"),)) + + + def test_f32_constant(self): + assert_equal(ConstantNode(numpy.float32(1)).astKind, "float") + assert_equal(ConstantNode(numpy.float32("nan")).astKind, "float") + assert_equal(ConstantNode(numpy.float32(3)).value.dtype, numpy.dtype("float32")) + assert_array_equal(NumExpr(ConstantNode(numpy.float32(1))).run(), + numpy.array(1, 
dtype="float32")) + + def test_unaligned_singleton(self): + # Test for issue #397 whether singletons outputs assigned to consts must be + # aligned or not. + a = np.empty(5, dtype=np.uint8)[1:].view(np.int32) + evaluate('3', out=a) + assert_equal(a, 3) + + def test_negative_mod(self): + # Test for issue #413, modulus of negative integers. C modulus is + # actually remainder op, and hence different from Python modulus. + a = np.array([-500, -135, 0, 0, 135, 500], dtype=np.int32) + n = np.array([-360, -360, -360, 360, 360, 360], dtype=np.int32) + out_i = evaluate('a % n') + assert_equal(out_i, np.mod(a, n)) + main_i = evaluate('a // n') + assert_equal(main_i, a // n) + + b = a.astype(np.int64) + m = n.astype(np.int64) + out_l = evaluate('b % m') + assert_equal(out_l, np.mod(b, m)) + main_l = evaluate('b // m') + assert_equal(main_l, a // m) + + def test_negative_power_scalar(self): + # Test for issue #428, where the power is negative and the base is an + # integer. This was running afoul in the precomputation in `expressions.py:pow_op()` + base = np.array([-2, -1, 1, 2, 3], dtype=np.int32) + out_i = evaluate('base ** -1.0') + assert_equal(out_i, np.power(base, -1.0)) + + base = np.array([-2, -1, 1, 2, 3], dtype=np.int64) + out_l = evaluate('base ** -1.0') + assert_equal(out_l, np.power(base, -1.0)) + + def test_ex_uses_vml(self): + vml_funcs = [ "sin", "cos", "tan", "arcsin", "arccos", "arctan", + "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh", + "log", "log1p","log10", "log2", "exp", "expm1", "abs", "conj", + "arctan2", "fmod", "hypot"] + for func in vml_funcs: + strexpr = func+'(a)' + _, ex_uses_vml = numexpr.necompiler.getExprNames(strexpr, {}) + assert_equal(ex_uses_vml, use_vml, strexpr) + + def test_bool_funcs(self): + # Test functions with boolean outputs + array_size = 100 + dtype = np.float32 + a = np.arange(2 * array_size, dtype=dtype) + a[array_size//2] = np.nan + a[array_size//3] = np.inf + a[array_size//4] = -2 + + 
assert_equal(evaluate("isnan(a)"), isnan(a)) + assert_equal(evaluate("isfinite(a)"), isfinite(a)) + assert_equal(evaluate("isinf(a)"), isinf(a)) + assert_equal(evaluate("signbit(a)"), signbit(a)) + + a = a.astype(np.float64) + assert a.dtype == np.float64 + assert_equal(evaluate("isnan(a)"), isnan(a)) + assert_equal(evaluate("isfinite(a)"), isfinite(a)) + assert_equal(evaluate("isinf(a)"), isinf(a)) + assert_equal(evaluate("signbit(a)"), signbit(a)) + + a = a.astype(np.complex128) + assert a.dtype == np.complex128 + assert np.all(evaluate("isnan(a)") == np.isnan(a)) + assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) + assert np.all(evaluate("isinf(a)") == np.isinf(a)) + # signbit not defined for complex numbers + + if 'sparc' not in platform.machine(): + # Execution order set here so as to not use too many threads + # during the rest of the execution. See #33 for details. + @pytest.mark.thread_unsafe + def test_changing_nthreads_00_inc(self): + a = linspace(-1, 1, 1000000) + b = ((.25 * a + .75) * a - 1.5) * a - 2 + for nthreads in range(1, 7): + numexpr.set_num_threads(nthreads) + c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") + assert_array_almost_equal(b, c) + + @pytest.mark.thread_unsafe + def test_changing_nthreads_01_dec(self): + a = linspace(-1, 1, 1000000) + b = ((.25 * a + .75) * a - 1.5) * a - 2 + for nthreads in range(6, 1, -1): + numexpr.set_num_threads(nthreads) + c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") + assert_array_almost_equal(b, c) + + +tests = [ + ('MISC', ['b*c+d*e', + '2*a+3*b', + '-a', + 'sinh(a)', + '2*a + (cos(3)+5)*sinh(cos(b))', + '2*a + arctan2(a, b)', + 'arcsin(0.5)', + 'where(a != 0.0, 2, a)', + 'where(a > 10, b < a, b > a)', + 'where((a-10).real != 0.0, a, 2)', + '0.25 * (a < 5) + 0.33 * (a >= 5)', + 'cos(1+1)', + '1+1', + '1', + 'cos(a2)', + ])] + +optests = [] +for op in list('+-*/%') + ['**']: + optests.append("(a+1) %s (b+3)" % op) + optests.append("3 %s (b+3)" % op) + optests.append("(a+1) %s 4" % op) + 
optests.append("2 %s (b+3)" % op) + optests.append("(a+1) %s 2" % op) + optests.append("(a+1) %s -1" % op) + optests.append("(a+1) %s 0.5" % op) + # Check divisions and modulus by zero (see ticket #107) + optests.append("(a+1) %s 0" % op) +tests.append(('OPERATIONS', optests)) + +cmptests = [] +for op in ['<', '<=', '==', '>=', '>', '!=']: + cmptests.append("a/2+5 %s b" % op) + cmptests.append("a/2+5 %s 7" % op) + cmptests.append("7 %s b" % op) + cmptests.append("7.0 %s 5" % op) +tests.append(('COMPARISONS', cmptests)) + +func1tests = [] +for func in ['copy', 'ones_like', 'sqrt', + 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', + 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', + 'log', 'log1p', 'log10', "log2", 'exp', 'expm1', 'abs', 'conj', + 'ceil', 'floor', 'round', 'trunc', 'sign']: + func1tests.append("a + %s(b+c)" % func) +tests.append(('1_ARG_FUNCS', func1tests)) + +func2tests = [] +for func in ['arctan2', 'fmod', 'hypot', 'nextafter', 'copysign']: + func2tests.append("a + %s(b+c, d+1)" % func) + func2tests.append("a + %s(b+c, 1)" % func) + func2tests.append("a + %s(1, d+1)" % func) +tests.append(('2_ARG_FUNCS', func2tests)) + +powtests = [] +# n = -1, 0.5, 2, 4 already handled in section "OPERATIONS" +for n in (-7, -2.5, -1.5, -1.3, -.5, 0, 0.0, 1, 2.3, 2.5, 3): + powtests.append("(a+1)**%s" % n) +tests.append(('POW_TESTS', powtests)) + + +def equal(a, b, exact): + if array_equal(a, b): + return True + + if hasattr(a, 'dtype') and a.dtype in ['f4', 'f8']: + nnans = isnan(a).sum() + if nnans > 0: + # For results containing NaNs, just check that the number + # of NaNs is the same in both arrays. This check could be + # made more exhaustive, but checking element by element in + # python space is very expensive in general. 
+ return nnans == isnan(b).sum() + ninfs = isinf(a).sum() + if ninfs > 0: + # Ditto for Inf's + return ninfs == isinf(b).sum() + if exact: + return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0) + else: + if hasattr(a, 'dtype') and a.dtype == 'f4': + atol = 1e-5 # Relax precision for special opcodes, like fmod + else: + atol = 1e-8 + return (shape(a) == shape(b) and + allclose(ravel(a), ravel(b), atol=atol)) + + +class Skip(Exception): pass + + +@pytest.mark.parametrize( + "expr,test_scalar,dtype,optimization,exact,section_name", + [ + (expr, test_scalar, dtype, optimization, exact, section_name) + for test_scalar in (0, 1, 2) + for dtype in (int, int, np.float32, double, complex) + for optimization, exact in [ + ("none", False), + ("moderate", False), + ("aggressive", False), + ] + for section_name, section_tests in tests + for expr in section_tests + if not ( + dtype == complex + and ( + "<" in expr + or ">" in expr + or "%" in expr + or "arctan2" in expr + or "fmod" in expr + or "hypot" in expr + or "nextafter" in expr + or "copysign" in expr + or "trunc" in expr + or "floor" in expr + or "ceil" in expr + ) + ) + if not (dtype in (int, int) and test_scalar and expr == "(a+1) ** -1") + ], +) +def test_expressions( + expr, test_scalar, dtype, optimization, exact, section_name +): + array_size = 100 + a = arange(2 * array_size, dtype=dtype)[::2] + a2 = zeros([array_size, array_size], dtype=dtype) + b = arange(array_size, dtype=dtype) / array_size + c = arange(array_size, dtype=dtype) + d = arange(array_size, dtype=dtype) + e = arange(array_size, dtype=dtype) + x = None + + if dtype == complex: + a = a.real + for var in [a2, b, c, d, e]: + var += 1j + var *= 1 + 1j + + if test_scalar == 1: + a = a[array_size // 2] + if test_scalar == 2: + b = b[array_size // 2] + + # We don't want to listen at RuntimeWarnings like + # "overflows" or "divide by zero" in plain eval(). 
+ warnings.simplefilter("ignore") + try: + npexpr = expr + if "sign" in expr and dtype==complex and np.__version__<"2.0": + #definition of sign changed in numpy 2.0 for complex numbers + npexpr = expr.replace("sign(b+c)", "(b+c)/abs(b+c)") + npval = eval(npexpr, globals(), locals()) + except Exception as ex: + np_exception = ex + npval = None + else: + np_exception = None + warnings.simplefilter("always") + + try: + neval = evaluate(expr, local_dict=locals(), optimization=optimization) + except AssertionError: + raise + except NotImplementedError: + pytest.skip( + f"{expr!r} not implemented for {dtype.__name__} (scalar={test_scalar}, opt={optimization})" + ) + except Exception as ne_exception: + same_exc_type = issubclass(type(ne_exception), type(np_exception)) + if np_exception is None or not same_exc_type: + pytest.fail(f"numexpr error for expression {expr!r}") + else: + if np_exception is not None: + pytest.fail(f"expected numexpr error not raised for expression {expr!r}") + + assert equal(npval, neval, exact), f"""{expr!r} + (test_scalar={test_scalar!r}, dtype={dtype.__name__!r}, optimization={optimization!r}, exact={exact!r}, + npval={npval!r} ({type(npval)!r} - {shape(npval)!r}) + neval={neval!r} ({type(neval)!r} - {shape(neval)!r}))""" + +class test_int64(TestCase): + def test_neg(self): + a = array([2 ** 31 - 1, 2 ** 31, 2 ** 32, 2 ** 63 - 1], dtype=int64) + res = evaluate('-a') + assert_array_equal(res, [1 - 2 ** 31, -(2 ** 31), -(2 ** 32), 1 - 2 ** 63]) + self.assertEqual(res.dtype.name, 'int64') + + +class test_int32_int64(TestCase): + + def test_small_int(self): + # Small ints (32-bit ones) should not be promoted to longs. + res = evaluate('2') + assert_array_equal(res, 2) + self.assertEqual(res.dtype.name, 'int32') + + def test_big_int(self): + # Big ints should be promoted to longs. 
+ res = evaluate('2**40') + assert_array_equal(res, 2 ** 40) + self.assertEqual(res.dtype.name, 'int64') + + def test_long_constant_promotion(self): + int32array = arange(100, dtype='int32') + itwo = np.int32(2) + ltwo = np.int64(2) + res = int32array * 2 + res32 = evaluate('int32array * itwo') + res64 = evaluate('int32array * ltwo') + assert_array_equal(res, res32) + assert_array_equal(res, res64) + self.assertEqual(res32.dtype.name, 'int32') + self.assertEqual(res64.dtype.name, 'int64') + + def test_int64_array_promotion(self): + int32array = arange(100, dtype='int32') + int64array = arange(100, dtype='int64') + respy = int32array * int64array + resnx = evaluate('int32array * int64array') + assert_array_equal(respy, resnx) + self.assertEqual(resnx.dtype.name, 'int64') + + +class test_uint32_int64(TestCase): + def test_small_uint32(self): + # Small uint32 should not be downgraded to ints. + a = np.uint32(42) + res = evaluate('a') + assert_array_equal(res, 42) + self.assertEqual(res.dtype.name, 'int64') + + def test_uint32_constant_promotion(self): + int32array = arange(100, dtype='int32') + stwo = np.int32(2) + utwo = np.uint32(2) + res = int32array * utwo + res32 = evaluate('int32array * stwo') + res64 = evaluate('int32array * utwo') + assert_array_equal(res, res32) + assert_array_equal(res, res64) + self.assertEqual(res32.dtype.name, 'int32') + self.assertEqual(res64.dtype.name, 'int64') + + def test_int64_array_promotion(self): + uint32array = arange(100, dtype='uint32') + int64array = arange(100, dtype='int64') + respy = uint32array * int64array + resnx = evaluate('uint32array * int64array') + assert_array_equal(respy, resnx) + self.assertEqual(resnx.dtype.name, 'int64') + + +class test_strings(TestCase): + BLOCK_SIZE1 = 128 + BLOCK_SIZE2 = 8 + str_list1 = [b'foo', b'bar', b'', b' '] + str_list2 = [b'foo', b'', b'x', b' '] + str_nloops = len(str_list1) * (BLOCK_SIZE1 + BLOCK_SIZE2 + 1) + str_array1 = array(str_list1 * str_nloops) + str_array2 = array(str_list2 
* str_nloops) + str_constant = b'doodoo' + + def test_null_chars(self): + str_list = [ + b'\0\0\0', b'\0\0foo\0', b'\0\0foo\0b', b'\0\0foo\0b\0', + b'foo\0', b'foo\0b', b'foo\0b\0', b'foo\0bar\0baz\0\0'] + for s in str_list: + r = evaluate('s') + self.assertEqual(s, r.tobytes()) # check *all* stored data + + def test_compare_copy(self): + sarr = self.str_array1 + expr = 'sarr' + res1 = eval(expr) + res2 = evaluate(expr) + assert_array_equal(res1, res2) + + def test_compare_array(self): + sarr1 = self.str_array1 + sarr2 = self.str_array2 + expr = 'sarr1 >= sarr2' + res1 = eval(expr) + res2 = evaluate(expr) + assert_array_equal(res1, res2) + + def test_compare_variable(self): + sarr = self.str_array1 + svar = self.str_constant + expr = 'sarr >= svar' + res1 = eval(expr) + res2 = evaluate(expr) + assert_array_equal(res1, res2) + + def test_compare_constant(self): + sarr = self.str_array1 + expr = 'sarr >= %r' % self.str_constant + res1 = eval(expr) + res2 = evaluate(expr) + assert_array_equal(res1, res2) + + def test_add_string_array(self): + sarr1 = self.str_array1 + sarr2 = self.str_array2 + expr = 'sarr1 + sarr2' + self.assert_missing_op('add_sss', expr, locals()) + + def test_empty_string1(self): + a = np.array([b"", b"pepe"]) + b = np.array([b"pepe2", b""]) + res = evaluate("(a == b'') & (b == b'pepe2')") + assert_array_equal(res, np.array([True, False])) + res2 = evaluate("(a == b'pepe') & (b == b'')") + assert_array_equal(res2, np.array([False, True])) + + def test_empty_string2(self): + a = np.array([b"p", b"pepe"]) + b = np.array([b"pepe2", b""]) + res = evaluate("(a == b'') & (b == b'pepe2')") + assert_array_equal(res, np.array([False, False])) + res2 = evaluate("(a == b'pepe') & (b == b'')") + assert_array_equal(res, np.array([False, False])) + + def test_add_numeric_array(self): + sarr = self.str_array1 + narr = arange(len(sarr), dtype='int32') + expr = 'sarr >= narr' + self.assert_missing_op('ge_bsi', expr, locals()) + + def assert_missing_op(self, op, 
expr, local_dict): + msg = "expected NotImplementedError regarding '%s'" % op + try: + evaluate(expr, local_dict) + except NotImplementedError as nie: + if "'%s'" % op not in nie.args[0]: + self.fail(msg) + else: + self.fail(msg) + + def test_compare_prefix(self): + # Check comparing two strings where one is a prefix of the + # other. + for s1, s2 in [(b'foo', b'foobar'), (b'foo', b'foo\0bar'), + (b'foo\0a', b'foo\0bar')]: + self.assertTrue(evaluate('s1 < s2')) + self.assertTrue(evaluate('s1 <= s2')) + self.assertTrue(evaluate('~(s1 == s2)')) + self.assertTrue(evaluate('~(s1 >= s2)')) + self.assertTrue(evaluate('~(s1 > s2)')) + + # Check for NumPy array-style semantics in string equality. + s1, s2 = b'foo', b'foo\0\0' + self.assertTrue(evaluate('s1 == s2')) + + +# Case for testing selections in fields which are aligned but whose +# data length is not an exact multiple of the length of the record. +# The following test exposes the problem only in 32-bit machines, +# because in 64-bit machines 'c2' is unaligned. However, this should +# check most platforms where, while not unaligned, 'len(datatype) > +# boundary_alignment' is fullfilled. +class test_irregular_stride(TestCase): + def test_select(self): + f0 = arange(10, dtype=int32) + f1 = arange(10, dtype=float64) + + irregular = rec.fromarrays([f0, f1]) + + f0 = irregular['f0'] + f1 = irregular['f1'] + + i0 = evaluate('f0 < 5') + i1 = evaluate('f1 < 5') + + assert_array_equal(f0[i0], arange(5, dtype=int32)) + assert_array_equal(f1[i1], arange(5, dtype=float64)) + + +# Cases for testing arrays with dimensions that can be zero. 
+class test_zerodim(TestCase): + def test_zerodim1d(self): + a0 = array([], dtype=int32) + a1 = array([], dtype=float64) + + r0 = evaluate('a0 + a1') + r1 = evaluate('a0 * a1') + + assert_array_equal(r0, a1) + assert_array_equal(r1, a1) + + def test_zerodim3d(self): + a0 = array([], dtype=int32).reshape(0, 2, 4) + a1 = array([], dtype=float64).reshape(0, 2, 4) + + r0 = evaluate('a0 + a1') + r1 = evaluate('a0 * a1') + + assert_array_equal(r0, a1) + assert_array_equal(r1, a1) + + +@contextmanager +def _environment(key, value): + old = os.environ.get(key) + os.environ[key] = value + try: + yield + finally: + if old: + os.environ[key] = old + else: + del os.environ[key] + +# Test cases for the threading configuration +@pytest.mark.thread_unsafe +class test_threading_config(TestCase): + def test_max_threads_unset(self): + # Has to be done in a subprocess as `importlib.reload` doesn't let us + # re-initialize the threadpool + script = '\n'.join([ + "import os", + "if 'NUMEXPR_MAX_THREADS' in os.environ: os.environ.pop('NUMEXPR_MAX_THREADS')", + "if 'OMP_NUM_THREADS' in os.environ: os.environ.pop('OMP_NUM_THREADS')", + "import numexpr", + f"assert(numexpr.nthreads <= {MAX_THREADS})", + "exit(0)"]) + subprocess.check_call([sys.executable, '-c', script]) + + def test_max_threads_set(self): + # Has to be done in a subprocess as `importlib.reload` doesn't let us + # re-initialize the threadpool + script = '\n'.join([ + "import os", + "os.environ['NUMEXPR_MAX_THREADS'] = '4'", + "import numexpr", + "assert(numexpr.MAX_THREADS == 4)", + "exit(0)"]) + subprocess.check_call([sys.executable, '-c', script]) + + def test_numexpr_num_threads(self): + with _environment('OMP_NUM_THREADS', '5'): + # NUMEXPR_NUM_THREADS has priority + with _environment('NUMEXPR_NUM_THREADS', '3'): + if 'sparc' in platform.machine(): + self.assertEqual(1, numexpr._init_num_threads()) + else: + self.assertEqual(3, numexpr._init_num_threads()) + + def test_omp_num_threads(self): + with 
_environment('OMP_NUM_THREADS', '5'): + if 'sparc' in platform.machine(): + self.assertEqual(1, numexpr._init_num_threads()) + else: + self.assertEqual(5, numexpr._init_num_threads()) + + def test_omp_num_threads_empty_string(self): + with _environment('OMP_NUM_THREADS', ''): + if 'sparc' in platform.machine(): + self.assertEqual(1, numexpr._init_num_threads()) + else: + self.assertEqual(min(detect_number_of_cores(), MAX_THREADS), numexpr._init_num_threads()) + + def test_numexpr_max_threads_empty_string(self): + with _environment('NUMEXPR_MAX_THREADS', ''): + if 'sparc' in platform.machine(): + self.assertEqual(1, numexpr._init_num_threads()) + else: + self.assertEqual(min(detect_number_of_cores(), MAX_THREADS), numexpr._init_num_threads()) + + def test_vml_threads_round_trip(self): + n_threads = 3 + if use_vml: + numexpr.utils.set_vml_num_threads(n_threads) + set_threads = numexpr.utils.get_vml_num_threads() + self.assertEqual(n_threads, set_threads) + else: + self.assertIsNone(numexpr.utils.set_vml_num_threads(n_threads)) + self.assertIsNone(numexpr.utils.get_vml_num_threads()) + + +# Case test for threads +class test_threading(TestCase): + + def test_thread(self): + import threading + + class ThreadTest(threading.Thread): + def run(self): + a = arange(3) + assert_array_equal(evaluate('a**3'), array([0, 1, 8])) + + test = ThreadTest() + test.start() + test.join() + + def test_multithread(self): + + import threading + + # Running evaluate() from multiple threads shouldn't crash + def work(n): + a = arange(n) + evaluate('a+a') + + work(10) # warm compilation cache + + nthreads = 30 + threads = [threading.Thread(target=work, args=(1e5,)) + for i in range(nthreads)] + for t in threads: + t.start() + for t in threads: + t.join() + + def test_thread_safety(self): + """ + Expected output + + When not safe (before the pr this test is commited) + AssertionError: Thread-0 failed: result does not match expected + + When safe (after the pr this test is commited) + Should 
pass without failure + """ + import threading + import time + + barrier = threading.Barrier(4) + + # Function that each thread will run with different expressions + def thread_function(a_value, b_value, expression, expected_result, results, index): + validate(expression, local_dict={"a": a_value, "b": b_value}) + # Wait for all threads to reach this point + # such that they all set _numexpr_last + barrier.wait() + + # Simulate some work or a context switch delay + time.sleep(0.1) + + result = re_evaluate(local_dict={"a": a_value, "b": b_value}) + results[index] = np.array_equal(result, expected_result) + + def test_thread_safety_with_numexpr(): + num_threads = 4 + array_size = 1000000 + + expressions = [ + "a + b", + "a - b", + "a * b", + "a / b" + ] + + a_value = [np.full(array_size, i + 1) for i in range(num_threads)] + b_value = [np.full(array_size, (i + 1) * 2) for i in range(num_threads)] + + expected_results = [ + a_value[i] + b_value[i] if expr == "a + b" else + a_value[i] - b_value[i] if expr == "a - b" else + a_value[i] * b_value[i] if expr == "a * b" else + a_value[i] / b_value[i] if expr == "a / b" else None + for i, expr in enumerate(expressions) + ] + + results = [None] * num_threads + threads = [] + + # Create and start threads with different expressions + for i in range(num_threads): + thread = threading.Thread( + target=thread_function, + args=(a_value[i], b_value[i], expressions[i], expected_results[i], results, i) + ) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + for i in range(num_threads): + if not results[i]: + self.fail(f"Thread-{i} failed: result does not match expected") + + test_thread_safety_with_numexpr() + + +# The worker function for the subprocess (needs to be here because Windows +# has problems pickling nested functions with the multiprocess module :-/) +def _worker(qout=None): + ra = np.arange(1e3) + rows = evaluate('ra > 0') + #print "Succeeded in evaluation!\n" + if qout is not None: + 
qout.put("Done") + + +# Case test for subprocesses (via multiprocessing module) +class test_subprocess(TestCase): + @pytest.mark.thread_unsafe + def test_multiprocess(self): + try: + import multiprocessing as mp + except ImportError: + return + # Check for two threads at least + numexpr.set_num_threads(2) + #print "**** Running from main process:" + _worker() + #print "**** Running from subprocess:" + qout = mp.Queue() + ps = mp.Process(target=_worker, args=(qout,)) + ps.daemon = True + ps.start() + + result = qout.get() + #print result + + +def print_versions(): + """Print the versions of software that numexpr relies on.""" + # from pkg_resources import parse_version + import platform + + from numexpr.cpuinfo import cpu + + print('-=' * 38) + print('Numexpr version: %s' % numexpr.__version__) + print('NumPy version: %s' % np.__version__) + print('Python version: %s' % sys.version) + (sysname, nodename, release, os_version, machine, processor) = platform.uname() + print('Platform: %s-%s-%s' % (sys.platform, machine, os_version)) + try: + # cpuinfo doesn't work on OSX well it seems, so protect these outputs + # with a try block + cpu_info = cpu.info[0] + print('CPU vendor: %s' % cpu_info.get('VendorIdentifier', '')) + print('CPU model: %s' % cpu_info.get('ProcessorNameString', '')) + print('CPU clock speed: %s MHz' % cpu_info.get('~MHz','')) + except KeyError: + pass + print('VML available? %s' % use_vml) + if use_vml: + print('VML/MKL version: %s' % numexpr.get_vml_version()) + print('Number of threads used by default: %d ' + '(out of %d detected cores)' % (numexpr.nthreads, numexpr.ncores)) + print('Maximum number of threads: %s' % numexpr.MAX_THREADS) + print('-=' * 38) + + +def test(verbosity=1): + """ + Run all the tests in the test suite. + """ + print_versions() + # For some reason, NumPy issues all kinds of warnings when using Python3. + # Ignoring them in tests should be ok, as all results are checked out. 
+ # See https://github.com/pydata/numexpr/issues/183 for details. + np.seterr(divide='ignore', invalid='ignore', over='ignore', under='ignore') + return unittest.TextTestRunner(verbosity=verbosity).run(suite()) + + +test.__test__ = False + + +def suite(): + import platform as pl + import unittest + + theSuite = unittest.TestSuite() + niter = 1 + + # Add the pytest parametrized tests only if pytest is available + if pytest_available: + # Create a class that will run the test_expressions function with different parameters + class TestExpressions(unittest.TestCase): + pass + + # Get the parameters from the pytest.mark.parametrize decorator + # This is safer than accessing internal pytest modules + marker = getattr(test_expressions, "pytestmark", None) + if marker and hasattr(marker[0], "args") and len(marker[0].args) >= 2: + param_list = marker[0].args[1] + + # Create test methods dynamically + for i, params in enumerate(param_list): + expr, test_scalar, dtype, optimization, exact, section_name = params + + def create_test_method(params=params): + def test_method(self): + expr, test_scalar, dtype, optimization, exact, section_name = ( + params + ) + test_expressions( + expr, test_scalar, dtype, optimization, exact, section_name + ) + + return test_method + + method_name = f"test_expr_{i}" + setattr(TestExpressions, method_name, create_test_method()) + + for n in range(niter): + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr)) + if 'sparc' not in platform.machine(): + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr2)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_evaluate)) + # Add the dynamically created TestExpressions to the suite + if pytest_available: + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestExpressions)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_int32_int64)) + 
theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_uint32_int64)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_strings)) + theSuite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase(test_irregular_stride)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_zerodim)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading_config)) + + # multiprocessing module is not supported on Hurd/kFreeBSD + if (pl.system().lower() not in ('gnu', 'gnu/kfreebsd')): + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_subprocess)) + + # I need to put this test after test_subprocess because + # if not, the test suite locks immediately before test_subproces. + # This only happens with Windows, so I suspect of a subtle bad + # interaction with threads and subprocess :-/ + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading)) + + + return theSuite + + +if __name__ == '__main__': + print_versions() + unittest.main(defaultTest='suite') +# suite = suite() +# unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/venv/Lib/site-packages/numexpr/utils.py b/venv/Lib/site-packages/numexpr/utils.py new file mode 100644 index 0000000..9e45fbe --- /dev/null +++ b/venv/Lib/site-packages/numexpr/utils.py @@ -0,0 +1,311 @@ +################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. +#################################################################### + +import logging + +log = logging.getLogger(__name__) + +import contextvars +import os +import subprocess + +from numexpr import use_vml +from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads + +from . 
import version + +if use_vml: + from numexpr.interpreter import (_get_vml_num_threads, _get_vml_version, + _set_vml_accuracy_mode, + _set_vml_num_threads) + + +def get_vml_version(): + """ + Get the VML/MKL library version. + """ + if use_vml: + return _get_vml_version() + else: + return None + + +def set_vml_accuracy_mode(mode): + """ + Set the accuracy mode for VML operations. + + The `mode` parameter can take the values: + - 'high': high accuracy mode (HA), <1 least significant bit + - 'low': low accuracy mode (LA), typically 1-2 least significant bits + - 'fast': enhanced performance mode (EP) + - None: mode settings are ignored + + This call is equivalent to the `vmlSetMode()` in the VML library. + See: + + http://www.intel.com/software/products/mkl/docs/webhelp/vml/vml_DataTypesAccuracyModes.html + + for more info on the accuracy modes. + + Returns old accuracy settings. + """ + if use_vml: + acc_dict = {None: 0, 'low': 1, 'high': 2, 'fast': 3} + acc_reverse_dict = {1: 'low', 2: 'high', 3: 'fast'} + if mode not in list(acc_dict.keys()): + raise ValueError( + "mode argument must be one of: None, 'high', 'low', 'fast'") + retval = _set_vml_accuracy_mode(acc_dict.get(mode, 0)) + return acc_reverse_dict.get(retval) + else: + return None + + +def set_vml_num_threads(nthreads): + """ + Suggests a maximum number of threads to be used in VML operations. + + This function is equivalent to the call + `mkl_domain_set_num_threads(nthreads, MKL_DOMAIN_VML)` in the MKL + library. See: + + http://www.intel.com/software/products/mkl/docs/webhelp/support/functn_mkl_domain_set_num_threads.html + + for more info about it. + """ + if use_vml: + _set_vml_num_threads(nthreads) + pass + +def get_vml_num_threads(): + """ + Gets the maximum number of threads to be used in VML operations. + + This function is equivalent to the call + `mkl_domain_get_max_threads (MKL_DOMAIN_VML)` in the MKL + library. See: + + http://software.intel.com/en-us/node/522118 + + for more info about it. 
+ """ + if use_vml: + return _get_vml_num_threads() + return None + +def set_num_threads(nthreads): + """ + Sets a number of threads to be used in operations. + + DEPRECATED: returns the previous setting for the number of threads. + + During initialization time NumExpr sets this number to the number + of detected cores in the system (see `detect_number_of_cores()`). + """ + old_nthreads = _set_num_threads(nthreads) + return old_nthreads + +def get_num_threads(): + """ + Gets the number of threads currently in use for operations. + """ + return _get_num_threads() + +def _init_num_threads(): + """ + Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool + size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or + 'OMP_NUM_THREADS' env vars to set the initial number of threads used by + the virtual machine. + """ + # Any platform-specific short-circuits + if 'sparc' in version.platform_machine: + log.warning('The number of threads have been set to 1 because problems related ' + 'to threading have been reported on some sparc machine. ' + 'The number of threads can be changed using the "set_num_threads" ' + 'function.') + set_num_threads(1) + return 1 + + env_configured = False + n_cores = detect_number_of_cores() + if ('NUMEXPR_MAX_THREADS' in os.environ and os.environ['NUMEXPR_MAX_THREADS'] != '' or + 'OMP_NUM_THREADS' in os.environ and os.environ['OMP_NUM_THREADS'] != ''): + # The user has configured NumExpr in the expected way, so suppress logs. + env_configured = True + n_cores = MAX_THREADS + else: + # The use has not set 'NUMEXPR_MAX_THREADS', so likely they have not + # configured NumExpr as desired, so we emit info logs. + if n_cores > MAX_THREADS: + log.info('Note: detected %d virtual cores but NumExpr set to maximum of %d, check "NUMEXPR_MAX_THREADS" environment variable.'%(n_cores, MAX_THREADS)) + if n_cores > 16: + # Back in 2019, 8 threads would be considered safe for performance. We are in 2024 now, so adjusting. 
+ log.info('Note: NumExpr detected %d cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.'%n_cores) + n_cores = 16 + + # Now we check for 'NUMEXPR_NUM_THREADS' or 'OMP_NUM_THREADS' to set the + # actual number of threads used. + if 'NUMEXPR_NUM_THREADS' in os.environ and os.environ['NUMEXPR_NUM_THREADS'] != '': + requested_threads = int(os.environ['NUMEXPR_NUM_THREADS']) + elif 'OMP_NUM_THREADS' in os.environ and os.environ['OMP_NUM_THREADS'] != '': + # Empty string is commonly used to unset the variable + requested_threads = int(os.environ['OMP_NUM_THREADS']) + else: + requested_threads = n_cores + if not env_configured: + log.info('NumExpr defaulting to %d threads.'%n_cores) + + # The C-extension function performs its own checks against `MAX_THREADS` + set_num_threads(requested_threads) + return requested_threads + + +def detect_number_of_cores(): + """ + Detects the number of cores on a system. Cribbed from pp. + """ + # Linux, Unix and MacOS: + if hasattr(os, "sysconf"): + if "SC_NPROCESSORS_ONLN" in os.sysconf_names: + # Linux & Unix: + ncpus = os.sysconf("SC_NPROCESSORS_ONLN") + if isinstance(ncpus, int) and ncpus > 0: + return ncpus + else: # OSX: + return int(subprocess.check_output(["sysctl", "-n", "hw.ncpu"])) + # Windows: + try: + ncpus = int(os.environ.get("NUMBER_OF_PROCESSORS", "")) + if ncpus > 0: + return ncpus + except ValueError: + pass + return 1 # Default + + +def detect_number_of_threads(): + """ + DEPRECATED: use `_init_num_threads` instead. 
+ If this is modified, please update the note in: https://github.com/pydata/numexpr/wiki/Numexpr-Users-Guide + """ + log.warning('Deprecated, use `init_num_threads` instead.') + try: + nthreads = int(os.environ.get('NUMEXPR_NUM_THREADS', '')) + except ValueError: + try: + nthreads = int(os.environ.get('OMP_NUM_THREADS', '')) + except ValueError: + nthreads = detect_number_of_cores() + + # Check that we don't surpass the MAX_THREADS in interpreter.cpp + if nthreads > MAX_THREADS: + nthreads = MAX_THREADS + return nthreads + + +class CacheDict(dict): + """ + A dictionary that prevents itself from growing too much. + """ + + def __init__(self, maxentries): + self.maxentries = maxentries + super(CacheDict, self).__init__(self) + + def __setitem__(self, key, value): + # Protection against growing the cache too much + if len(self) > self.maxentries: + # Remove a 10% of (arbitrary) elements from the cache + entries_to_remove = self.maxentries // 10 + for k in list(self.keys())[:entries_to_remove]: + super(CacheDict, self).__delitem__(k) + super(CacheDict, self).__setitem__(key, value) + + +class ContextDict: + """ + A context aware version dictionary + """ + def __init__(self): + self._context_data = contextvars.ContextVar('context_data', default={}) + + def set(self, key=None, value=None, **kwargs): + data = self._context_data.get().copy() + + if key is not None: + data[key] = value + + for k, v in kwargs.items(): + data[k] = v + + self._context_data.set(data) + + def get(self, key, default=None): + data = self._context_data.get() + return data.get(key, default) + + def delete(self, key): + data = self._context_data.get().copy() + if key in data: + del data[key] + self._context_data.set(data) + + def clear(self): + self._context_data.set({}) + + def all(self): + return self._context_data.get() + + def update(self, *args, **kwargs): + data = self._context_data.get().copy() + + if args: + if len(args) > 1: + raise TypeError(f"update() takes at most 1 positional argument 
({len(args)} given)") + other = args[0] + if isinstance(other, dict): + data.update(other) + else: + for k, v in other: + data[k] = v + + data.update(kwargs) + self._context_data.set(data) + + def keys(self): + return self._context_data.get().keys() + + def values(self): + return self._context_data.get().values() + + def items(self): + return self._context_data.get().items() + + def __getitem__(self, key): + return self.get(key) + + def __setitem__(self, key, value): + self.set(key, value) + + def __delitem__(self, key): + self.delete(key) + + def __contains__(self, key): + return key in self._context_data.get() + + def __len__(self): + return len(self._context_data.get()) + + def __iter__(self): + return iter(self._context_data.get()) + + def __repr__(self): + return repr(self._context_data.get()) diff --git a/venv/Lib/site-packages/numexpr/version.py b/venv/Lib/site-packages/numexpr/version.py new file mode 100644 index 0000000..d48d630 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/version.py @@ -0,0 +1,5 @@ +# THIS FILE IS GENERATED BY `setup.py` +__version__ = '2.14.1' +version = '2.14.1' +numpy_build_version = '2.3.3' +platform_machine = 'AMD64' diff --git a/venv/Lib/site-packages/numexpr/win32/pthread.c b/venv/Lib/site-packages/numexpr/win32/pthread.c new file mode 100644 index 0000000..6d38d99 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/win32/pthread.c @@ -0,0 +1,218 @@ +/* + * Code for simulating pthreads API on Windows. This is Git-specific, + * but it is enough for Numexpr needs too. + * + * Copyright (C) 2009 Andrzej K. 
Haczewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DISCLAIMER: The implementation is Git-specific, it is subset of original + * Pthreads API, without lots of other features that Git doesn't use. + * Git also makes sure that the passed arguments are valid, so there's + * no need for double-checking. + */ + +#include "pthread.h" + +#include +#include +#include +#include +#include + + +void die(const char *err, ...) 
+{ + printf("%s", err); + exit(-1); +} + +static unsigned __stdcall win32_start_routine(void *arg) +{ + pthread_t *thread = arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg) +{ + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) + _beginthreadex(NULL, 0, win32_start_routine, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int win32_pthread_join(pthread_t *thread, void **value_ptr) +{ + DWORD result = WaitForSingleObject(thread->handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) + *value_ptr = thread->arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +int pthread_cond_init(pthread_cond_t *cond, const void *unused) +{ + cond->waiters = 0; + cond->was_broadcast = 0; + InitializeCriticalSection(&cond->waiters_lock); + + cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); + if (!cond->sema) + die("CreateSemaphore() failed"); + + cond->continue_broadcast = CreateEvent(NULL, /* security */ + FALSE, /* auto-reset */ + FALSE, /* not signaled */ + NULL); /* name */ + if (!cond->continue_broadcast) + die("CreateEvent() failed"); + + return 0; +} + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + CloseHandle(cond->sema); + CloseHandle(cond->continue_broadcast); + DeleteCriticalSection(&cond->waiters_lock); + return 0; +} + +int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex) +{ + int last_waiter; + + EnterCriticalSection(&cond->waiters_lock); + cond->waiters++; + LeaveCriticalSection(&cond->waiters_lock); + + /* + * Unlock external mutex and wait for signal. + * NOTE: we've held mutex locked long enough to increment + * waiters count above, so there's no problem with + * leaving mutex unlocked before we wait on semaphore. 
+ */ + LeaveCriticalSection(mutex); + + /* let's wait - ignore return value */ + WaitForSingleObject(cond->sema, INFINITE); + + /* + * Decrease waiters count. If we are the last waiter, then we must + * notify the broadcasting thread that it can continue. + * But if we continued due to cond_signal, we do not have to do that + * because the signaling thread knows that only one waiter continued. + */ + EnterCriticalSection(&cond->waiters_lock); + cond->waiters--; + last_waiter = cond->was_broadcast && cond->waiters == 0; + LeaveCriticalSection(&cond->waiters_lock); + + if (last_waiter) { + /* + * cond_broadcast was issued while mutex was held. This means + * that all other waiters have continued, but are contending + * for the mutex at the end of this function because the + * broadcasting thread did not leave cond_broadcast, yet. + * (This is so that it can be sure that each waiter has + * consumed exactly one slice of the semaphor.) + * The last waiter must tell the broadcasting thread that it + * can go on. + */ + SetEvent(cond->continue_broadcast); + /* + * Now we go on to contend with all other waiters for + * the mutex. Auf in den Kampf! + */ + } + /* lock external mutex again */ + EnterCriticalSection(mutex); + + return 0; +} + +/* + * IMPORTANT: This implementation requires that pthread_cond_signal + * is called while the mutex is held that is used in the corresponding + * pthread_cond_wait calls! + */ +int pthread_cond_signal(pthread_cond_t *cond) +{ + int have_waiters; + + EnterCriticalSection(&cond->waiters_lock); + have_waiters = cond->waiters > 0; + LeaveCriticalSection(&cond->waiters_lock); + + /* + * Signal only when there are waiters + */ + if (have_waiters) + return ReleaseSemaphore(cond->sema, 1, NULL) ? + 0 : GetLastError(); + else + return 0; +} + +/* + * DOUBLY IMPORTANT: This implementation requires that pthread_cond_broadcast + * is called while the mutex is held that is used in the corresponding + * pthread_cond_wait calls! 
+ */ +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + EnterCriticalSection(&cond->waiters_lock); + + if ((cond->was_broadcast = cond->waiters > 0)) { + /* wake up all waiters */ + ReleaseSemaphore(cond->sema, cond->waiters, NULL); + LeaveCriticalSection(&cond->waiters_lock); + /* + * At this point all waiters continue. Each one takes its + * slice of the semaphor. Now it's our turn to wait: Since + * the external mutex is held, no thread can leave cond_wait, + * yet. For this reason, we can be sure that no thread gets + * a chance to eat *more* than one slice. OTOH, it means + * that the last waiter must send us a wake-up. + */ + WaitForSingleObject(cond->continue_broadcast, INFINITE); + /* + * Since the external mutex is held, no thread can enter + * cond_wait, and, hence, it is safe to reset this flag + * without cond->waiters_lock held. + */ + cond->was_broadcast = 0; + } else { + LeaveCriticalSection(&cond->waiters_lock); + } + return 0; +} diff --git a/venv/Lib/site-packages/numexpr/win32/pthread.h b/venv/Lib/site-packages/numexpr/win32/pthread.h new file mode 100644 index 0000000..ed7f410 --- /dev/null +++ b/venv/Lib/site-packages/numexpr/win32/pthread.h @@ -0,0 +1,119 @@ +/* + * Code for simulating pthreads API on Windows. This is Git-specific, + * but it is enough for Numexpr needs too. + * + * Copyright (C) 2009 Andrzej K. Haczewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DISCLAIMER: The implementation is Git-specific, it is subset of original + * Pthreads API, without lots of other features that Git doesn't use. + * Git also makes sure that the passed arguments are valid, so there's + * no need for double-checking. + */ + +#ifndef PTHREAD_H +#define PTHREAD_H + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Defines that adapt Windows API threads to pthreads API + */ +#define pthread_mutex_t CRITICAL_SECTION + +#define pthread_mutex_init(a,b) InitializeCriticalSection((a)) +#define pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define pthread_mutex_lock EnterCriticalSection +#define pthread_mutex_unlock LeaveCriticalSection + +/* + * Implement simple condition variable for Windows threads, based on ACE + * implementation. 
+ * + * See original implementation: http://bit.ly/1vkDjo + * ACE homepage: http://www.cse.wustl.edu/~schmidt/ACE.html + * See also: http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +typedef struct { + LONG waiters; + int was_broadcast; + CRITICAL_SECTION waiters_lock; + HANDLE sema; + HANDLE continue_broadcast; +} pthread_cond_t; + +extern int pthread_cond_init(pthread_cond_t *cond, const void *unused); +extern int pthread_cond_destroy(pthread_cond_t *cond); +extern int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex); +extern int pthread_cond_signal(pthread_cond_t *cond); +extern int pthread_cond_broadcast(pthread_cond_t *cond); + +/* + * Simple thread creation implementation using pthread API + */ +typedef struct { + HANDLE handle; + void *(*start_routine)(void*); + void *arg; +} pthread_t; + +extern int pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg); + +/* + * To avoid the need of copying a struct, we use small macro wrapper to pass + * pointer to win32_pthread_join instead. + */ +#define pthread_join(a, b) win32_pthread_join(&(a), (b)) + +extern int win32_pthread_join(pthread_t *thread, void **value_ptr); + +/* + * The POSIX signal system has a more developed interface than what's in + * Windows. We create a no-op shim layer to proivde enough of the API to + * pretend to support what's used when creating threads on POSIX systems. 
+ */ +typedef int sigset_t; +enum sigop { + SIG_BLOCK, + SIG_UNBLOCK, + SIG_SETMASK +}; + +static inline int sigemptyset(sigset_t *sigs) { return 0; } +static inline int sigfillset(sigset_t *sigs) { return 0; } +static inline int sigaddset(sigset_t *sigs, int sig) { return 0; } +static inline int sigdelset(sigset_t *sigs, int sig) { return 0; } +static inline int pthread_sigmask(int how, sigset_t *newmask, + sigset_t *oldmask) { return 0; } + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* PTHREAD_H */ diff --git a/venv/Lib/site-packages/numexpr/win32/stdint.h b/venv/Lib/site-packages/numexpr/win32/stdint.h new file mode 100644 index 0000000..c66267a --- /dev/null +++ b/venv/Lib/site-packages/numexpr/win32/stdint.h @@ -0,0 +1,235 @@ +/* ISO C9x 7.18 Integer types + * Based on ISO/IEC SC22/WG14 9899 Committee draft (SC22 N2794) + * + * THIS SOFTWARE IS NOT COPYRIGHTED + * + * Contributor: Danny Smith + * + * This source code is offered for use in the public domain. You may + * use, modify or distribute it freely. + * + * This code is distributed in the hope that it will be useful but + * WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY + * DISCLAIMED. This includes but is not limited to warranties of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Date: 2000-12-02 + * + * mwb: This was modified in the following ways: + * + * - make it compatible with Visual C++ 6 (which uses + * non-standard keywords and suffixes for 64-bit types) + * - some environments need stddef.h included (for wchar stuff?) + * - handle the fact that Microsoft's limits.h header defines + * SIZE_MAX + * - make corrections for SIZE_MAX, INTPTR_MIN, INTPTR_MAX, UINTPTR_MAX, + * PTRDIFF_MIN, PTRDIFF_MAX, SIG_ATOMIC_MIN, and SIG_ATOMIC_MAX + * to be 64-bit aware. 
+ */ + + +#ifndef _STDINT_H +#define _STDINT_H +#define __need_wint_t +#define __need_wchar_t +#include +#include + +#if _MSC_VER && (_MSC_VER < 1300) +/* using MSVC 6 or earlier - no "long long" type, but might have _int64 type */ +#define __STDINT_LONGLONG __int64 +#define __STDINT_LONGLONG_SUFFIX i64 +#else +#define __STDINT_LONGLONG long long +#define __STDINT_LONGLONG_SUFFIX LL +#endif + +#if !defined( PASTE) +#define PASTE2( x, y) x##y +#define PASTE( x, y) PASTE2( x, y) +#endif /* PASTE */ + + +/* 7.18.1.1 Exact-width integer types */ +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef short int16_t; +typedef unsigned short uint16_t; +typedef int int32_t; +typedef unsigned uint32_t; +typedef __STDINT_LONGLONG int64_t; +typedef unsigned __STDINT_LONGLONG uint64_t; + +/* 7.18.1.2 Minimum-width integer types */ +typedef signed char int_least8_t; +typedef unsigned char uint_least8_t; +typedef short int_least16_t; +typedef unsigned short uint_least16_t; +typedef int int_least32_t; +typedef unsigned uint_least32_t; +typedef __STDINT_LONGLONG int_least64_t; +typedef unsigned __STDINT_LONGLONG uint_least64_t; + +/* 7.18.1.3 Fastest minimum-width integer types + * Not actually guaranteed to be fastest for all purposes + * Here we use the exact-width types for 8 and 16-bit ints. 
+ */ +typedef char int_fast8_t; +typedef unsigned char uint_fast8_t; +typedef short int_fast16_t; +typedef unsigned short uint_fast16_t; +typedef int int_fast32_t; +typedef unsigned int uint_fast32_t; +typedef __STDINT_LONGLONG int_fast64_t; +typedef unsigned __STDINT_LONGLONG uint_fast64_t; + +/* 7.18.1.4 Integer types capable of holding object pointers */ +#ifndef _INTPTR_T_DEFINED +#define _INTPTR_T_DEFINED +#ifdef _WIN64 +typedef __STDINT_LONGLONG intptr_t +#else +typedef int intptr_t; +#endif /* _WIN64 */ +#endif /* _INTPTR_T_DEFINED */ + +#ifndef _UINTPTR_T_DEFINED +#define _UINTPTR_T_DEFINED +#ifdef _WIN64 +typedef unsigned __STDINT_LONGLONG uintptr_t +#else +typedef unsigned int uintptr_t; +#endif /* _WIN64 */ +#endif /* _UINTPTR_T_DEFINED */ + +/* 7.18.1.5 Greatest-width integer types */ +typedef __STDINT_LONGLONG intmax_t; +typedef unsigned __STDINT_LONGLONG uintmax_t; + +/* 7.18.2 Limits of specified-width integer types */ +#if !defined ( __cplusplus) || defined (__STDC_LIMIT_MACROS) + +/* 7.18.2.1 Limits of exact-width integer types */ +#define INT8_MIN (-128) +#define INT16_MIN (-32768) +#define INT32_MIN (-2147483647 - 1) +#define INT64_MIN (PASTE( -9223372036854775807, __STDINT_LONGLONG_SUFFIX) - 1) + +#define INT8_MAX 127 +#define INT16_MAX 32767 +#define INT32_MAX 2147483647 +#define INT64_MAX (PASTE( 9223372036854775807, __STDINT_LONGLONG_SUFFIX)) + +#define UINT8_MAX 0xff /* 255U */ +#define UINT16_MAX 0xffff /* 65535U */ +#define UINT32_MAX 0xffffffff /* 4294967295U */ +#define UINT64_MAX (PASTE( 0xffffffffffffffffU, __STDINT_LONGLONG_SUFFIX)) /* 18446744073709551615ULL */ + +/* 7.18.2.2 Limits of minimum-width integer types */ +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST64_MIN INT64_MIN + +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MAX INT64_MAX + +#define UINT_LEAST8_MAX UINT8_MAX +#define 
UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +/* 7.18.2.3 Limits of fastest minimum-width integer types */ +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST64_MIN INT64_MIN + +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MAX INT64_MAX + +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +/* 7.18.2.4 Limits of integer types capable of holding + object pointers */ +#ifdef _WIN64 +#define INTPTR_MIN INT64_MIN +#define INTPTR_MAX INT64_MAX +#define UINTPTR_MAX UINT64_MAX +#else +#define INTPTR_MIN INT32_MIN +#define INTPTR_MAX INT32_MAX +#define UINTPTR_MAX UINT32_MAX +#endif /* _WIN64 */ + +/* 7.18.2.5 Limits of greatest-width integer types */ +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +/* 7.18.3 Limits of other integer types */ +#define PTRDIFF_MIN INTPTR_MIN +#define PTRDIFF_MAX INTPTR_MAX + +#define SIG_ATOMIC_MIN INTPTR_MIN +#define SIG_ATOMIC_MAX INTPTR_MAX + +/* we need to check for SIZE_MAX already defined because MS defines it in limits.h */ +#ifndef SIZE_MAX +#define SIZE_MAX UINTPTR_MAX +#endif + +#ifndef WCHAR_MIN /* also in wchar.h */ +#define WCHAR_MIN 0 +#define WCHAR_MAX ((wchar_t)-1) /* UINT16_MAX */ +#endif + +/* + * wint_t is unsigned short for compatibility with MS runtime + */ +#define WINT_MIN 0 +#define WINT_MAX ((wint_t)-1) /* UINT16_MAX */ + +#endif /* !defined ( __cplusplus) || defined __STDC_LIMIT_MACROS */ + + +/* 7.18.4 Macros for integer constants */ +#if !defined ( __cplusplus) || defined (__STDC_CONSTANT_MACROS) + +/* 7.18.4.1 Macros for minimum-width integer constants + + Accoding to Douglas Gwyn : + "This spec was changed in ISO/IEC 9899:1999 TC1; in ISO/IEC + 9899:1999 as initially published, the 
expansion was required + to be an integer constant of precisely matching type, which + is impossible to accomplish for the shorter types on most + platforms, because C99 provides no standard way to designate + an integer constant with width less than that of type int. + TC1 changed this to require just an integer constant + *expression* with *promoted* type." +*/ + +#define INT8_C(val) ((int8_t) + (val)) +#define UINT8_C(val) ((uint8_t) + (val##U)) +#define INT16_C(val) ((int16_t) + (val)) +#define UINT16_C(val) ((uint16_t) + (val##U)) + +#define INT32_C(val) val##L +#define UINT32_C(val) val##UL +#define INT64_C(val) (PASTE( val, __STDINT_LONGLONG_SUFFIX)) +#define UINT64_C(val)(PASTE( PASTE( val, U), __STDINT_LONGLONG_SUFFIX)) + +/* 7.18.4.2 Macros for greatest-width integer constants */ +#define INTMAX_C(val) INT64_C(val) +#define UINTMAX_C(val) UINT64_C(val) + +#endif /* !defined ( __cplusplus) || defined __STDC_CONSTANT_MACROS */ + +#endif diff --git a/venv/Lib/site-packages/packaging/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/packaging/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..d271a73 Binary files /dev/null and b/venv/Lib/site-packages/packaging/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/packaging/__pycache__/_structures.cpython-311.pyc b/venv/Lib/site-packages/packaging/__pycache__/_structures.cpython-311.pyc new file mode 100644 index 0000000..4b351e3 Binary files /dev/null and b/venv/Lib/site-packages/packaging/__pycache__/_structures.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/packaging/__pycache__/version.cpython-311.pyc b/venv/Lib/site-packages/packaging/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000..94a5c57 Binary files /dev/null and b/venv/Lib/site-packages/packaging/__pycache__/version.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/INSTALLER 
b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/LICENSE b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/LICENSE new file mode 100644 index 0000000..38438c1 --- /dev/null +++ b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2014-2022 Matthew Brennan Jones + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/METADATA b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/METADATA new file mode 100644 index 0000000..3f2fd71 --- /dev/null +++ b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/METADATA @@ -0,0 +1,27 @@ +Metadata-Version: 2.1 +Name: py-cpuinfo +Version: 9.0.0 +Summary: Get CPU info with pure Python +Home-page: https://github.com/workhorsy/py-cpuinfo +Author: Matthew Brennan Jones +Author-email: matthew.brennan.jones@gmail.com +License: MIT +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Topic :: Utilities +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +License-File: LICENSE + +py-cpuinfo +========== + + +Py-cpuinfo gets CPU info with pure Python. Py-cpuinfo should work +without any extra programs or libraries, beyond what your OS provides. +It does not require any compilation(C/C++, assembly, et cetera) to use. +It works with Python 3. 
+ +Documentation can be viewed here: https://github.com/workhorsy/py-cpuinfo + + diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/RECORD b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/RECORD new file mode 100644 index 0000000..441b4cc --- /dev/null +++ b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/RECORD @@ -0,0 +1,14 @@ +../../Scripts/cpuinfo.exe,sha256=7OjVYZmNGFcVGZqWaI98YiHTQfXb0_xlHrPC6lWZ3Yw,108359 +cpuinfo/__init__.py,sha256=T6gndqGAggfJCu4_iOziTnomCN7KzaAK_OYTewE4FMA,44 +cpuinfo/__main__.py,sha256=nSxC6Hqhi-0lN7Z4WwtKdxQdf3cUJefb5hOahCzh4Yg,33 +cpuinfo/__pycache__/__init__.cpython-311.pyc,, +cpuinfo/__pycache__/__main__.cpython-311.pyc,, +cpuinfo/__pycache__/cpuinfo.cpython-311.pyc,, +cpuinfo/cpuinfo.py,sha256=HHyDlDUNovE3QzJ3hviiM1ngyOC4iD7i6oGiz2iTmVk,84388 +py_cpuinfo-9.0.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +py_cpuinfo-9.0.0.dist-info/LICENSE,sha256=3br3Y5a_XHqkWXWiHq_i4i7st9paoNt8sOYVL6r-800,1127 +py_cpuinfo-9.0.0.dist-info/METADATA,sha256=rRFelvhFdoYcXnXXYDAbgdIxQ8_iVUa5lUHgEmU3ncE,794 +py_cpuinfo-9.0.0.dist-info/RECORD,, +py_cpuinfo-9.0.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92 +py_cpuinfo-9.0.0.dist-info/entry_points.txt,sha256=ZwrsclY_xUA0xJZK98bLxBdcowxnkK0ANYUT4FYcZJ8,42 +py_cpuinfo-9.0.0.dist-info/top_level.txt,sha256=XsjpunhkxD4hvznqQjrFNw0rtgizHEOGzewPZY3UEtU,8 diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/WHEEL b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/WHEEL new file mode 100644 index 0000000..becc9a6 --- /dev/null +++ b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/entry_points.txt b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/entry_points.txt new file mode 100644 index 0000000..c10718f --- /dev/null +++ 
b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +cpuinfo = cpuinfo:main + diff --git a/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/top_level.txt b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/top_level.txt new file mode 100644 index 0000000..b53b02d --- /dev/null +++ b/venv/Lib/site-packages/py_cpuinfo-9.0.0.dist-info/top_level.txt @@ -0,0 +1 @@ +cpuinfo diff --git a/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE new file mode 100644 index 0000000..afb2c67 --- /dev/null +++ b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2025-2026, The Blosc Development Team +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-LIBTCC b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-LIBTCC new file mode 100644 index 0000000..99e8531 --- /dev/null +++ b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-LIBTCC @@ -0,0 +1,507 @@ +Notice: Corresponding minicc source is available at https://github.com/Blosc/minicc +Notice: minicc is a fork of the original TinyCC project at https://repo.or.cz/w/tinycc.git + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. 
You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. 
+ + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
+ + diff --git a/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-SLEEF b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-SLEEF new file mode 100644 index 0000000..36b7cd9 --- /dev/null +++ b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-SLEEF @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-TINYEXPR b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-TINYEXPR new file mode 100644 index 0000000..3a1bc57 --- /dev/null +++ b/venv/Lib/site-packages/share/miniexpr/licenses/LICENSE-TINYEXPR @@ -0,0 +1,20 @@ +zlib License + +Copyright (C) 2015, 2016 Lewis Van Winkle + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgement in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + diff --git a/venv/Lib/site-packages/share/miniexpr/licenses/THIRD_PARTY_NOTICES.md b/venv/Lib/site-packages/share/miniexpr/licenses/THIRD_PARTY_NOTICES.md new file mode 100644 index 0000000..89bdde1 --- /dev/null +++ b/venv/Lib/site-packages/share/miniexpr/licenses/THIRD_PARTY_NOTICES.md @@ -0,0 +1,29 @@ +## Third-Party Notices + +This project includes or depends on third-party components with separate licenses. 
+ +### TinyExpr + +- Component: parser/evaluator base design and code portions +- Upstream: https://github.com/codeplea/tinyexpr +- License: zlib +- Local license file: `LICENSE-TINYEXPR` + +### SLEEF + +- Component: SIMD math kernels +- Upstream: https://github.com/shibatch/sleef +- License: Boost Software License 1.0 +- Local license file: `LICENSE-SLEEF` + +### TinyCC / libtcc + +- Component: DSL JIT in-memory compiler backend (`tcc`, powered by `libtcc`) +- Upstream: https://repo.or.cz/tinycc.git +- License: GNU LGPL v2.1 or later +- Local license file: `LICENSE-LIBTCC` + +For installed binaries, the corresponding TinyCC source and license are staged at: + +- https://repo.or.cz/w/tinycc.git +- https://repo.or.cz/tinycc.git/blob/HEAD:/COPYING diff --git a/venv/Lib/site-packages/share/miniexpr/third_party/tinycc/COPYING b/venv/Lib/site-packages/share/miniexpr/third_party/tinycc/COPYING new file mode 100644 index 0000000..223ede7 --- /dev/null +++ b/venv/Lib/site-packages/share/miniexpr/third_party/tinycc/COPYING @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
+ + diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/DELVEWHEEL b/venv/Lib/site-packages/tables-3.11.1.dist-info/DELVEWHEEL new file mode 100644 index 0000000..15ebfd5 --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/DELVEWHEEL @@ -0,0 +1,2 @@ +Version: 1.12.0 +Arguments: ['C:\\Users\\runneradmin\\AppData\\Local\\Temp\\cibw-run-g1z4fy_5\\cp311-win_amd64\\build\\venv\\Scripts\\delvewheel', 'repair', '--no-mangle', 'libblosc2.dll', '-v', '-w', 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\cibw-run-g1z4fy_5\\cp311-win_amd64\\repaired_wheel', 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\cibw-run-g1z4fy_5\\cp311-win_amd64\\built_wheel\\tables-3.11.1-cp311-abi3-win_amd64.whl'] diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/INSTALLER b/venv/Lib/site-packages/tables-3.11.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/METADATA b/venv/Lib/site-packages/tables-3.11.1.dist-info/METADATA new file mode 100644 index 0000000..20c8ccc --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/METADATA @@ -0,0 +1,48 @@ +Metadata-Version: 2.4 +Name: tables +Version: 3.11.1 +Summary: Hierarchical datasets for Python +Author: Francesc Alted, Ivan Vilata, Antonio Valentino, Anthony Scopatz, et al. 
+Author-email: pytables@pytables.org +Maintainer-email: PyTables maintainers +License: BSD 3-Clause License +Project-URL: homepage, http://www.pytables.org +Project-URL: documentation, http://www.pytables.org +Project-URL: repository, https://github.com/PyTables/PyTables +Project-URL: changelog, http://www.pytables.org/release_notes.html +Project-URL: tracker, https://github.com/PyTables/PyTables/issues +Keywords: hdf5 +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Topic :: Database +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Requires-Python: >=3.11 +Description-Content-Type: text/x-rst +License-File: LICENSE.txt +Requires-Dist: numpy>=1.20.0 +Requires-Dist: numexpr>=2.6.2 +Requires-Dist: packaging +Requires-Dist: py-cpuinfo +Requires-Dist: blosc2>=2.3.0 +Dynamic: license-file + +PyTables is a package for managing hierarchical datasets and +designed to efficiently cope with extremely large amounts of +data. PyTables is built on top of the HDF5 library and the +NumPy package and features an object-oriented interface +that, combined with C-code generated from Cython sources, +makes of it a fast, yet extremely easy to use tool for +interactively save and retrieve large amounts of data. 
diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/RECORD b/venv/Lib/site-packages/tables-3.11.1.dist-info/RECORD new file mode 100644 index 0000000..9165ef2 --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/RECORD @@ -0,0 +1,260 @@ +../../Scripts/pt2to3.exe,sha256=Gf-Xa8KxLAJa5WidxGV5O4Rc0KFnUpjDAH7o4Mej__M,108373 +../../Scripts/ptdump.exe,sha256=fo-uP2KAhUO7QUMKWsQZ-5haWKuyuUBNwMQUiWkm2S0,108373 +../../Scripts/ptrepack.exe,sha256=c2JWWKaCeVZ6zErz6MDWfrPkScSI0U9He7SfrnerZsY,108375 +../../Scripts/pttree.exe,sha256=9-MTaNGnJCXA3kTAeM2A9Yiki8zUUqrbC1Dia54m5Js,108373 +tables-3.11.1.dist-info/DELVEWHEEL,sha256=nll4u7kFFwBXuEYDDyRWbIPlPMkEYD8eZXda92fOJJw,437 +tables-3.11.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +tables-3.11.1.dist-info/METADATA,sha256=J4z6T1elIBEiX2YmHBblVFrkLIfZhy4crt_ng8AVvdM,2128 +tables-3.11.1.dist-info/RECORD,, +tables-3.11.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +tables-3.11.1.dist-info/WHEEL,sha256=EixXjpLAvvK5lDiSAwmfN-koda0bD15tqEVn0N4e4fA,100 +tables-3.11.1.dist-info/entry_points.txt,sha256=h42qnqwrimQr-JQ1gZCM9SNGq4jH7dh45VZlw3r6VHU,166 +tables-3.11.1.dist-info/licenses/LICENSE.txt,sha256=7sbqXp8hY5K2M8u-9yT_W4KbxkqbR0rJ5LRW3SBZSG8,1756 +tables-3.11.1.dist-info/top_level.txt,sha256=m3W6sioRFzYyZByXzdell41O7YB9Kbe8ROEgs8Lyh6A,7 +tables.libs/blosc-a4a28bc7aa2533cc2a5bc97cb813890c.dll,sha256=yJUfHPI__oYtbU8xNBfjZD71uMLc4YV5vpADPaYfHEs,69632 +tables.libs/hdf5-e6c72474544c7b020b14fe7c482e97b5.dll,sha256=zJj6pcRXAamEIyRjhZK8oWdFDC9azL-6A8P75_VHkTY,3414016 +tables.libs/libblosc2.dll,sha256=eMbHaEXQBuPrQSHgFFxTnGU-gJ2dVE3tQ5ie01XhBs0,465408 +tables.libs/libbz2-894dbcd41ee025e020c9083f03e69f64.dll,sha256=iU281B7gJeAgyQg_A-afZKE8an-rNhJbK9Kd3YA-SnI,77824 +tables.libs/libcrypto-3-x64-e5a134bc7562c8f279b5f88612c7d914.dll,sha256=5aE0vHViyPJ5tfiGEsfZFA7lLuL2No2Sw4PsWHNm2XI,7420928 
+tables.libs/libcurl-5ab672140859252079c8e100b39d35ef.dll,sha256=TM7cDzpnWyB2dOS9HInaYQ3O6uxuO6CPor6rhVn7bBo,718848 +tables.libs/libssh2-b423053596b4de89982cda34c3c09058.dll,sha256=uK-m8-hxYz6kUczUp8bUiAQpOE1h2nlr3FQRq5zOY1E,258048 +tables.libs/lz4-6dc4c9d99b472a733962763c89a20ca1.dll,sha256=bcTJ2ZtHKnM5YnY8iaIMoVuvBX5NTnJIZv0Mf4qd92Q,127488 +tables.libs/msvcp140-8f141b4454fa78db34bc1f28c571b4da.dll,sha256=jxQbRFT6eNs0vB8oxXG02g4AzSxD960OKC8xMDaCaq4,557648 +tables.libs/snappy-747c86baab1469b466d2e9a18acf1497.dll,sha256=1RYGeVDJXK9gKo5ALnbXehogk0--d10DGzHEb80QkOc,81920 +tables.libs/szip-dce5955eea02e788e2d938e3c81907bc.dll,sha256=3OWVXuoC54ji2TjjyBkHvFlIklQoEUJSPNi0WeMInx0,33792 +tables.libs/zlib-7d86de8659d728d0cc22615ea37248ca.dll,sha256=fYbehlnXKNDMImFeo3JIyoOySvZKn8GezEVIEFMI7go,89088 +tables.libs/zlib-ng2-f57947fffdf36cbecc829a61001247e1.dll,sha256=9XlH__3zbL7MgpphABJH4XB702V6oje3nerHJbDQYv0,333824 +tables.libs/zstd-ca228f0f33b8296d6650b4694bb38ba9.dll,sha256=yiKPDzO4KW1mULRpS7OLqeqHlsUjtrXgaC_MBaXtqwo,658432 +tables/__init__.py,sha256=c_KrhiYawqxHgbN-NPj4cQn3gOc_hun5VTYXx39l3iQ,6771 +tables/__pycache__/__init__.cpython-311.pyc,, +tables/__pycache__/_version.cpython-311.pyc,, +tables/__pycache__/array.cpython-311.pyc,, +tables/__pycache__/atom.cpython-311.pyc,, +tables/__pycache__/attributeset.cpython-311.pyc,, +tables/__pycache__/carray.cpython-311.pyc,, +tables/__pycache__/conditions.cpython-311.pyc,, +tables/__pycache__/description.cpython-311.pyc,, +tables/__pycache__/earray.cpython-311.pyc,, +tables/__pycache__/exceptions.cpython-311.pyc,, +tables/__pycache__/expression.cpython-311.pyc,, +tables/__pycache__/file.cpython-311.pyc,, +tables/__pycache__/filters.cpython-311.pyc,, +tables/__pycache__/flavor.cpython-311.pyc,, +tables/__pycache__/group.cpython-311.pyc,, +tables/__pycache__/idxutils.cpython-311.pyc,, +tables/__pycache__/index.cpython-311.pyc,, +tables/__pycache__/indexes.cpython-311.pyc,, +tables/__pycache__/leaf.cpython-311.pyc,, 
+tables/__pycache__/link.cpython-311.pyc,, +tables/__pycache__/node.cpython-311.pyc,, +tables/__pycache__/parameters.cpython-311.pyc,, +tables/__pycache__/path.cpython-311.pyc,, +tables/__pycache__/registry.cpython-311.pyc,, +tables/__pycache__/req_versions.cpython-311.pyc,, +tables/__pycache__/table.cpython-311.pyc,, +tables/__pycache__/undoredo.cpython-311.pyc,, +tables/__pycache__/unimplemented.cpython-311.pyc,, +tables/__pycache__/utils.cpython-311.pyc,, +tables/__pycache__/vlarray.cpython-311.pyc,, +tables/_comp_bzip2.pyd,sha256=PZwTvtn0oXuQic0yQHudi1YZLqjfV7kJQ1T70nFRrUk,33792 +tables/_comp_bzip2.pyx,sha256=jX5-v6YePF34Xu1hOCqEQV5KI1iQMRlrtmGlJzy3Uds,394 +tables/_comp_lzo.pyd,sha256=QfxO4tnjXGMcjGUrKoCUA0i7P7lu02BKSp_XQZ4O70A,30208 +tables/_comp_lzo.pyx,sha256=hXuoMTPWoe3npLe49tPva-K_x_UE5Vbjs6teUBTcva8,388 +tables/_version.py,sha256=Wg6P2QZ_GRZQVvajkxYSnmobZWLy4uPFiUMWATbxAyU,60 +tables/array.py,sha256=y3iod0blY3dkuJiBKyR6QLGTtYImR-heQm38Yf5mewA,38662 +tables/atom.py,sha256=ZX081G0lCs8irf9pwJ1r3NXGYla0c2y3Eu2A0Z3tOLg,48156 +tables/attributeset.py,sha256=MqU2S5tTp-HrbrrFPpYJuyw8YY491qaUOPCau18wllg,27845 +tables/carray.py,sha256=m_1Xkn8cDT3taMyK_hhW7RgHD3uSekWrYFil7po_qYI,11464 +tables/conditions.py,sha256=oWd729f3b5uxMj9OPIBEFynrc6qHtXLUWE625LIRBgw,18409 +tables/definitions.pxd,sha256=8mWrZhsSCBxM1IvhW4afya1nbkrYSTAx7DeUECywqKA,22997 +tables/description.py,sha256=GhmiYNaTNvr6iFUuvvda_HfzTBc7xoLw2w265yvRyNk,41536 +tables/earray.py,sha256=O6shGpG80bG44IHv825TMWDtuh9oaGxsjuD7jKWun3Y,10723 +tables/exceptions.py,sha256=Xz2RUCis30D9TKxgRy9-8wN9c8MODdEnYEQux7pPQNc,13523 +tables/expression.py,sha256=vXQEVAMYDrNUfd18NN9pIkJIOrOMwEQFCatZlf4j2Ck,30505 +tables/file.py,sha256=Wax__e0llsXNdpOFuNAcGYwwAKy88mvw7x4AcDd8Ol0,114738 +tables/filters.py,sha256=ZwPbey4guVcCAohsq9xabghKy6Q15_9vr2umO4R6X6M,17517 +tables/flavor.py,sha256=BTWdl6GL5nVxEGpyVGEZ79uoSjftVqR6fioOf1XOX-c,14854 +tables/group.py,sha256=A_URAlqn5c9HFH0BC4r3C-msuob5FBnX4R2WPbEqYr4,50840 
+tables/hdf5extension.pxd,sha256=TpR8BETK-_7jUqcqI-kzMMLD2sBQpIbTsbi-ki1yAyI,1033 +tables/hdf5extension.pyd,sha256=6OZlPwZ6Cv9Vr_n2HAnHphzspRBEDuIUPraZLkJdXVI,308736 +tables/hdf5extension.pyx,sha256=ZY_juybIyD9K3mN2Jv25bbWvNBJsPynwYyAZ491o9tU,81956 +tables/idxutils.py,sha256=XAtqJGXO6TWJJ5h3Xv4oyeNllyAK0k5trGD_u9Vncec,17629 +tables/index.py,sha256=0FnCnwvFHZNkqiruBLKWgWuhniIiOcoQ2Gvwg5T6heg,99480 +tables/indexes.py,sha256=U5avwUP4q_MJ8V3_I-g9fNpdEyx7SJeo6LtzcDYUZi0,6535 +tables/indexesextension.pyd,sha256=wp9jDHiLbxRi5o_oxTavHRjwP_MgrxaT7DZ6XIMq8_4,159232 +tables/indexesextension.pyx,sha256=JQjj16zfdLqDmWhLV3mDqt0TRxaFYdY2xLiBQncz7rs,50613 +tables/leaf.py,sha256=4JBMhZ6ELoXrNaafEARWYbIg-qXTLmIyESwqrAzvk34,40114 +tables/libblosc2.dll,sha256=mjonuR8R7U49iAWnpxxgHcQCe5KtF3InL8rOqeNFKv0,465408 +tables/link.py,sha256=PFoOBirMQ51Ut7e_kVrALwhI7SmJF2u2HXHLl8Jzc9M,14637 +tables/linkextension.pyd,sha256=bdJ9hlvWWJZrqZZ140PbUNqt6pravVChJVrJ_aD0X6o,82432 +tables/linkextension.pyx,sha256=evX_SJVeSIZCAtLZKdC27lbDBY98w0UTW7CC_55Amhw,8520 +tables/lrucacheextension.pxd,sha256=w9uLzsIg50l_AovDtKwCHz-FhrYyuxFR5VihqIrZVlY,2443 +tables/lrucacheextension.pyd,sha256=gJ3Sv8CRZ_YsJPCzEn8oEFH4MCCKnPAwPHObBHEKwTA,109568 +tables/lrucacheextension.pyx,sha256=Jieg_YcRCa6zSM8auIjeytp-z3YhrYioKH5dPS7KSBk,22268 +tables/misc/__init__.py,sha256=8hpnRwHysjDPDathoIB3M9rjWpUpR3CmB7nKqb1zhvI,228 +tables/misc/__pycache__/__init__.cpython-311.pyc,, +tables/misc/__pycache__/enum.cpython-311.pyc,, +tables/misc/__pycache__/proxydict.cpython-311.pyc,, +tables/misc/enum.py,sha256=Tml9Bf6VhAs9ltfc9kBvjYmXvVy9Zwk3BpgDMbIxjpY,14037 +tables/misc/proxydict.py,sha256=onsRiSietzyVhUsu8z-oQ3rATdjkcDOhj8Ogl6CZNLI,2057 +tables/node.py,sha256=AEZOhLas7QaoJB4-KklUeueND-3sV0PfplPl82-zGSQ,34713 +tables/nodes/__init__.py,sha256=XDfEBbhNDx3-dCmVd7Skkg-eqU4fzND2NyxgzOhYp_E,396 +tables/nodes/__pycache__/__init__.cpython-311.pyc,, +tables/nodes/__pycache__/filenode.cpython-311.pyc,, 
+tables/nodes/filenode.py,sha256=5EgBTxk5S7iYGgqUqzwyUQyJwXxE9snAyN5bA845bgY,27869 +tables/nodes/tests/__init__.py,sha256=f9Mc3baWCC7WQtspQ-seRZSg7XlZ_ErHTCOjeLsN0mg,47 +tables/nodes/tests/__pycache__/__init__.cpython-311.pyc,, +tables/nodes/tests/__pycache__/test_filenode.cpython-311.pyc,, +tables/nodes/tests/test_filenode.dat,sha256=p_Kr3IuVmunNj4q72YbRILmzKp63O95-aQ9Dz9zxB-A,3280 +tables/nodes/tests/test_filenode.py,sha256=RD8y0EKuHCV5-G8cn2NykbolLCrMw7Ac5F-WJNgbWXA,36474 +tables/nodes/tests/test_filenode.xbm,sha256=p_Kr3IuVmunNj4q72YbRILmzKp63O95-aQ9Dz9zxB-A,3280 +tables/nodes/tests/test_filenode_v1.h5,sha256=z_Gi0CPy_OHMjigm2FHPeE935-FNv0A58aQtPHv3VpQ,9062 +tables/parameters.py,sha256=DICQaMOMC_dTFuMX0ZE397FffFBjtzRIWuRZydeJPoE,15188 +tables/path.py,sha256=SKfk52xf6Gxfc0VPoQ6XtINGOPKBq1BY9xo6UH52wsk,6986 +tables/registry.py,sha256=yMYdOnu2746e8nvcaUpMuIN2HQp_-h_oOfFACKfejUQ,2317 +tables/req_versions.py,sha256=jGw78hbZ0IBkqr71ZD_FdHeBJ3RhpH3PQZaB8OdWBao,646 +tables/scripts/__init__.py,sha256=Lb_1TZ4hPN4Bc4MukqFM7jYs9q_pY7yHnDewdXsq3yE,174 +tables/scripts/__pycache__/__init__.cpython-311.pyc,, +tables/scripts/__pycache__/pt2to3.cpython-311.pyc,, +tables/scripts/__pycache__/ptdump.cpython-311.pyc,, +tables/scripts/__pycache__/ptrepack.cpython-311.pyc,, +tables/scripts/__pycache__/pttree.cpython-311.pyc,, +tables/scripts/pt2to3.py,sha256=qMe7YTiVcCBhIO_O-hMlDZZHMprTuJKyoPgzihKlToI,24324 +tables/scripts/ptdump.py,sha256=en-G7hrZx6DcFG4z4yKT0IIcBw3A5IQ1XCGK1Eh9k3A,5855 +tables/scripts/ptrepack.py,sha256=VMr1K963VmcOjX2JkDaUDGldsgeuyN2caTnuz8IH1-s,25252 +tables/scripts/pttree.py,sha256=-xXxxlK6ybl_9Y0qeinLysDA7aLAglVPix3Moj0HiQ0,16324 +tables/table.py,sha256=iYqz7KIUA1G-zwIXMaGYjE-LkLPycfZ0Z8tVV4OrJiY,160410 +tables/tableextension.pyd,sha256=yEu35mD32Tu8ix8jT8GD6V0W_hpXROQ1tv3GQyBKhgA,195072 +tables/tableextension.pyx,sha256=jS2rnl2AC0M5qrgppnE9bDMRmhRv1GKItv2swC1he2E,65153 
+tables/tests/Table2_1_lzo_nrv2e_shuffle.h5,sha256=7dHYCFHgzaMW2-vjztBny4g2jhCGl_ZRENev7i4_7VM,19206 +tables/tests/Tables_lzo1.h5,sha256=1-qIKgkRIGXltiW-AErSkYRjb68dwbjmDIZu6XxYXfE,23363 +tables/tests/Tables_lzo1_shuffle.h5,sha256=SLhi7MtxxuFzY5yym9ftaDKWou2TyEYcboDCB_5_4o8,21097 +tables/tests/Tables_lzo2.h5,sha256=NTWN31-mBuWZbmroozN1foN5LYlkNVnvxmMmx7mYPqw,23398 +tables/tests/Tables_lzo2_shuffle.h5,sha256=F4EhtzIptQQuVHEj5qC8O6cf4GoDoS3iBCjw4_0GUUc,21097 +tables/tests/__init__.py,sha256=8FjB4SuKo2Kq2inypsc0-bNFbwgKdCET12UW5hr8W2g,352 +tables/tests/__pycache__/__init__.cpython-311.pyc,, +tables/tests/__pycache__/check_leaks.cpython-311.pyc,, +tables/tests/__pycache__/common.cpython-311.pyc,, +tables/tests/__pycache__/create_backcompat_indexes.cpython-311.pyc,, +tables/tests/__pycache__/run_ft.cpython-311.pyc,, +tables/tests/__pycache__/test_all.cpython-311.pyc,, +tables/tests/__pycache__/test_array.cpython-311.pyc,, +tables/tests/__pycache__/test_attributes.cpython-311.pyc,, +tables/tests/__pycache__/test_aux.cpython-311.pyc,, +tables/tests/__pycache__/test_backcompat.cpython-311.pyc,, +tables/tests/__pycache__/test_basics.cpython-311.pyc,, +tables/tests/__pycache__/test_carray.cpython-311.pyc,, +tables/tests/__pycache__/test_create.cpython-311.pyc,, +tables/tests/__pycache__/test_direct_chunk.cpython-311.pyc,, +tables/tests/__pycache__/test_do_undo.cpython-311.pyc,, +tables/tests/__pycache__/test_earray.cpython-311.pyc,, +tables/tests/__pycache__/test_enum.cpython-311.pyc,, +tables/tests/__pycache__/test_expression.cpython-311.pyc,, +tables/tests/__pycache__/test_garbage.cpython-311.pyc,, +tables/tests/__pycache__/test_hdf5compat.cpython-311.pyc,, +tables/tests/__pycache__/test_index_backcompat.cpython-311.pyc,, +tables/tests/__pycache__/test_indexes.cpython-311.pyc,, +tables/tests/__pycache__/test_indexvalues.cpython-311.pyc,, +tables/tests/__pycache__/test_large_tables.cpython-311.pyc,, +tables/tests/__pycache__/test_links.cpython-311.pyc,, 
+tables/tests/__pycache__/test_lists.cpython-311.pyc,, +tables/tests/__pycache__/test_nestedtypes.cpython-311.pyc,, +tables/tests/__pycache__/test_numpy.cpython-311.pyc,, +tables/tests/__pycache__/test_queries.cpython-311.pyc,, +tables/tests/__pycache__/test_suite.cpython-311.pyc,, +tables/tests/__pycache__/test_tables.cpython-311.pyc,, +tables/tests/__pycache__/test_tablesMD.cpython-311.pyc,, +tables/tests/__pycache__/test_timestamps.cpython-311.pyc,, +tables/tests/__pycache__/test_timetype.cpython-311.pyc,, +tables/tests/__pycache__/test_tree.cpython-311.pyc,, +tables/tests/__pycache__/test_types.cpython-311.pyc,, +tables/tests/__pycache__/test_utils.cpython-311.pyc,, +tables/tests/__pycache__/test_vlarray.cpython-311.pyc,, +tables/tests/array_mdatom.h5,sha256=yI2XaJ3l_ufBtIBUzwZMTkRRFP8GK7YrKKY_t_SHDOc,5150 +tables/tests/attr-u16.h5,sha256=ZBZglj-y-SfSXILM0wQv6bQ415cFFRQi0aVlz_Ah9eY,28782 +tables/tests/b2nd-no-chunkshape.h5,sha256=OVRQCTXa2FVvtqnhGt47UXKCg6CjvG2Ob0j8nOgMVMY,5168 +tables/tests/blosc_bigendian.h5,sha256=V2rYecoLpXsfizabzZth9kqaadjH8XeKGhT7AMSuEBc,11974 +tables/tests/bug-idx.h5,sha256=tRjZIKEKcEpliZBWvCBLRUczIWs5ixSr3uhjiy-M6VU,14649 +tables/tests/check_leaks.py,sha256=RFMz8x1KuugOXOnn8EKsIJdFSVtwumAvJKFN4qC3i2I,12629 +tables/tests/common.py,sha256=cX51mctBTrgKKOCcokenXPOEgPFQO1dk1NdhVME4nww,12695 +tables/tests/create_backcompat_indexes.py,sha256=kftPGhScYoLZZJhHAxRn7sBxbBjVTdjXTcUNcAa0450,1208 +tables/tests/elink.h5,sha256=E87HIdLeiE-a2ngwztUC8PaZp29Q1v4AuSovyudUM6s,3550 +tables/tests/elink2.h5,sha256=TAT75aaJca7JxIyNk9QYsAsCXWAkDJHn6lSyXuNtSe8,2238 +tables/tests/ex-noattr.h5,sha256=D1eYQLISzBQDZmT1GjcIWcNKstxK0iyEunCvH5okIGo,12342 +tables/tests/flavored_vlarrays-format1.6.h5,sha256=C7SqiBCqh610efYIT95FuWk2psZnmRxAV2FKrqHPMv8,12621 +tables/tests/float.h5,sha256=B4scBb4HkRyT-NrWhMAsQNwx-NXtPc7vWUqL3DWbGyw,4742 +tables/tests/idx-std-1.x.h5,sha256=hrFJRX7SfCs5puOhQH9OAFRbWlwnzNHPOy6DQ7UcyDs,26662 
+tables/tests/indexes_2_0.h5,sha256=ItdvkzBQUfnUkp_0A_9F6jtRy8WzQe71wBr0urO3lBs,60801 +tables/tests/indexes_2_1.h5,sha256=NrkKELb0wBYzDm_MaelYRzQZ0K4wbYtHKJAP8Kmz4fE,147256 +tables/tests/issue_368.h5,sha256=xN9zpKKPKkzCtCY1ymERdMpOFM3D-ekW62gFPLHhTu4,1232 +tables/tests/issue_560.h5,sha256=wkmMOuRm6U5QTHftw4WdHR-exjP6YnDzf9bhJ9SANA0,2344 +tables/tests/itemsize.h5,sha256=q_I3NP5tyl7XwDNMmcjlhtOubgirV0cUIOTkNzeUw60,2096 +tables/tests/matlab_file.mat,sha256=82TEze2TcREplHrC30e_9qYgvA9gJ1Pq_2FghR1bxcE,1942 +tables/tests/nested-type-with-gaps.h5,sha256=mZyufnF-OyTPolUfhGoBKmsDEt0uzikS_tWbN7tF2ms,1830 +tables/tests/non-chunked-table.h5,sha256=Ysdo_pXT97JvNlY_CKTMfC4ynPWJV3liWKRG2G2ySx0,6184 +tables/tests/oldflavor_numeric.h5,sha256=w680ov-6uccul5UX6A0aPJcwA3wUHQrQYG55eYUOjo8,112296 +tables/tests/out_of_order_types.h5,sha256=lN1j642UrQXdF8NI6UbOzVYbApvrVuMnFxtRJwwnD10,71001 +tables/tests/python2.h5,sha256=CA6vtG47XsHHRIYdSLwrtJOs7QRgpbKOcV5Tlp_umTY,79658 +tables/tests/python3.h5,sha256=Ol-OePDlUJ2NRPoXafvCcrtGnhjpg-qZ06GiaJ8IQ3I,79658 +tables/tests/run_ft.py,sha256=1jz738PNmUSL4vacwDJxp8cZP58THAqG8zptq_L2Dtg,9381 +tables/tests/scalar.h5,sha256=pcl25oxrtALDINAEWoU320_1bJSZSL7IiDbrprnmTRM,8294 +tables/tests/slink.h5,sha256=ujUfyrlVaxusn5GARiDvo-jn0bAGYAKd8FAaUobHfco,5502 +tables/tests/smpl_SDSextendible.h5,sha256=Cfn2UXNzm_ueLU_y-sSZnXRr7a2yGv-YerYkgaFL8qc,6246 +tables/tests/smpl_compound_chunked.h5,sha256=tezHxr8yvoC-VcGzip7wLpTxCzIxhZshSoapyyzGy_0,5774 +tables/tests/smpl_enum.h5,sha256=1P4oyQw2K3_m2CBjFUNTRGhuYgg-iDbrVzruug5qiJY,2094 +tables/tests/smpl_f64be.h5,sha256=npUgBxMhRqjnDHzQrBZHVLUCsvw6f22mVa0cBW5Spss,2294 +tables/tests/smpl_f64le.h5,sha256=u2WYVtfssgU9RaVWG1FCVj4U-kSRTVJfYL_Cb8ykyUE,2294 +tables/tests/smpl_i32be.h5,sha256=gVZaKpYEKqrQuXssQTS0KuxxyzlBRaDtH-gEUhiVtoo,2174 +tables/tests/smpl_i32le.h5,sha256=ZB2aInSE3SIwWzdGIdpphkI9KQ4CTiZWNCzQqvPmRrY,2174 +tables/tests/smpl_i64be.h5,sha256=iYRlTC6OeU9oQsFR3tOEHPmcHXh7s-gMnnmzdzYGTZs,2294 
+tables/tests/smpl_i64le.h5,sha256=JYaXS3zTrAiaxmSpuYO3bzwKE5NzqWdtYee2NBXO4m4,2294 +tables/tests/smpl_unsupptype.h5,sha256=GRiBHqqhS2Pub6q-lWOAc1L9Uj4nHbU6qngfymRZ2sY,11870 +tables/tests/test_all.py,sha256=TiaKUnhKK_M8REslHfdgKo29bNb0tKQf4wI-CxAPgHA,1843 +tables/tests/test_array.py,sha256=y3jeHiddqis0-27IcDeV1n3cCBgi6RlZgBS0lMGqjSQ,98134 +tables/tests/test_attributes.py,sha256=GtQ-ssNkWH3ShsPwhdBh503vOqiHjbqi2ntbSVaOPLc,73097 +tables/tests/test_aux.py,sha256=_NyvKnYMjsTdJkNJoWgb1Y2uQ8JGTp2rP01gIRpdgRg,921 +tables/tests/test_backcompat.py,sha256=3s-S-P-tmC0w1xb9RAxmPn5chl41HP7E9mKZx4tQMD4,8654 +tables/tests/test_basics.py,sha256=mjh23REAmKqkTv04ADmxs0tYEqT0KLY5KUkr2WtDjec,96838 +tables/tests/test_carray.py,sha256=qx4pbX0UjtQf4uXfdMoqW2fvLyQgv94gApdZSAXSK1k,105473 +tables/tests/test_create.py,sha256=lbl5Le_qtvMGKqiKjGf-NVPhX9-7LPu2l-VXZJud0gk,100306 +tables/tests/test_direct_chunk.py,sha256=XfAb95J1-vnS60AAYREIq1zZmt9ddknYNlzfFUhhD1E,15176 +tables/tests/test_do_undo.py,sha256=Xjv0HurWkov3_3ZVfWDRdwbdoPcTjD7KMBLmk7hc0K0,100981 +tables/tests/test_earray.py,sha256=XrYC5ZjelD2skydd9Dh804KSFXnk6v7Ux-Y-IdxmMY8,106716 +tables/tests/test_enum.py,sha256=c7MVsYRaPdTuNBD05Z59o19ABfgPN7Tw1c87eXJj9zk,23719 +tables/tests/test_expression.py,sha256=yNQBIXjygkGBDAQrSHDruCpi8_HvbTVS4FBKiidC6-0,59343 +tables/tests/test_garbage.py,sha256=2aCnxqX82zUBHW76SzCKS7Y5fJVWsgAPLfkPiC-GFWM,1557 +tables/tests/test_hdf5compat.py,sha256=lFVRyz6wY_ba-SuU-60EHHNNQ6lWuJ3AP5gUDzK4uys,14244 +tables/tests/test_index_backcompat.py,sha256=L6F4mR5rD1Yq6qTjkk1yh667ovi9GXADMoLWnC1UGm4,5508 +tables/tests/test_indexes.py,sha256=sL348m2kY7vJGkluBgi42zTVoOn3T6HVy_SLzUNq0sI,104356 +tables/tests/test_indexvalues.py,sha256=rxNDVWWJkJvWTcHDth0NUULzMaEXuQvjC3uIW7maagE,126043 +tables/tests/test_large_tables.py,sha256=QDInWOIlLPM8l0YdvgRn7FIb3YHy02jji_eiO0MSZVo,3148 +tables/tests/test_links.py,sha256=_ku0SKtBNsSJye_GMRUZhG5C6R2z_IsJ5nDFmHSNQbM,23691 
+tables/tests/test_lists.py,sha256=1yez7PIYQsqFzPN0Hzlq4ruv_6HDH8z0ZSkr9N-kEtQ,15202 +tables/tests/test_nestedtypes.py,sha256=gcfTU1MUZuGeMFj9AO3tLZY6st4GLv49QszW2lVXx5w,55714 +tables/tests/test_numpy.py,sha256=VHCSePSBZ9zljoJQPJ6Sin4gmmZePFIx0-k3nH49vu4,52697 +tables/tests/test_queries.py,sha256=pymwqZjwCgwS23a0ffe6N7wq-OoFJ_AtTqA8d8cLj1I,45671 +tables/tests/test_ref_array1.mat,sha256=rrV46WkkhAbyRo0Y6s2w-eQYxYkUVARMzf1nqlRLKB4,16192 +tables/tests/test_ref_array2.mat,sha256=pn9dUPhy9NZlxlnjfKEWZGCKVgP5GCVEM3hX_Kj9lwA,4832 +tables/tests/test_suite.py,sha256=rotIjTmQsBweJi8lzHWXNVb6spYMY14k-nxOy9mRaUk,3230 +tables/tests/test_szip.h5,sha256=mQUuflKOjGgBJGfn-42GXPGd1xwt1W48WDs2cz5ecaY,5594 +tables/tests/test_tables.py,sha256=rrHdU8w5FZ5KXGwuJPE25tWhQCm_RqNqyGHBLRNOdEo,288017 +tables/tests/test_tablesMD.py,sha256=ITDd75u7UpjjaNjKjO7qSU_CFk1icyw97xEuYThSLRg,84521 +tables/tests/test_timestamps.py,sha256=2AcoJZqqVt7fYO5gW-xFtVKCmz6r2oV_SIlwfxWCw6I,5939 +tables/tests/test_timetype.py,sha256=lE-9U0I2NfPagr_YymFNDycGcBUqYEmxGU5qEuMIy84,18527 +tables/tests/test_tree.py,sha256=PeH2StU63IYy3FJF1WcNgEvc2-aecRHKr1xN8Z70UU4,41915 +tables/tests/test_types.py,sha256=H0JnzdVScjhzGH2_q9AQSBdZQ8jGUEPpbhx6_U4j5sE,12281 +tables/tests/test_utils.py,sha256=jL_IHbriFYnMrtwyhvjBl87n4aqzUnfxPvGVCQiPu3o,3028 +tables/tests/test_vlarray.py,sha256=BSUI_IeMw-71GTqfpH-z6kL8F9WpuvcVc6HxsNpyqLs,165290 +tables/tests/time-table-vlarray-1_x.h5,sha256=hYTomgMXpmvI5dVqMNqoj49GpmiHJy2yJ37RYOJOd9w,3766 +tables/tests/times-nested-be.h5,sha256=jcri1G2O7Qz94eFc0O9hPvVRJl2pWJgvKkVOuWNIDb0,22674 +tables/tests/vlstr_attr.h5,sha256=zuqgBjcpz9sMcrdpyRjxKUV-QTS1MXuobb10NL8sVmg,5294 +tables/tests/vlunicode_endian.h5,sha256=Xc91gNHoLArSszzCGWMd1eeq4wqsssllZuu5tVMVe94,82022 +tables/tests/zerodim-attrs-1.3.h5,sha256=sDUqt8zs26wKePl7B3ZYZ07YImQXrho6jfBvBm2D_8E,5102 +tables/tests/zerodim-attrs-1.4.h5,sha256=2jMOHUsizyI6Gsxew4bfADKulFBEFIY1Nelf5nTdv20,4366 
+tables/undoredo.py,sha256=xTOkJUpq2TAroWTlCFsws7wserphDOV_XoBbithK0xQ,5218 +tables/unimplemented.py,sha256=QCBzcDvXEwKhr__KHUR1innwcWfb6kSOL77RD4ny3ic,5858 +tables/utils.py,sha256=aC_me4EhcozbJ2eVdVv7e90G-Cl5W_WuJyayIb9jzAA,16126 +tables/utilsextension.pxd,sha256=sKK-3k2VHYG-3dgpW0UjIInOhqLYGRWal1Y4sb-L6g8,760 +tables/utilsextension.pyd,sha256=_MlDFKYOs2W9TtfNMEllLoHy2KoQV_16ohftgmnVRWo,187392 +tables/utilsextension.pyx,sha256=7CR1Ph5wyvKMgXoBj45phunLxe584E5N-Fw4i1L5Uw0,48469 +tables/vlarray.py,sha256=bPLXGw-OE6DzjFlJrCQaGUF_7exeyh-Etym2fUJCmS8,34271 diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/REQUESTED b/venv/Lib/site-packages/tables-3.11.1.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/WHEEL b/venv/Lib/site-packages/tables-3.11.1.dist-info/WHEEL new file mode 100644 index 0000000..3fdc4cb --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (82.0.0) +Root-Is-Purelib: false +Tag: cp311-abi3-win_amd64 + diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/entry_points.txt b/venv/Lib/site-packages/tables-3.11.1.dist-info/entry_points.txt new file mode 100644 index 0000000..e912c85 --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/entry_points.txt @@ -0,0 +1,5 @@ +[console_scripts] +pt2to3 = tables.scripts.pt2to3:main +ptdump = tables.scripts.ptdump:main +ptrepack = tables.scripts.ptrepack:main +pttree = tables.scripts.pttree:main diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/licenses/LICENSE.txt b/venv/Lib/site-packages/tables-3.11.1.dist-info/licenses/LICENSE.txt new file mode 100644 index 0000000..cee1c27 --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/licenses/LICENSE.txt @@ -0,0 +1,35 @@ +Copyright Notice and Statement for PyTables Software Library and Utilities: + +Copyright (c) 2002-2004 by Francesc Alted +Copyright (c) 2005-2007 by 
Carabos Coop. V. +Copyright (c) 2008-2010 by Francesc Alted +Copyright (c) 2011-2026 by PyTables maintainers +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/venv/Lib/site-packages/tables-3.11.1.dist-info/top_level.txt b/venv/Lib/site-packages/tables-3.11.1.dist-info/top_level.txt new file mode 100644 index 0000000..e608fc3 --- /dev/null +++ b/venv/Lib/site-packages/tables-3.11.1.dist-info/top_level.txt @@ -0,0 +1 @@ +tables diff --git a/venv/Lib/site-packages/tables.libs/blosc-a4a28bc7aa2533cc2a5bc97cb813890c.dll b/venv/Lib/site-packages/tables.libs/blosc-a4a28bc7aa2533cc2a5bc97cb813890c.dll new file mode 100644 index 0000000..11b34f9 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/blosc-a4a28bc7aa2533cc2a5bc97cb813890c.dll differ diff --git a/venv/Lib/site-packages/tables.libs/hdf5-e6c72474544c7b020b14fe7c482e97b5.dll b/venv/Lib/site-packages/tables.libs/hdf5-e6c72474544c7b020b14fe7c482e97b5.dll new file mode 100644 index 0000000..68d458f Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/hdf5-e6c72474544c7b020b14fe7c482e97b5.dll differ diff --git a/venv/Lib/site-packages/tables.libs/libblosc2.dll b/venv/Lib/site-packages/tables.libs/libblosc2.dll new file mode 100644 index 0000000..6ed7c7d Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/libblosc2.dll differ diff --git a/venv/Lib/site-packages/tables.libs/libbz2-894dbcd41ee025e020c9083f03e69f64.dll b/venv/Lib/site-packages/tables.libs/libbz2-894dbcd41ee025e020c9083f03e69f64.dll new file mode 100644 index 0000000..8a7aeb4 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/libbz2-894dbcd41ee025e020c9083f03e69f64.dll differ diff --git a/venv/Lib/site-packages/tables.libs/libcrypto-3-x64-e5a134bc7562c8f279b5f88612c7d914.dll b/venv/Lib/site-packages/tables.libs/libcrypto-3-x64-e5a134bc7562c8f279b5f88612c7d914.dll new file mode 100644 index 0000000..03d65a8 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/libcrypto-3-x64-e5a134bc7562c8f279b5f88612c7d914.dll differ diff --git a/venv/Lib/site-packages/tables.libs/libcurl-5ab672140859252079c8e100b39d35ef.dll 
b/venv/Lib/site-packages/tables.libs/libcurl-5ab672140859252079c8e100b39d35ef.dll new file mode 100644 index 0000000..41dc365 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/libcurl-5ab672140859252079c8e100b39d35ef.dll differ diff --git a/venv/Lib/site-packages/tables.libs/libssh2-b423053596b4de89982cda34c3c09058.dll b/venv/Lib/site-packages/tables.libs/libssh2-b423053596b4de89982cda34c3c09058.dll new file mode 100644 index 0000000..788cefc Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/libssh2-b423053596b4de89982cda34c3c09058.dll differ diff --git a/venv/Lib/site-packages/tables.libs/lz4-6dc4c9d99b472a733962763c89a20ca1.dll b/venv/Lib/site-packages/tables.libs/lz4-6dc4c9d99b472a733962763c89a20ca1.dll new file mode 100644 index 0000000..eeee5f2 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/lz4-6dc4c9d99b472a733962763c89a20ca1.dll differ diff --git a/venv/Lib/site-packages/tables.libs/msvcp140-8f141b4454fa78db34bc1f28c571b4da.dll b/venv/Lib/site-packages/tables.libs/msvcp140-8f141b4454fa78db34bc1f28c571b4da.dll new file mode 100644 index 0000000..5a1cb64 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/msvcp140-8f141b4454fa78db34bc1f28c571b4da.dll differ diff --git a/venv/Lib/site-packages/tables.libs/snappy-747c86baab1469b466d2e9a18acf1497.dll b/venv/Lib/site-packages/tables.libs/snappy-747c86baab1469b466d2e9a18acf1497.dll new file mode 100644 index 0000000..8c9107d Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/snappy-747c86baab1469b466d2e9a18acf1497.dll differ diff --git a/venv/Lib/site-packages/tables.libs/szip-dce5955eea02e788e2d938e3c81907bc.dll b/venv/Lib/site-packages/tables.libs/szip-dce5955eea02e788e2d938e3c81907bc.dll new file mode 100644 index 0000000..47ad103 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/szip-dce5955eea02e788e2d938e3c81907bc.dll differ diff --git a/venv/Lib/site-packages/tables.libs/zlib-7d86de8659d728d0cc22615ea37248ca.dll 
b/venv/Lib/site-packages/tables.libs/zlib-7d86de8659d728d0cc22615ea37248ca.dll new file mode 100644 index 0000000..48f858a Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/zlib-7d86de8659d728d0cc22615ea37248ca.dll differ diff --git a/venv/Lib/site-packages/tables.libs/zlib-ng2-f57947fffdf36cbecc829a61001247e1.dll b/venv/Lib/site-packages/tables.libs/zlib-ng2-f57947fffdf36cbecc829a61001247e1.dll new file mode 100644 index 0000000..65dc8e3 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/zlib-ng2-f57947fffdf36cbecc829a61001247e1.dll differ diff --git a/venv/Lib/site-packages/tables.libs/zstd-ca228f0f33b8296d6650b4694bb38ba9.dll b/venv/Lib/site-packages/tables.libs/zstd-ca228f0f33b8296d6650b4694bb38ba9.dll new file mode 100644 index 0000000..02d0a43 Binary files /dev/null and b/venv/Lib/site-packages/tables.libs/zstd-ca228f0f33b8296d6650b4694bb38ba9.dll differ diff --git a/venv/Lib/site-packages/tables/__init__.py b/venv/Lib/site-packages/tables/__init__.py new file mode 100644 index 0000000..5614745 --- /dev/null +++ b/venv/Lib/site-packages/tables/__init__.py @@ -0,0 +1,267 @@ +"""PyTables, hierarchical datasets in Python. + +:URL: http://www.pytables.org/ + +PyTables is a package for managing hierarchical datasets and designed +to efficiently cope with extremely large amounts of data. + +""" + + +# start delvewheel patch +def _delvewheel_patch_1_12_0(): + import os + if os.path.isdir(libs_dir := os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'tables.libs'))): + os.add_dll_directory(libs_dir) + + +_delvewheel_patch_1_12_0() +del _delvewheel_patch_1_12_0 +# end delvewheel patch + +# Load the blosc2 library: +# 1. In tables.libs/ sibling (delvewheel, Windows-only) +# 2. In tables +# 3. In site-packages/blosc2/lib/ (venv, conda env, or system Python; same one where this tables is running) +# 4. 
Without a path (default, only the filename) +def _load_blosc2(): + import ctypes + import platform + import sysconfig + from pathlib import Path + + search_paths = ( + # "delvewheel" + Path(__file__).parent.with_suffix(".libs"), + # tables package + Path(__file__).parent, + # "site-packages" + Path(sysconfig.get_path("platlib")) / "blosc2" / "lib", + # "site-packages" purelib - this should be redundant + Path(sysconfig.get_path("purelib")) / "blosc2" / "lib", + # "default" + "", + ) + platform_system = platform.system() + ext = ( + "so" + if platform_system == "Linux" + else ("dylib" if platform_system == "Darwin" else "dll") + ) + lib_name = "blosc2" + lib_file = f"lib{lib_name}.{ext}" + + for where in search_paths: + lib_path = Path(where) / lib_file + if where == "" or lib_path.exists(): + try: + ctypes.CDLL(str(lib_path)) # may be Path in Python 3.12+ + return True + except OSError: + pass + + import ctypes.util + + if ctypes.util.find_library(lib_name): + return True + + return False + + +if not _load_blosc2(): + raise RuntimeError("Blosc2 library not found.") + +from ._version import __version__ + +# Necessary imports to get versions stored on the cython extension +from .utilsextension import get_hdf5_version as _get_hdf5_version + +hdf5_version = _get_hdf5_version() +"""The underlying HDF5 library version number. + +.. 
versionadded:: 3.0 + +""" + +from .atom import * +from .file import File, open_file, copy_file +from .leaf import Leaf, ChunkInfo +from .node import Node +from .array import Array +from .group import Group +from .table import Table, Cols, Column +from .tests import print_versions, test +from .carray import CArray +from .earray import EArray +from .flavor import restrict_flavors +from .filters import Filters +from .vlarray import VLArray +from .misc.enum import Enum + +# Import the user classes from the proper modules +from .exceptions import * +from .expression import Expr +from .description import * +from .unimplemented import UnImplemented, Unknown +from .utilsextension import ( + blosc_compcode_to_compname_ as blosc_compcode_to_compname, +) +from .utilsextension import ( + blosc2_compcode_to_compname_ as blosc2_compcode_to_compname, +) +from .utilsextension import blosc_get_complib_info_ as blosc_get_complib_info +from .utilsextension import blosc2_get_complib_info_ as blosc2_get_complib_info +from .utilsextension import ( + blosc_compressor_list, + blosc2_compressor_list, + is_hdf5_file, + is_pytables_file, + which_lib_version, + set_blosc_max_threads, + set_blosc2_max_threads, + silence_hdf5_messages, +) + +# List here only the objects we want to be publicly available +__all__ = [ + # Exceptions and warnings: + "HDF5ExtError", + "ClosedNodeError", + "ClosedFileError", + "FileModeError", + "NaturalNameWarning", + "NodeError", + "NoSuchNodeError", + "UndoRedoError", + "UndoRedoWarning", + "PerformanceWarning", + "FlavorError", + "FlavorWarning", + "FiltersWarning", + "DataTypeWarning", + "ChunkError", + "NotChunkedError", + "NotChunkAlignedError", + "NoSuchChunkError", + # Functions: + "is_hdf5_file", + "is_pytables_file", + "which_lib_version", + "copy_file", + "open_file", + "print_versions", + "test", + "split_type", + "restrict_flavors", + "set_blosc_max_threads", + "set_blosc2_max_threads", + "silence_hdf5_messages", + # Helper classes: + "IsDescription", + 
"Description", + "Filters", + "Cols", + "Column", + "ChunkInfo", + # Types: + "Enum", + # Atom types: + "Atom", + "StringAtom", + "BoolAtom", + "IntAtom", + "UIntAtom", + "Int8Atom", + "UInt8Atom", + "Int16Atom", + "UInt16Atom", + "Int32Atom", + "UInt32Atom", + "Int64Atom", + "UInt64Atom", + "FloatAtom", + "Float32Atom", + "Float64Atom", + "ComplexAtom", + "Complex32Atom", + "Complex64Atom", + "Complex128Atom", + "TimeAtom", + "Time32Atom", + "Time64Atom", + "EnumAtom", + "PseudoAtom", + "ObjectAtom", + "VLStringAtom", + "VLUnicodeAtom", + # Column types: + "Col", + "StringCol", + "BoolCol", + "IntCol", + "UIntCol", + "Int8Col", + "UInt8Col", + "Int16Col", + "UInt16Col", + "Int32Col", + "UInt32Col", + "Int64Col", + "UInt64Col", + "FloatCol", + "Float32Col", + "Float64Col", + "ComplexCol", + "Complex32Col", + "Complex64Col", + "Complex128Col", + "TimeCol", + "Time32Col", + "Time64Col", + "EnumCol", + # Node classes: + "Node", + "Group", + "Leaf", + "Table", + "Array", + "CArray", + "EArray", + "VLArray", + "UnImplemented", + "Unknown", + # The File class: + "File", + # Expr class + "Expr", +] + +if "Float16Atom" in locals(): + # float16 is new in numpy 1.6.0 + __all__.extend(("Float16Atom", "Float16Col")) + +if "Float96Atom" in locals(): + __all__.extend(("Float96Atom", "Float96Col")) + __all__.extend(("Complex192Atom", "Complex192Col")) # XXX check + +if "Float128Atom" in locals(): + __all__.extend(("Float128Atom", "Float128Col")) + __all__.extend(("Complex256Atom", "Complex256Col")) # XXX check + + +def get_pytables_version() -> str: + warnings.warn( + "the 'get_pytables_version()' function is deprecated and could be " + "removed in future versions. Please use 'tables.__version__'", + DeprecationWarning, + ) + return __version__ + + +def get_hdf5_version() -> str: + warnings.warn( + "the 'get_hdf5_version()' function is deprecated and could be " + "removed in future versions. 
Please use 'tables.hdf5_version'", + DeprecationWarning, + ) + return hdf5_version diff --git a/venv/Lib/site-packages/tables/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..2309f20 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/_version.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/_version.cpython-311.pyc new file mode 100644 index 0000000..e5489c5 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/_version.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/array.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/array.cpython-311.pyc new file mode 100644 index 0000000..2274ec8 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/array.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/atom.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/atom.cpython-311.pyc new file mode 100644 index 0000000..fa128a0 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/atom.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/attributeset.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/attributeset.cpython-311.pyc new file mode 100644 index 0000000..c8b11b5 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/attributeset.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/carray.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/carray.cpython-311.pyc new file mode 100644 index 0000000..68bcea7 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/carray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/conditions.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/conditions.cpython-311.pyc 
new file mode 100644 index 0000000..a65e3db Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/conditions.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/description.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/description.cpython-311.pyc new file mode 100644 index 0000000..a9ede9a Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/description.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/earray.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/earray.cpython-311.pyc new file mode 100644 index 0000000..532ee5a Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/earray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/exceptions.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/exceptions.cpython-311.pyc new file mode 100644 index 0000000..ee065de Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/exceptions.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/expression.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/expression.cpython-311.pyc new file mode 100644 index 0000000..1b99141 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/expression.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/file.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/file.cpython-311.pyc new file mode 100644 index 0000000..0a3446e Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/file.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/filters.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/filters.cpython-311.pyc new file mode 100644 index 0000000..62949b5 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/filters.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/__pycache__/flavor.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/flavor.cpython-311.pyc new file mode 100644 index 0000000..3b5644c Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/flavor.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/group.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/group.cpython-311.pyc new file mode 100644 index 0000000..cf77e48 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/group.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/idxutils.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/idxutils.cpython-311.pyc new file mode 100644 index 0000000..752666d Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/idxutils.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/index.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/index.cpython-311.pyc new file mode 100644 index 0000000..93f8962 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/index.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/indexes.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/indexes.cpython-311.pyc new file mode 100644 index 0000000..e7e7403 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/indexes.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/leaf.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/leaf.cpython-311.pyc new file mode 100644 index 0000000..fc019b9 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/leaf.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/link.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/link.cpython-311.pyc new file mode 100644 index 0000000..81ac7d4 Binary files /dev/null and 
b/venv/Lib/site-packages/tables/__pycache__/link.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/node.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/node.cpython-311.pyc new file mode 100644 index 0000000..4080326 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/node.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/parameters.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/parameters.cpython-311.pyc new file mode 100644 index 0000000..a56bdd9 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/parameters.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/path.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/path.cpython-311.pyc new file mode 100644 index 0000000..429bc4e Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/path.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/registry.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/registry.cpython-311.pyc new file mode 100644 index 0000000..019cd02 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/registry.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/req_versions.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/req_versions.cpython-311.pyc new file mode 100644 index 0000000..0c8c057 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/req_versions.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/table.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/table.cpython-311.pyc new file mode 100644 index 0000000..f90be01 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/table.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/undoredo.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/undoredo.cpython-311.pyc new file mode 
100644 index 0000000..a33b615 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/undoredo.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/unimplemented.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/unimplemented.cpython-311.pyc new file mode 100644 index 0000000..fbeeffd Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/unimplemented.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/utils.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000..4830095 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/utils.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/__pycache__/vlarray.cpython-311.pyc b/venv/Lib/site-packages/tables/__pycache__/vlarray.cpython-311.pyc new file mode 100644 index 0000000..5cd6812 Binary files /dev/null and b/venv/Lib/site-packages/tables/__pycache__/vlarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/_comp_bzip2.pyd b/venv/Lib/site-packages/tables/_comp_bzip2.pyd new file mode 100644 index 0000000..2a5f6fd Binary files /dev/null and b/venv/Lib/site-packages/tables/_comp_bzip2.pyd differ diff --git a/venv/Lib/site-packages/tables/_comp_bzip2.pyx b/venv/Lib/site-packages/tables/_comp_bzip2.pyx new file mode 100644 index 0000000..4a3399b --- /dev/null +++ b/venv/Lib/site-packages/tables/_comp_bzip2.pyx @@ -0,0 +1,19 @@ +import sys +from libc.stdlib cimport free + + +cdef extern from "H5Zbzip2.h": + int register_bzip2(char **, char **) + + +def register_(): + cdef char *version + cdef char *date + + if not register_bzip2(&version, &date): + return None + + compinfo = (version, date) + free(version) + free(date) + return compinfo[0].decode('ascii'), compinfo[1].decode('ascii') diff --git a/venv/Lib/site-packages/tables/_comp_lzo.pyd b/venv/Lib/site-packages/tables/_comp_lzo.pyd new file mode 100644 index 
0000000..b9cf3f0 Binary files /dev/null and b/venv/Lib/site-packages/tables/_comp_lzo.pyd differ diff --git a/venv/Lib/site-packages/tables/_comp_lzo.pyx b/venv/Lib/site-packages/tables/_comp_lzo.pyx new file mode 100644 index 0000000..c6af453 --- /dev/null +++ b/venv/Lib/site-packages/tables/_comp_lzo.pyx @@ -0,0 +1,19 @@ +import sys +from libc.stdlib cimport free + + +cdef extern from "H5Zlzo.h": + int register_lzo(char **, char **) + + +def register_(): + cdef char *version + cdef char *date + + if not register_lzo(&version, &date): + return None + + compinfo = (version, date) + free(version) + free(date) + return compinfo[0].decode('ascii'), compinfo[1].decode('ascii') diff --git a/venv/Lib/site-packages/tables/_version.py b/venv/Lib/site-packages/tables/_version.py new file mode 100644 index 0000000..cdaeafb --- /dev/null +++ b/venv/Lib/site-packages/tables/_version.py @@ -0,0 +1,2 @@ +__version__ = "3.11.1" +"""The PyTables version number.""" diff --git a/venv/Lib/site-packages/tables/array.py b/venv/Lib/site-packages/tables/array.py new file mode 100644 index 0000000..1276204 --- /dev/null +++ b/venv/Lib/site-packages/tables/array.py @@ -0,0 +1,993 @@ +"""Here is defined the Array class.""" + +from __future__ import annotations + +import sys +import operator +from typing import Any, Union, TYPE_CHECKING + +import numpy as np +import numpy.typing as npt + +from . 
import hdf5extension +from .leaf import Leaf +from .utils import ( + is_idx, + convert_to_np_atom2, + SizeType, + lazyattr, + byteorders, + quantize, +) +from .flavor import flavor_of, array_as_internal, internal_to_flavor +from .filters import Filters + +if TYPE_CHECKING: + from .atom import Atom, EnumAtom + from .group import Group + from .misc.enum import Enum + +# default version for ARRAY objects +# obversion = "1.0" # initial version +# obversion = "2.0" # Added an optional EXTDIM attribute +# obversion = "2.1" # Added support for complex datatypes +# obversion = "2.2" # This adds support for time datatypes. +# obversion = "2.3" # This adds support for enumerated datatypes. +obversion = "2.4" # Numeric and numarray flavors are gone. + +SelectionType = Union[int, slice, list[Union[int, slice]], npt.ArrayLike] + + +class Array(hdf5extension.Array, Leaf): + """This class represents homogeneous datasets in an HDF5 file. + + This class provides methods to write or read data to or from array objects + in the file. This class does not allow you neither to enlarge nor compress + the datasets on disk; use the EArray class (see :ref:`EArrayClassDescr`) if + you want enlargeable dataset support or compression features, or CArray + (see :ref:`CArrayClassDescr`) if you just want compression. + + An interesting property of the Array class is that it remembers the + *flavor* of the object that has been saved so that if you saved, for + example, a list, you will get a list during readings afterwards; if you + saved a NumPy array, you will get a NumPy object, and so forth. + + Note that this class inherits all the public attributes and methods that + Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array + instances have no internal I/O buffers, it is not necessary to use the + flush() method they inherit from Leaf in order to save their internal state + to disk. When a writing method call returns, all the data is already on + disk. 
+ + Parameters + ---------- + parentnode + The parent :class:`Group` object. + + .. versionchanged:: 3.0 + Renamed from *parentNode* to *parentnode* + + name : str + The name of this node in its parent group. + obj + The array or scalar to be saved. Accepted types are NumPy + arrays and scalars as well as native Python sequences and + scalars, provided that values are regular (i.e. they are not + like ``[[1,2],2]``) and homogeneous (i.e. all the elements are + of the same type). + + .. versionchanged:: 3.0 + Renamed from *object* into *obj*. + title + A description for this node (it sets the ``TITLE`` HDF5 attribute on + disk). + byteorder + The byteorder of the data *on disk*, specified as 'little' or 'big'. + If this is not specified, the byteorder is that of the given `object`. + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, object + birth time); default True. Semantics of these times depend on their + implementation in the HDF5 library: refer to documentation of the + H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata + change time) is implemented. + + .. versionadded:: 3.4.3 + + """ + + # Class identifier. 
+ _c_classid = "ARRAY" + + @lazyattr + def dtype(self) -> np.dtype: + """Numpy ``dtype`` most closely matching the one of the array.""" + return self.atom.dtype + + @property + def nrows(self) -> int: + """Return the number of rows in the array.""" + if self.shape == (): + return SizeType(1) # scalar case + else: + return self.shape[self.maindim] + + @property + def rowsize(self) -> int: + """Size of the rows in bytes in dimensions orthogonal to *maindim*.""" + maindim = self.maindim + rowsize = self.atom.size + for i, dim in enumerate(self.shape): + if i != maindim: + rowsize *= dim + return rowsize + + @property + def size_in_memory(self) -> int: + """Size of the array's data in bytes when fully loaded in memory.""" + return self.nrows * self.rowsize + + def __init__( + self, + parentnode: Group, + name: str, + obj: npt.ArrayLike | None = None, + title: str = "", + byteorder: str | None = None, + _log: bool = True, + _atom: Atom | EnumAtom | None = None, + track_times: bool = True, + ) -> None: + + self._v_version: str | None = None + """The object version of this array.""" + self._v_new = new = obj is not None + """Is this the first time the node has been created?""" + self._v_new_title = title + """New title for this node.""" + self._obj = obj + """The object to be stored in the array. It can be any of numpy, + list, tuple, string, integer of floating point types, provided + that they are regular (i.e. they are not like ``[[1, 2], 2]``). + + .. versionchanged:: 3.0 + Renamed form *_object* into *_obj*. + + """ + + self._v_convert = True + """Whether the ``Array`` object must be converted or not.""" + + # Miscellaneous iteration rubbish. 
+ self._start: int | None = None + """Starting row for the current iteration.""" + self._stop: int | None = None + """Stopping row for the current iteration.""" + self._step: int | None = None + """Step size for the current iteration.""" + self._nrowsread: int | None = None + """Number of rows read up to the current state of iteration.""" + self._startb: int | None = None + """Starting row for current buffer.""" + self._stopb: int | None = None + """Stopping row for current buffer. """ + self._row: int | None = None + """Current row in iterators (sentinel).""" + self._init = False + """Whether we are in the middle of an iteration or not (sentinel).""" + self.listarr: npt.ArrayLike | None = None + """Current buffer in iterators.""" + + # Documented (*public*) attributes. + self.atom = _atom + """An Atom (see :ref:`AtomClassDescr`) instance representing the *type* + and *shape* of the atomic objects to be saved. + """ + self.shape: list[int] | None = None + """The shape of the stored array.""" + self.nrow: int | None = None + """On iterators, this is the index of the current row.""" + self.extdim = -1 # ordinary arrays are not enlargeable + """The index of the enlargeable dimension.""" + + # Ordinary arrays have no filters: leaf is created with default ones. + super().__init__( + parentnode, name, new, Filters(), byteorder, _log, track_times + ) + + def _g_create(self) -> int: + """Save a new array in file.""" + self._v_version = obversion + try: + # `Leaf._g_post_init_hook()` should be setting the flavor on disk. + self._flavor = flavor = flavor_of(self._obj) + nparr = array_as_internal(self._obj, flavor) + except Exception: # XXX + # Problems converting data. Close the node and re-raise exception. 
+ self.close(flush=0) + raise + + # Raise an error in case of unsupported object + if nparr.dtype.kind in ["V", "U", "O"]: # in void, unicode, object + raise TypeError( + "Array objects cannot currently deal with void, " + "unicode or object arrays" + ) + + # Decrease the number of references to the object + self._obj = None + + # Fix the byteorder of data + nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder) + + # Create the array on-disk + try: + # ``self._v_objectid`` needs to be set because would be + # needed for setting attributes in some descendants later + # on + self._v_objectid, self.shape, self.atom = self._create_array( + nparr, self._v_new_title, self.atom + ) + except Exception: # XXX + # Problems creating the Array on disk. Close node and re-raise. + self.close(flush=0) + raise + + # Compute the optimal buffer size + self.nrowsinbuf = self._calc_nrowsinbuf() + # Arrays don't have chunkshapes (so, set it to None) + self._v_chunkshape = None + + return self._v_objectid + + def _g_open(self) -> int: + """Get the metadata info for an array in file.""" + oid, self.atom, self.shape, self._v_chunkshape = self._open_array() + + self.nrowsinbuf = self._calc_nrowsinbuf() + + return oid + + def get_enum(self) -> Enum: + """Get the enumerated type associated with this array. + + If this array is of an enumerated type, the corresponding Enum instance + (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated + type, a TypeError is raised. + + """ + if self.atom.kind != "enum": + raise TypeError( + "array ``%s`` is not of an enumerated type" % self._v_pathname + ) + + return self.atom.enum + + def iterrows( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> tuple | Array: + """Iterate over the rows of the array. + + This method returns an iterator yielding an object of the current + flavor for each selected row in the array. The returned rows are taken + from the *main dimension*. 
+ + If a range is not supplied, *all the rows* in the array are iterated + upon - you can also use the :meth:`Array.__iter__` special method for + that purpose. If you only want to iterate over a given *range of rows* + in the array, you may use the start, stop and step parameters. + + Examples + -------- + :: + + result = [row for row in arrayInstance.iterrows(step=4)] + + .. versionchanged:: 3.0 + If the *start* parameter is provided and *stop* is None then the + array is iterated from *start* to the last line. + In PyTables < 3.0 only one element was returned. + + """ + try: + self._start, self._stop, self._step = self._process_range( + start, stop, step + ) + except IndexError: + # If problems with indexes, silently return the null tuple + return () + self._init_loop() + return self + + def __iter__(self) -> Array: + """Iterate over the rows of the array. + + This is equivalent to calling :meth:`Array.iterrows` with default + arguments, i.e. it iterates over *all the rows* in the array. + + Examples + -------- + :: + + result = [row[2] for row in array] + + Which is equivalent to:: + + result = [row[2] for row in array.iterrows()] + + """ + if not self._init: + # If the iterator is called directly, assign default variables + self._start = 0 + self._stop = self.nrows + self._step = 1 + # and initialize the loop + self._init_loop() + return self + + def _init_loop(self) -> None: + """Initialize the __iter__ iterator.""" + self._nrowsread = self._start + self._startb = self._start + self._row = -1 # Sentinel + self._init = True # Sentinel + self.nrow = SizeType(self._start - self._step) # row number + + def __next__(self) -> Any: + """Get the next element of the array during an iteration. + + The element is returned as an object of the current flavor. 
+ + """ + # this could probably be sped up for long iterations by reusing the + # listarr buffer + if self._nrowsread >= self._stop: + self._init = False + self.listarr = None # fixes issue #308 + raise StopIteration # end of iteration + else: + # Read a chunk of rows + if self._row + 1 >= self.nrowsinbuf or self._row < 0: + self._stopb = self._startb + self._step * self.nrowsinbuf + # Protection for reading more elements than needed + if self._stopb > self._stop: + self._stopb = self._stop + listarr = self._read(self._startb, self._stopb, self._step) + # Swap the axes to easy the return of elements + if self.extdim > 0: + listarr = listarr.swapaxes(self.extdim, 0) + self.listarr = internal_to_flavor(listarr, self.flavor) + self._row = -1 + self._startb = self._stopb + self._row += 1 + self.nrow += self._step + self._nrowsread += self._step + # Fixes bug #968132 + # if self.listarr.shape: + if self.shape: + return self.listarr[self._row] + else: + return self.listarr # Scalar case + + def _interpret_indexing( + self, + keys: SelectionType, + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, list[int]]: + """Implement the common part of `__getitem__` and `__setitem__`.""" + maxlen = len(self.shape) + shape = (maxlen,) + startl = np.empty(shape=shape, dtype=SizeType) + stopl = np.empty(shape=shape, dtype=SizeType) + stepl = np.empty(shape=shape, dtype=SizeType) + stop_none = np.zeros(shape=shape, dtype=SizeType) + if not isinstance(keys, tuple): + keys = (keys,) + nkeys = len(keys) + dim = 0 + # Here is some problem when dealing with [...,...] 
params + # but this is a bit weird way to pass parameters anyway + for key in keys: + ellipsis = 0 # Sentinel + if isinstance(key, type(Ellipsis)): + ellipsis = 1 + for diml in range(dim, len(self.shape) - (nkeys - dim) + 1): + startl[dim] = 0 + stopl[dim] = self.shape[diml] + stepl[dim] = 1 + dim += 1 + elif dim >= maxlen: + raise IndexError( + "Too many indices for object '%s'" % self._v_pathname + ) + elif is_idx(key): + key = operator.index(key) + + # Protection for index out of range + if key >= self.shape[dim]: + raise IndexError("Index out of range") + if key < 0: + # To support negative values (Fixes bug #968149) + key += self.shape[dim] + start, stop, step = self._process_range( + key, key + 1, 1, dim=dim + ) + stop_none[dim] = 1 + elif isinstance(key, slice): + start, stop, step = self._process_range( + key.start, key.stop, key.step, dim=dim + ) + else: + raise TypeError("Non-valid index or slice: %s" % key) + if not ellipsis: + startl[dim] = start + stopl[dim] = stop + stepl[dim] = step + dim += 1 + + # Complete the other dimensions, if needed + if dim < len(self.shape): + for diml in range(dim, len(self.shape)): + startl[dim] = 0 + stopl[dim] = self.shape[diml] + stepl[dim] = 1 + dim += 1 + + # Compute the shape for the container properly. Fixes #1288792 + shape = [] + for dim in range(len(self.shape)): + new_dim = len(range(startl[dim], stopl[dim], stepl[dim])) + if not (new_dim == 1 and stop_none[dim]): + shape.append(new_dim) + + return startl, stopl, stepl, shape + + def _fancy_selection(self, args: list[int | list[int]]) -> tuple[ + list[tuple[int, int, int, int, str]], + tuple[int, np.ndarray] | None, + tuple[int, ...], + ]: + """Perform a NumPy-style fancy selection in `self`. + + Implements advanced NumPy-style selection operations in + addition to the standard slice-and-int behavior. + + Indexing arguments may be ints, slices or lists of indices. + + Note: This is a backport from the h5py project. 
+ + """ + # Internal functions + + def validate_number(num: int, length: int) -> None: + """Validate a list member for the given axis length.""" + try: + num = int(num) + except TypeError: + raise TypeError("Illegal index: %r" % num) + if num > length - 1: + raise IndexError("Index out of bounds: %d" % num) + + def expand_ellipsis( + args: tuple[int | list[int], ...], rank: int + ) -> list: + """Expand ellipsis objects and fill in missing axes.""" + n_el = sum(1 for arg in args if arg is Ellipsis) + if n_el > 1: + raise IndexError("Only one ellipsis may be used.") + elif n_el == 0 and len(args) != rank: + args = args + (Ellipsis,) + + final_args = [] + n_args = len(args) + for idx, arg in enumerate(args): + if arg is Ellipsis: + final_args.extend((slice(None),) * (rank - n_args + 1)) + else: + final_args.append(arg) + + if len(final_args) > rank: + raise IndexError("Too many indices.") + + return final_args + + def translate_slice(exp: slice, length: int) -> tuple[int, int, int]: + """Given a slice object, return a 3-tuple (start, count, step). + + This is for use with the hyperslab selection routines. 
+ + """ + start, stop, step = exp.start, exp.stop, exp.step + if start is None: + start = 0 + else: + start = int(start) + if stop is None: + stop = length + else: + stop = int(stop) + if step is None: + step = 1 + else: + step = int(step) + + if step < 1: + raise IndexError("Step must be >= 1 (got %d)" % step) + if stop == start: + raise IndexError("Zero-length selections are not allowed") + if stop < start: + raise IndexError("Reverse-order selections are not allowed") + if start < 0: + start = length + start + if stop < 0: + stop = length + stop + + if not 0 <= start <= (length - 1): + raise IndexError( + "Start index %s out of range (0-%d)" % (start, length - 1) + ) + if not 1 <= stop <= length: + raise IndexError( + "Stop index %s out of range (1-%d)" % (stop, length) + ) + + count = (stop - start) // step + if (stop - start) % step != 0: + count += 1 + + if start + count > length: + raise IndexError( + f"Selection out of bounds ({start + count}; " + f"axis has {length})" + ) + + return start, count, step + + # Main code for _fancy_selection + mshape = [] + selection = [] + + if not isinstance(args, tuple): + args = (args,) + + args = expand_ellipsis(args, len(self.shape)) + + list_seen = False + reorder = None + for idx, (exp, length) in enumerate(zip(args, self.shape)): + if isinstance(exp, slice): + start, count, step = translate_slice(exp, length) + selection.append((start, count, step, idx, "AND")) + mshape.append(count) + else: + try: + exp = list(exp) + except TypeError: + exp = [exp] # Handle scalar index as a list of length 1 + mshape.append(0) # Keep track of scalar index for NumPy + else: + mshape.append(len(exp)) + if len(exp) == 0: + raise IndexError( + f"Empty selections are not allowed (axis {idx})" + ) + elif len(exp) > 1: + if list_seen: + raise IndexError("Only one selection list is allowed") + else: + list_seen = True + else: + if not isinstance(exp[0], (int, np.integer)) or ( + isinstance(exp[0], np.ndarray) + and not 
np.issubdtype(exp[0].dtype, np.integer) + ): + raise TypeError("Only integer coordinates allowed.") + + nexp = np.asarray(exp, dtype="i8") + # Convert negative values + nexp = np.where(nexp < 0, length + nexp, nexp) + # Check whether the list is ordered or not + # (only one unordered list is allowed) + if len(nexp) != len(np.unique(nexp)): + raise IndexError( + "Selection lists cannot have repeated values. " + "To see how to handle this, please see " + "https://github.com/PyTables/PyTables/issues/1149" + ) + neworder = nexp.argsort() + if ( + neworder.shape != (len(exp),) + or np.sum(np.abs(neworder - np.arange(len(exp)))) != 0 + ): + if reorder is not None: + raise IndexError( + "Only one selection list can be unordered" + ) + corrected_idx = sum(1 for x in mshape if x != 0) - 1 + reorder = (corrected_idx, neworder) + nexp = nexp[neworder] + for select_idx in range(len(nexp) + 1): + # This crazy piece of code performs a list selection + # using HDF5 hyperslabs. + # For each index, perform a "NOTB" selection on every + # portion of *this axis* which falls *outside* the list + # selection. For this to work, the input array MUST be + # monotonically increasing. + if select_idx < len(nexp): + validate_number(nexp[select_idx], length) + if select_idx == 0: + start = 0 + count = nexp[0] + elif select_idx == len(nexp): + start = nexp[-1] + 1 + count = length - start + else: + start = nexp[select_idx - 1] + 1 + count = nexp[select_idx] - start + if count > 0: + selection.append((start, count, 1, idx, "NOTB")) + + mshape = tuple(x for x in mshape if x != 0) + return selection, reorder, mshape + + def __getitem__(self, key: SelectionType) -> list | np.ndarray: + """Get a row, a range of rows or a slice from the array. + + The set of tokens allowed for the key is the same as that for extended + slicing in Python (including the Ellipsis or ... token). 
The result is + an object of the current flavor; its shape depends on the kind of slice + used as key and the shape of the array itself. + + Furthermore, NumPy-style fancy indexing, where a list of indices in a + certain axis is specified, is also supported. Note that only one list + per selection is supported right now. Finally, NumPy-style point and + boolean selections are supported as well. + + Examples + -------- + :: + + array1 = array[4] # simple selection + array2 = array[4:1000:2] # slice selection + array3 = array[1, ..., ::2, 1:4, 4:] # general slice selection + array4 = array[1, [1,5,10], ..., -1] # fancy selection + array5 = array[np.where(array[:] > 4)] # point selection + array6 = array[array[:] > 4] # boolean selection + + """ + self._g_check_open() + + try: + # First, try with a regular selection + startl, stopl, stepl, shape = self._interpret_indexing(key) + arr = self._read_slice(startl, stopl, stepl, shape) + except TypeError: + # Then, try with a point-wise selection + try: + coords = self._point_selection(key) + arr = self._read_coords(coords) + except TypeError: + # Finally, try with a fancy selection + selection, reorder, shape = self._fancy_selection(key) + arr = self._read_selection(selection, reorder, shape) + + if self.flavor == "numpy" or not self._v_convert: + return arr + + return internal_to_flavor(arr, self.flavor) + + def __setitem__(self, key: SelectionType, value: Any) -> None: + """Set a row, a range of rows or a slice in the array. + + It takes different actions depending on the type of the key parameter: + if it is an integer, the corresponding array row is set to value (the + value is broadcast when needed). If key is a slice, the row slice + determined by it is set to value (as usual, if the slice to be updated + exceeds the actual shape of the array, only the values in the existing + range are updated). 
+ + If value is a multidimensional object, then its shape must be + compatible with the shape determined by key, otherwise, a ValueError + will be raised. + + Furthermore, NumPy-style fancy indexing, where a list of indices in a + certain axis is specified, is also supported. Note that only one list + per selection is supported right now. Finally, NumPy-style point and + boolean selections are supported as well. + + Examples + -------- + :: + + a1[0] = 333 # assign an integer to an Integer Array row + a2[0] = 'b' # assign a string to a string Array row + a3[1:4] = 5 # broadcast 5 to slice 1:4 + a4[1:4:2] = 'xXx' # broadcast 'xXx' to slice 1:4:2 + + # General slice update (a5.shape = (4,3,2,8,5,10). + a5[1, ..., ::2, 1:4, 4:] = numpy.arange(1728, shape=(4,3,2,4,3,6)) + a6[1, [1,5,10], ..., -1] = arr # fancy selection + a7[np.where(a6[:] > 4)] = 4 # point selection + broadcast + a8[arr > 4] = arr2 # boolean selection + + """ + self._g_check_open() + + # Create an array compliant with the specified slice + nparr = convert_to_np_atom2(value, self.atom) + if nparr.size == 0: + return + + # truncate data if least_significant_digit filter is set + # TODO: add the least_significant_digit attribute to the array on disk + if ( + self.filters.least_significant_digit is not None + and not np.issubdtype(nparr.dtype, np.signedinteger) + ): + nparr = quantize(nparr, self.filters.least_significant_digit) + + try: + startl, stopl, stepl, shape = self._interpret_indexing(key) + self._write_slice(startl, stopl, stepl, shape, nparr) + except TypeError: + # Then, try with a point-wise selection + try: + coords = self._point_selection(key) + self._write_coords(coords, nparr) + except TypeError: + selection, reorder, shape = self._fancy_selection(key) + self._write_selection(selection, reorder, shape, nparr) + + def _check_shape( + self, nparr: np.ndarray, slice_shape: tuple[int, ...] + ) -> np.ndarray: + """Test that nparr shape is consistent with underlying object. 
+ + If not, try creating a new nparr object, using broadcasting if + necessary. + + """ + if nparr.shape != (slice_shape + self.atom.dtype.shape): + # Create an array compliant with the specified shape + narr = np.empty(shape=slice_shape, dtype=self.atom.dtype) + + # Assign the value to it. It will raise a ValueError exception + # if the objects cannot be broadcast to a single shape. + narr[...] = nparr + return narr + else: + return nparr + + def _read_slice( + self, + startl: np.ndarray, + stopl: np.ndarray, + stepl: np.ndarray, + shape: list[int], + ) -> np.ndarray: + """Read a slice based on `startl`, `stopl` and `stepl`.""" + nparr = np.empty(dtype=self.atom.dtype, shape=shape) + # Protection against reading empty arrays + if 0 not in shape: + # Arrays that have non-zero dimensionality + self._g_read_slice(startl, stopl, stepl, nparr) + # For zero-shaped arrays, return the scalar + if nparr.shape == (): + nparr = nparr[()] + return nparr + + def _read_coords(self, coords: np.ndarray) -> np.ndarray: + """Read a set of points defined by `coords`.""" + nparr = np.empty(dtype=self.atom.dtype, shape=len(coords)) + if len(coords) > 0: + self._g_read_coords(coords, nparr) + # For zero-shaped arrays, return the scalar + if nparr.shape == (): + nparr = nparr[()] + return nparr + + def _read_selection( + self, + selection: list[tuple[int, int, int, int, str]], + reorder: tuple[int, npt.ArrayLike] | None, + shape: tuple[int, ...], + ) -> np.ndarray: + """Read a `selection`. + + Reorder if necessary. 
+ + """ + # Create the container for the slice + nparr = np.empty(dtype=self.atom.dtype, shape=shape) + # Arrays that have non-zero dimensionality + self._g_read_selection(selection, nparr) + # For zero-shaped arrays, return the scalar + if nparr.shape == (): + nparr = nparr[()] + elif reorder is not None: + # We need to reorder the array + idx, neworder = reorder + k = [slice(None)] * len(shape) + k[idx] = neworder.argsort() + # Apparently, a copy is not needed here, but doing it + # for symmetry with the `_write_selection()` method. + nparr = nparr[tuple(k)].copy() + return nparr + + def _write_slice( + self, + startl: np.ndarray, + stopl: np.ndarray, + stepl: np.ndarray, + shape: list[int], + nparr: np.ndarray, + ) -> None: + """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`.""" + nparr = self._check_shape(nparr, tuple(shape)) + countl = ((stopl - startl - 1) // stepl) + 1 + self._g_write_slice(startl, stepl, countl, nparr) + + def _write_coords(self, coords: np.ndarray, nparr: np.ndarray) -> None: + """Write `nparr` values in points defined by `coords` coordinates.""" + if len(coords) > 0: + nparr = self._check_shape(nparr, (len(coords),)) + self._g_write_coords(coords, nparr) + + def _write_selection( + self, + selection: list[tuple[int, int, int, int, str]], + reorder: tuple[int, npt.ArrayLike] | None, + shape: tuple[int, ...], + nparr: np.ndarray, + ) -> None: + """Write `nparr` in `selection`. + + Reorder if necessary. 
+ + """ + nparr = self._check_shape(nparr, tuple(shape)) + # Check whether we should reorder the array + if reorder is not None: + idx, neworder = reorder + k = [slice(None)] * len(shape) + k[idx] = neworder + # For a reason a don't understand well, we need a copy of + # the reordered array + nparr = nparr[tuple(k)].copy() + self._g_write_selection(selection, nparr) + + def _read( + self, start: int, stop: int, step: int, out: np.ndarray | None = None + ) -> np.ndarray: + """Read the array from disk without slice or flavor processing.""" + nrowstoread = len(range(start, stop, step)) + shape = list(self.shape) + if shape: + shape[self.maindim] = nrowstoread + if out is None: + arr = np.empty(dtype=self.atom.dtype, shape=shape) + else: + bytes_required = self.rowsize * nrowstoread + # if buffer is too small, it will segfault + if bytes_required != out.nbytes: + raise ValueError( + f"output array size invalid, got {out.nbytes}" + f" bytes, need {bytes_required} bytes" + ) + if not out.flags["C_CONTIGUOUS"]: + raise ValueError("output array not C contiguous") + arr = out + # Protection against reading empty arrays + if 0 not in shape: + # Arrays that have non-zero dimensionality + self._read_array(start, stop, step, arr) + # data is always read in the system byteorder + # if the out array's byteorder is different, do a byteswap + if ( + out is not None + and byteorders[arr.dtype.byteorder] != sys.byteorder + ): + arr.byteswap(True) + return arr + + def read( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + out: np.ndarray | None = None, + ) -> np.ndarray: + """Get data in the array as an object of the current flavor. + + The start, stop and step parameters can be used to select only a + *range of rows* in the array. Their meanings are the same as in + the built-in range() Python function, except that negative values + of step are not allowed yet. Moreover, if only start is specified, + then stop will be set to start + 1. 
If you do not specify neither + start nor stop, then *all the rows* in the array are selected. + + The out parameter may be used to specify a NumPy array to receive + the output data. Note that the array must have the same size as + the data selected with the other parameters. Note that the array's + datatype is not checked and no type casting is performed, so if it + does not match the datatype on disk, the output will not be correct. + Also, this parameter is only valid when the array's flavor is set + to 'numpy'. Otherwise, a TypeError will be raised. + + When data is read from disk in NumPy format, the output will be + in the current system's byteorder, regardless of how it is stored + on disk. + The exception is when an output buffer is supplied, in which case + the output will be in the byteorder of that output buffer. + + .. versionchanged:: 3.0 + Added the *out* parameter. + + """ + self._g_check_open() + if out is not None and self.flavor != "numpy": + msg = ( + f"Optional 'out' argument may only be supplied if array " + f"flavor is 'numpy', currently is {self.flavor}" + ) + raise TypeError(msg) + start, stop, step = self._process_range_read(start, stop, step) + arr = self._read(start, stop, step, out) + return internal_to_flavor(arr, self.flavor) + + def _g_copy_with_stats( + self, + group: Group, + name: str, + start: int, + stop: int, + step: int, + title: str, + filters: Filters, + chunkshape: tuple[int, ...], + _log: bool, + **kwargs, + ) -> tuple[Array, int]: + """Private part of Leaf.copy() for each kind of leaf.""" + # Compute the correct indices. + start, stop, step = self._process_range_read(start, stop, step) + # Get the slice of the array + # (non-buffered version) + if self.shape: + arr = self[start:stop:step] + else: + arr = self[()] + # Build the new Array object. Use the _atom reserved keyword + # just in case the array is being copied from a native HDF5 + # with atomic types different from scalars. + # For details, see #275 of trac. 
+ object_ = Array( + group, name, arr, title=title, _log=_log, _atom=self.atom + ) + nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.size + + return (object_, nbytes) + + def __repr__(self) -> str: + """Provide more metainfo in addition to standard __str__.""" + return f"""{self} + atom := {self.atom!r} + maindim := {self.maindim!r} + flavor := {self.flavor!r} + byteorder := {self.byteorder!r} + chunkshape := {self.chunkshape!r}""" + + +class ImageArray(Array): + """Array containing an image. + + This class has no additional behaviour or functionality compared to + that of an ordinary array. It simply enables the user to open an + ``IMAGE`` HDF5 node as a normal `Array` node in PyTables. + + """ + + # Class identifier. + _c_classid = "IMAGE" diff --git a/venv/Lib/site-packages/tables/atom.py b/venv/Lib/site-packages/tables/atom.py new file mode 100644 index 0000000..22a2e76 --- /dev/null +++ b/venv/Lib/site-packages/tables/atom.py @@ -0,0 +1,1374 @@ +"""Atom classes for describing dataset contents.""" + +from __future__ import annotations + +import re +import pickle +import inspect +import warnings +from typing import Any, NoReturn, dataclass_transform +from collections.abc import Callable + +import numpy as np +from numpy.typing import DTypeLike + +from .utils import SizeType +from .misc.enum import Enum +from .exceptions import FlavorWarning + +Shape = tuple[SizeType, ...] + +__docformat__ = "reStructuredText" +"""The format of documentation strings in this module.""" + +all_types = set() # filled as atom classes are created +"""Set of all PyTables types.""" + +atom_map: dict[str, Any] = {} # filled as atom classes are created +"""Maps atom kinds to item sizes and atom classes. + +If there is a fixed set of possible item sizes for a given kind, the +kind maps to another mapping from item size in bytes to atom class. +Otherwise, the kind maps directly to the atom class. 
+""" + +deftype_from_kind = {} # filled as atom classes are created +"""Maps atom kinds to their default atom type (if any).""" + + +_type_re = re.compile(r"^([a-z]+)([0-9]*)$") + + +def split_type(type_: str) -> tuple[str, int | None]: + """Split a PyTables type into a PyTables kind and an item size. + + Returns a tuple of (kind, itemsize). If no item size is present in the type + (in the form of a precision), the returned item size is None:: + + >>> split_type('int32') + ('int', 4) + >>> split_type('string') + ('string', None) + >>> split_type('int20') + Traceback (most recent call last): + ... + ValueError: precision must be a multiple of 8: 20 + >>> split_type('foo bar') + Traceback (most recent call last): + ... + ValueError: malformed type: 'foo bar' + + """ + match = _type_re.match(type_) + if not match: + raise ValueError("malformed type: %r" % type_) + kind, precision = match.groups() + itemsize = None + if precision: + precision = int(precision) + itemsize, remainder = divmod(precision, 8) + if remainder: # 0 could be a valid item size + raise ValueError( + "precision must be a multiple of 8: %d" % precision + ) + return (kind, itemsize) + + +def _invalid_itemsize_error( + kind: str, itemsize: int, itemsizes: list[int] +) -> ValueError: + isizes = sorted(itemsizes) + return ValueError( + "invalid item size for kind ``%s``: %r; " + "it must be one of ``%r``" % (kind, itemsize, isizes) + ) + + +def _normalize_shape(shape: Shape | np.integer | int) -> Shape: + """Check that the `shape` is safe to be used and return it as a tuple.""" + if isinstance(shape, (np.integer, int)): + if shape < 1: + raise ValueError("shape value must be greater than 0: %d" % shape) + shape = (shape,) # N is a shorthand for (N,) + try: + shape = tuple(shape) + except TypeError: + raise TypeError(f"shape must be an integer or sequence: {shape!r}") + + # XXX Get from HDF5 library if possible. 
def _normalize_default(value: Any, dtype: DTypeLike) -> np.ndarray:
    """Return `value` as a valid default of NumPy type `dtype`.

    Parameters
    ----------
    value : Any
        The requested default value.  ``None`` is replaced by ``0`` before
        the conversion.
    dtype : DTypeLike
        The data type of the atom (possibly a sub-array type).  Anything
        that ``np.dtype()`` accepts may be given.

    Returns
    -------
    The default converted to the base type of `dtype`.  A 0-dim result is
    returned as a NumPy scalar, anything else as a NumPy array.

    Raises
    ------
    ValueError
        If `value` can not be converted to the base type of `dtype`.

    """
    # Accept every dtype-like object, as the annotation promises; a real
    # ``np.dtype`` instance passes through ``np.dtype()`` unchanged.
    dtype = np.dtype(dtype)

    # Create NumPy objects as defaults
    # This is better in order to serialize them as attributes
    if value is None:
        value = 0
    basedtype = dtype.base
    try:
        default = np.array(value, dtype=basedtype)
    except ValueError:
        array = np.array(value)
        if array.shape != basedtype.shape:
            raise
        # Maybe nested dtype with "scalar" value.
        default = np.array(value, dtype=basedtype.base)
    # 0-dim arrays will be represented as NumPy scalars
    # (PyTables attribute convention)
    if default.shape == ():
        default = default[()]
    return default
+ + """ + + kind: str + + def __init__(cls, name: str, bases: tuple, dict_: dict[str, Any]) -> None: + super().__init__(name, bases, dict_) + + kind = dict_.get("kind") + itemsize = dict_.get("itemsize") + type_ = dict_.get("type") + deftype = dict_.get("_deftype") + + if kind and deftype: + deftype_from_kind[kind] = deftype + + if type_: + all_types.add(type_) + + if kind and itemsize and not hasattr(itemsize, "__int__"): + # Atom classes with a non-fixed item size do have an + # ``itemsize``, but it's not a number (e.g. property). + atom_map[kind] = cls + return + + if kind: # first definition of kind, make new entry + atom_map[kind] = {} + + if itemsize and hasattr(itemsize, "__int__"): # fixed + kind = cls.kind # maybe from superclasses + atom_map[kind][int(itemsize)] = cls + + +class Atom(metaclass=MetaAtom): + """Defines the type of atomic cells stored in a dataset. + + The meaning of *atomic* is that individual elements of a cell can + not be extracted directly by indexing (i.e. __getitem__()) the + dataset; e.g. if a dataset has shape (2, 2) and its atoms have + shape (3,), to get the third element of the cell at (1, 0) one + should use dataset[1,0][2] instead of dataset[1,0,2]. + + The Atom class is meant to declare the different properties of the + *base element* (also known as *atom*) of CArray, EArray and + VLArray datasets, although they are also used to describe the base + elements of Array datasets. Atoms have the property that their + length is always the same. However, you can grow datasets along + the extensible dimension in the case of EArray or put a variable + number of them on a VLArray row. Moreover, they are not restricted + to scalar values, and they can be *fully multidimensional + objects*. + + Parameters + ---------- + nptype : str or np.dtype + Sets the Numpy data type of the atom. + shape : tuple + Sets the shape of the atom. An integer shape of + N is equivalent to the tuple (N,). + dflt : Any + Sets the default value for the atom. 
+ + The following are the public methods and attributes of the Atom class. + + Notes + ----- + A series of descendant classes are offered in order to make the + use of these element descriptions easier. You should use a + particular Atom descendant class whenever you know the exact type + you will need when writing your code. Otherwise, you may use one + of the Atom.from_*() factory Methods. + + .. rubric:: Atom attributes + + .. attribute:: dflt + + The default value of the atom. + + If the user does not supply a value for an element while + filling a dataset, this default value will be written to disk. + If the user supplies a scalar value for a multidimensional + atom, this value is automatically *broadcast* to all the items + in the atom cell. If dflt is not supplied, an appropriate zero + value (or *null* string) will be chosen by default. Please + note that default values are kept internally as NumPy objects. + + .. attribute:: dtype + + The NumPy dtype that most closely matches this atom. + + .. attribute:: itemsize + + Size in bytes of a single item in the atom. + Specially useful for atoms of the string kind. + + .. attribute:: kind + + The PyTables kind of the atom (a string). + + .. attribute:: shape + + The shape of the atom (a tuple for scalar atoms). + + .. attribute:: type + + The PyTables type of the atom (a string). 
@classmethod
def from_sctype(
    cls, sctype: str | np.dtype, shape: Shape = (), dflt: Any = None
) -> Atom:
    """Create an Atom from a NumPy scalar type sctype.

    `sctype` may be a NumPy scalar class (e.g. ``np.int16``), the name of
    one (e.g. ``'float64'``) or a scalar ``np.dtype`` instance.  Optional
    shape and default value may be specified as the shape and dflt
    arguments, respectively.  Information in the sctype not represented
    in an Atom is ignored::

        >>> import numpy as np
        >>> Atom.from_sctype(np.int16, shape=(2, 2))
        Int16Atom(shape=(2, 2), dflt=0)
        >>> Atom.from_sctype('S5', dflt='hello')
        Traceback (most recent call last):
        ...
        ValueError: unknown NumPy scalar type: 'S5'
        >>> Atom.from_sctype('float64')
        Float64Atom(shape=(), dflt=0.0)

    Raises
    ------
    ValueError
        If `sctype` does not designate a NumPy scalar type: it is neither
        a string nor a dtype, it names a structured type, it is not
        understood by NumPy, or it is a sized flexible type such as
        ``'S5'``.

    """
    if isinstance(sctype, type) and issubclass(sctype, np.generic):
        scalar_type = sctype
    else:
        unknown = ValueError(f"unknown NumPy scalar type: {sctype!r}")
        # Validate explicitly instead of asserting: assertions vanish
        # under ``python -O`` and would otherwise let ``np.dtype()``
        # happily interpret objects such as ``None`` or lists.
        if not isinstance(sctype, (str, np.dtype)):
            raise unknown
        # A comma-separated string is a structured dtype specification.
        if isinstance(sctype, str) and "," in sctype:
            raise unknown
        try:
            dtype = np.dtype(sctype)
        except TypeError:
            raise unknown from None
        # Sized flexible types carry an item size that a bare scalar
        # class can not express, so they are rejected here.
        if issubclass(dtype.type, np.flexible) and dtype.itemsize > 0:
            raise unknown
        scalar_type = dtype.type
    return cls.from_dtype(np.dtype((scalar_type, shape)), dflt)
@classmethod
def from_type(
    cls, type_: str, shape: Shape = (), dflt: Any = None
) -> Atom:
    """Build the Atom that corresponds to a PyTables type name.

    The type name is looked up among the registered PyTables types, split
    into its kind and item size and handed over to :meth:`from_kind`
    together with the optional shape and dflt arguments::

        >>> Atom.from_type('bool')
        BoolAtom(shape=(), dflt=False)
        >>> Atom.from_type('int16', shape=(2, 2))
        Int16Atom(shape=(2, 2), dflt=0)
        >>> Atom.from_type('string40', dflt='hello')
        Traceback (most recent call last):
        ...
        ValueError: unknown type: 'string40'
        >>> Atom.from_type('Float64')
        Traceback (most recent call last):
        ...
        ValueError: unknown type: 'Float64'

    """
    if type_ in all_types:
        # ``split_type`` yields the (kind, itemsize) pair expected as the
        # two leading arguments of ``from_kind``.
        return cls.from_kind(*split_type(type_), shape, dflt)
    raise ValueError(f"unknown type: {type_!r}")
@property
def recarrtype(self) -> str:
    """Return the string type to be used in `numpy.rec.array()`.

    The result is the atom's dtype shape followed by the type string of
    the base dtype without its leading byte-order character.
    """
    dtype = self.dtype
    return f"{dtype.shape}{dtype.base.str[1:]}"
def copy(self, **override) -> Atom:
    """Return a new atom built from this one's constructor arguments.

    Any constructor argument can be replaced by passing it as a keyword
    argument::

        >>> atom1 = Int32Atom(shape=12)
        >>> atom2 = atom1.copy()
        >>> print(atom1)
        Int32Atom(shape=(12,), dflt=0)
        >>> print(atom2)
        Int32Atom(shape=(12,), dflt=0)
        >>> atom1 is atom2
        False
        >>> atom3 = atom1.copy(shape=(2, 2))
        >>> print(atom3)
        Int32Atom(shape=(2, 2), dflt=0)
        >>> atom1.copy(foobar=42) #doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        TypeError: ...__init__() got an unexpected keyword argument ...

    """
    # Overrides come last so that they win over the current values.
    init_kwargs = {**self._get_init_args(), **override}
    return type(self)(**init_kwargs)
class StringAtom(Atom):  # type: ignore[misc]
    """Defines an atom of type string.

    The item size is the *maximum* length in characters of strings.

    Parameters
    ----------
    itemsize : int
        Maximum length of the stored strings.  It must be convertible to
        an integer and must not be negative (zero is accepted).
    shape : tuple
        Shape of the atom; an integer N is equivalent to ``(N,)``.
    dflt : str or bytes
        Default value of the atom (an empty bytestring by default).

    """

    kind: str = "string"
    type: str = "string"  # noqa: A003
    _defvalue: bytes = b""

    @property  # type: ignore[misc]
    def itemsize(self) -> int:  # type: ignore[override]
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def __init__(
        self, itemsize: int, shape: Shape = (), dflt: str | bytes = _defvalue
    ) -> None:
        # Only negative sizes are refused, so the message must not ask for
        # a strictly positive value.
        if not hasattr(itemsize, "__int__") or int(itemsize) < 0:
            raise ValueError(
                f"invalid item size for kind ``string``: {itemsize!r}; "
                f"it must be a non-negative integer"
            )
        Atom.__init__(self, f"S{itemsize}", shape, dflt)
class Int16Atom(IntAtom):  # type: ignore[misc]
    """Atom for 16 bit integers."""

    itemsize: int = 2
    type: str = "int16"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "int16", shape, dflt)


class Int32Atom(IntAtom):  # type: ignore[misc]
    """Atom for 32 bit integers."""

    itemsize: int = 4
    type: str = "int32"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "int32", shape, dflt)


class Int64Atom(IntAtom):  # type: ignore[misc]
    """Atom for 64 bit integers."""

    itemsize: int = 8
    type: str = "int64"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "int64", shape, dflt)


class UInt8Atom(UIntAtom):  # type: ignore[misc]
    """Atom for 8 bit unsigned integers."""

    itemsize: int = 1
    type: str = "uint8"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "uint8", shape, dflt)


class UInt16Atom(UIntAtom):  # type: ignore[misc]
    """Atom for 16 bit unsigned integers."""

    itemsize: int = 2
    type: str = "uint16"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "uint16", shape, dflt)


class UInt32Atom(UIntAtom):  # type: ignore[misc]
    """Atom for 32 bit unsigned integers."""

    itemsize: int = 4
    type: str = "uint32"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "uint32", shape, dflt)


class UInt64Atom(UIntAtom):  # type: ignore[misc]
    """Atom for 64 bit unsigned integers."""

    itemsize: int = 8
    type: str = "uint64"  # noqa: A003

    def __init__(self, shape: Shape = (), dflt: int = 0) -> None:
        Atom.__init__(self, "uint64", shape, dflt)
class ComplexAtom(Atom):
    """Defines an atom of kind complex.

    Allowed item sizes are 8 (single precision) and 16 (double precision);
    24 and 32 are allowed as well when NumPy provides ``complex192`` and
    ``complex256`` respectively.  This class must be used instead of more
    concrete ones to avoid confusions with numarray-like precision
    specifications used in PyTables 1.X.

    Parameters
    ----------
    itemsize : int
        Size in bytes of a single item; it must be one of the allowed item
        sizes, otherwise a ``ValueError`` is raised.
    shape : tuple
        Shape of the atom.
    dflt : Any
        Default value of the atom (``0j`` by default).

    """

    # This definition is a little more complex (no pun intended)
    # because, although the complex kind is a normal numerical one,
    # the usage of bottom-level classes is artificially forbidden.
    # Everything will be back to normality when people have stopped
    # using the old bottom-level complex classes.

    kind = "complex"
    _deftype = "complex128"
    _defvalue = 0j
    # Allowed item sizes; extended below when NumPy has wider complex types.
    _isizes = [8, 16]

    @property  # type: ignore[misc]
    def itemsize(self) -> int:  # type: ignore[override]
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    # Only instances have a `type` attribute, so complex types must be
    # registered by hand.
    all_types.add("complex64")
    all_types.add("complex128")
    if hasattr(np, "complex192"):
        all_types.add("complex192")
        _isizes.append(24)
    if hasattr(np, "complex256"):
        all_types.add("complex256")
        _isizes.append(32)

    def __init__(
        self, itemsize: int, shape: Shape = (), dflt: Any = _defvalue
    ) -> None:
        if itemsize not in self._isizes:
            raise _invalid_itemsize_error("complex", itemsize, self._isizes)
        # The type name is built per instance from the size in bits,
        # e.g. item size 8 gives ``complex64``.
        self.type = "%s%d" % (self.kind, itemsize * 8)
        Atom.__init__(self, self.type, shape, dflt)
class Time32Atom(TimeAtom):  # type: ignore[misc]
    """Defines an atom of type time32.

    Items take 4 bytes and are handed to ``Atom.__init__`` with the NumPy
    type ``int32``.

    Parameters
    ----------
    shape : tuple
        Shape of the atom.
    dflt : int
        Default value of the atom (``0`` by default).

    """

    itemsize: int = 4
    type: str = "time32"  # noqa: A003
    _defvalue = 0

    def __init__(self, shape: Shape = (), dflt: int = _defvalue) -> None:
        Atom.__init__(self, "int32", shape, dflt)
As a shorthand, you may specify a PyTables type instead of the + storage atom, implying that this has a scalar shape. + + The storage atom should be able to represent each and every concrete value + in the enumeration. If it is not, a TypeError is raised. The default value + of the storage atom is ignored. + + The type attribute of enumerated atoms is always enum. + + Enumerated atoms also support comparisons with other objects:: + + >>> enum = ['T0', 'T1', 'T2'] + >>> atom1 = EnumAtom(enum, 'T0', 'int8') # same as ``atom2`` + >>> atom2 = EnumAtom(enum, 'T0', Int8Atom()) # same as ``atom1`` + >>> atom3 = EnumAtom(enum, 'T0', 'int16') + >>> atom4 = Int8Atom() + >>> atom1 == enum + False + >>> atom1 == atom2 + True + >>> atom2 != atom1 + False + >>> atom1 == atom3 + False + >>> atom1 == atom4 + False + >>> atom4 != atom1 + True + + Examples + -------- + The next C enum construction:: + + enum myEnum { + T0, + T1, + T2 + }; + + would correspond to the following PyTables + declaration:: + + >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', 'int32') + + Please note the dflt argument with a value of 'T0'. Since the concrete + value matching T0 is unknown right now (we have not used explicit concrete + values), using the name is the only option left for defining a default + value for the atom. + + The chosen representation of values for this enumerated atom uses signed + 32-bit integers, which surely wastes quite a lot of memory. Another size + could be selected by using the base argument (this time with a full-blown + storage atom):: + + >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', UInt8Atom()) + + You can also define multidimensional arrays for data elements:: + + >>> my_enum_atom = EnumAtom( + ... ['T0', 'T1', 'T2'], 'T0', base='uint32', shape=(3,2)) + + for 3x2 arrays of uint32. 
def _checkbase(self, base: Atom) -> None:
    """Validate `base` as the storage atom of this enumeration.

    Raises
    ------
    TypeError
        If `base` is itself an enumerated atom, if the concrete values of
        ``self.enum`` are not uniformly shaped, or if the type, shape or
        precision of `base` can not hold them.
    NotImplementedError
        If the concrete values are not integers or are not scalars.

    """
    if base.kind == "enum":
        raise TypeError(
            f"can not use an enumerated atom as a storage atom: {base!r}"
        )

    # First see whether the storage dtype is able to represent every
    # concrete value of the enumeration...
    storage_dtype = base.dtype
    concrete = [value for _, value in self.enum]
    try:
        natural = np.array(concrete)
    except ValueError:
        raise TypeError("concrete values are not uniformly-shaped")
    try:
        stored = np.array(natural, dtype=storage_dtype.base)
    except ValueError:
        raise TypeError(
            "storage atom type is incompatible with "
            "concrete values in the enumeration"
        )
    if stored.shape[1:] != storage_dtype.shape:
        raise TypeError(
            "storage atom shape does not match that of "
            "concrete values in the enumeration"
        )
    # A lossy conversion shows up as a difference after the round trip.
    if stored.tolist() != natural.tolist():
        raise TypeError(
            "storage atom type lacks precision for "
            "concrete values in the enumeration"
        )

    # ...then apply the limitations of the current implementation.
    if stored.dtype.kind not in ("i", "u"):
        raise NotImplementedError(
            "only integer concrete values "
            "are supported for the moment, sorry"
        )
    if stored.ndim > 1:
        raise NotImplementedError(
            "only scalar concrete values "
            "are supported for the moment, sorry"
        )
class ReferenceAtom(Atom):
    """Defines an atom of type object to read references.

    This atom is read-only.

    Parameters
    ----------
    shape : tuple
        Shape of the atom.  No default value can be given: the class-level
        ``_defvalue`` (``None``) is always passed on to ``Atom.__init__``.

    """

    kind = "reference"
    type = "object"  # noqa: A003
    # NOTE(review): "NoneType" is not a lowercase-kind-plus-precision name,
    # so ``split_type`` would reject it as malformed if the default type of
    # this kind were ever resolved (e.g. ``Atom.from_kind("reference")``
    # without an item size) -- confirm whether that path is meant to work.
    _deftype = "NoneType"
    _defvalue = None

    @property  # type: ignore[misc]
    def itemsize(self) -> int:  # type: ignore[override]
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def __init__(self, shape: Shape = ()) -> None:
        Atom.__init__(self, self.type, shape, self._defvalue)

    def __repr__(self) -> str:
        # Only the shape is shown: it is the sole constructor argument.
        return f"ReferenceAtom(shape={self.shape})"
class PseudoAtom:
    """Base class for the pseudo-atoms accepted by ``VLArray`` nodes.

    A pseudo-atom exposes `kind`, `type` and `shape` attributes, but none
    of `size`, `itemsize` or `dflt`.  The elements used for storage are
    described by its `base` atom instead.
    """

    base: Atom

    def toarray(self, object_: Any) -> NoReturn:
        """Convert an `object_` into an array of base atoms."""
        raise NotImplementedError

    def fromarray(self, array: Any) -> NoReturn:
        """Convert an `array` of base atoms into an object."""
        raise NotImplementedError

    def __repr__(self) -> str:
        # Use the runtime class so that subclasses are shown by their
        # own name.
        return f"{self.__class__.__name__}()"
class VLUnicodeAtom(_BufferedAtom):
    """Pseudo-atom of type ``vlunicode``: one Unicode string per row.

    Like VLStringAtom (see :ref:`VLStringAtom`), this class describes a
    whole *row* of the VLArray class rather than an atom.  Text is stored
    using 32-bit characters (a la UCS-4), so all strings of the same
    length take up the same space.

    Only ``str`` objects are accepted for writing; anything else raises a
    ``TypeError``.  Variable-length Unicode atoms do not accept parameters
    and rows are always read back as Python strings.

    """

    kind = "vlunicode"
    type = "vlunicode"  # noqa: A003
    base = UInt32Atom()

    # NumPy Unicode scalars do not implement the buffer interface, so the
    # text is first placed in a 0-d ``U`` array and that array's memory is
    # then reinterpreted as base atoms.  ``_tobuffer()`` cannot return an
    # array, which is why ``toarray()`` itself is overridden here.
    def toarray(self, object_: str) -> np.ndarray:
        """Convert a string into a numpy array."""
        if isinstance(object_, str):
            text = str(object_)
            ucs4 = np.array(text, dtype="U")
            return np.ndarray(
                shape=len(text), dtype=self.base.dtype, buffer=ucs4
            )
        raise TypeError(f"object is not a string: {object_!r}")

    def _tobuffer(self, object_: str) -> np.str_:
        # Only meaningful when the width of a character in the NumPy
        # scalar matches the width of the base atoms.
        if isinstance(object_, str):
            return np.str_(object_)
        raise TypeError(f"object is not a string: {object_!r}")

    def fromarray(self, array: np.ndarray) -> str:
        """Convert array data into a string."""
        nchars = len(array)
        # ``array.view('U0')`` raises a `TypeError`, so the empty case is
        # answered without creating a view.
        return array.view(f"U{nchars}").item() if nchars else ""
You can regard object atoms as an easy way to + save an arbitrary number of generic Python objects in a VLArray dataset. + + """ + + kind = "object" + type = "object" # noqa: A003 + base = UInt8Atom() + + def _tobuffer(self, object_: object) -> bytes: + return pickle.dumps(object_, pickle.HIGHEST_PROTOCOL) + + def fromarray(self, array: np.ndarray) -> Any | None: + """Deserialize data contained in the input array. + + A Python object is returned. + """ + # We have to check for an empty array because of a possible + # bug in HDF5 which makes it claim that a dataset has one + # record when in fact it is empty. + if array.size == 0: + return None + return pickle.loads(array.tobytes()) diff --git a/venv/Lib/site-packages/tables/attributeset.py b/venv/Lib/site-packages/tables/attributeset.py new file mode 100644 index 0000000..5f3d162 --- /dev/null +++ b/venv/Lib/site-packages/tables/attributeset.py @@ -0,0 +1,740 @@ +"""Here is defined the AttributeSet class.""" + +from __future__ import annotations + +import re +import pickle +import warnings +from typing import Any, Literal, TYPE_CHECKING +from collections.abc import Callable + +import numpy as np + +from . import hdf5extension +from .path import check_attribute_name +from .utils import SizeType +from .filters import Filters +from .registry import class_name_dict +from .undoredo import attr_to_shadow +from .exceptions import ClosedNodeError, FiltersWarning, PerformanceWarning + +if TYPE_CHECKING: + from .node import Node + + +# System attributes +SYS_ATTRS = [ + "CLASS", + "VERSION", + "TITLE", + "NROWS", + "EXTDIM", + "ENCODING", + "PYTABLES_FORMAT_VERSION", + "FLAVOR", + "FILTERS", + "AUTO_INDEX", + "DIRTY", + "NODE_TYPE", + "NODE_TYPE_VERSION", + "PSEUDOATOM", +] +# Prefixes of other system attributes +SYS_ATTRS_PREFIXES = ["FIELD_"] +# RO_ATTRS will be disabled and let the user modify them if they +# want to. The user is still not allowed to remove or rename +# system attributes. 
def issysattrname(name: str) -> bool:
    """Check if a name is a system attribute or not.

    A name is a system attribute name when it is listed in ``SYS_ATTRS``
    or when it starts with any of the prefixes in ``SYS_ATTRS_PREFIXES``.

    Parameters
    ----------
    name
        The attribute name to classify.

    Returns
    -------
    bool
        ``True`` for system attribute names, ``False`` otherwise.

    """
    # ``str.startswith`` accepts a tuple and is true when *any* candidate
    # matches.  Multiplying the individual results together (a logical
    # AND) is only right while there is exactly one prefix, and is
    # vacuously true for an empty prefix list.
    return name in SYS_ATTRS or name.startswith(tuple(SYS_ATTRS_PREFIXES))
This includes Python bool, int, float, complex and str + (but not long nor unicode) values, as well as their NumPy scalar + versions and homogeneous or *structured* NumPy arrays of them. When + read, these values are always loaded as NumPy scalar or array + objects, as needed. + + For that reason, attributes in native HDF5 files will always be + mapped into NumPy objects. Specifically, a multidimensional + attribute will be mapped into a multidimensional ndarray and a + scalar will be mapped into a NumPy scalar object (for example, a + scalar H5T_NATIVE_LLONG will be read and returned as a numpy.int64 + scalar). + + However, other kinds of values are serialized using pickle, so you + only will be able to correctly retrieve them using a Python-aware + HDF5 library. Thus, if you want to save Python scalar values and + make sure you are able to read them with generic HDF5 tools, you + should make use of *scalar or homogeneous/structured array NumPy + objects* (for example, numpy.int64(1) or numpy.array([1, 2, 3], + dtype='int16')). + + One more advice: because of the various potential difficulties in + restoring a Python object stored in an attribute, you may end up + getting a pickle string where a Python object is expected. If this + is the case, you may wish to run pickle.loads() on that string to + get an idea of where things went wrong, as shown in this example:: + + >>> import os, tempfile + >>> import tables as tb + >>> + >>> class MyClass: + ... foo = 'bar' + ... + >>> myObject = MyClass() # save object of custom class in HDF5 attr + >>> h5fname = tempfile.mktemp(suffix='.h5') + >>> h5f = tb.open_file(h5fname, 'w') + >>> h5f.root._v_attrs.obj = myObject # store the object + >>> print(h5f.root._v_attrs.obj.foo) # retrieve it + bar + >>> h5f.close() + >>> + >>> del MyClass, myObject # delete class of object and reopen file + >>> h5f = tb.open_file(h5fname, 'r') + >>> print(repr(h5f.root._v_attrs.obj)) + b'ccopy_reg\\n_reconstructor... 
+ >>> import pickle # let's unpickle that to see what went wrong + >>> pickle.loads(h5f.root._v_attrs.obj) + Traceback (most recent call last): + ... + AttributeError: Can't get attribute 'MyClass' ... + >>> # So the problem was not in the stored object, + ... # but in the *environment* where it was restored. + ... h5f.close() + >>> os.remove(h5fname) + + + .. rubric:: Notes on AttributeSet methods + + Note that this class overrides the __getattr__(), __setattr__(), + __delattr__() and __dir__() special methods. This allows you to + read, assign or delete attributes on disk by just using the next + constructs:: + + leaf.attrs.myattr = 'str attr' # set a string (native support) + leaf.attrs.myattr2 = 3 # set an integer (native support) + leaf.attrs.myattr3 = [3, (1, 2)] # a generic object (Pickled) + attrib = leaf.attrs.myattr # get the attribute ``myattr`` + del leaf.attrs.myattr # delete the attribute ``myattr`` + + In addition, the dictionary-like __getitem__(), __setitem__() and + __delitem__() methods are available, so you may write things like + this:: + + for name in node._v_attrs._f_list(): + print("name: %s, value: %s" % (name, node._v_attrs[name])) + + Use whatever idiom you prefer to access the attributes. + + Finally, on interactive python sessions you may get autocompletions of + attributes named as *valid python identifiers* by pressing the `[Tab]` + key, or to use the dir() global function. + + If an attribute is set on a target node that already has a large + number of attributes, a PerformanceWarning will be issued. + + + .. rubric:: AttributeSet attributes + + .. attribute:: _v_attrnames + + A list with all attribute names. + + .. attribute:: _v_attrnamessys + + A list with system attribute names. + + .. attribute:: _v_attrnamesuser + + A list with user attribute names. + + .. attribute:: _v_unimplemented + + A list of attribute names with unimplemented native HDF5 types. 
def __init__(self, node: Node) -> None:
    """Create the basic structures to keep the attribute information.

    Reads all the HDF5 attributes (if any) on disk for the node "node",
    caches their values on the instance and classifies their names into
    system and user attributes.

    Parameters
    ----------
    node
        The parent node

    Raises
    ------
    ClosedNodeError
        If `node` is not open.

    """
    # Refuse to create an instance of an already closed node
    if not node._v_isopen:
        raise ClosedNodeError("the node for attribute set is closed")

    # Internal state is stored straight into the instance dictionary:
    # this class overrides ``__setattr__`` to set attributes on the node,
    # so a plain ``self.x = ...`` must not be used for bookkeeping.
    dict_ = self.__dict__

    self._g_new(node)
    dict_["_v__nodefile"] = node._v_file
    dict_["_v__nodepath"] = node._v_pathname
    dict_["_v_attrnames"] = self._g_list_attr(node)
    # The list of unimplemented attribute names
    dict_["_v_unimplemented"] = []

    # Get the file version format.  This is an optimization
    # in order to avoid accessing it too much.
    # The result is either ``None`` (no version available, or "unknown")
    # or a tuple of ints that can be compared with e.g. ``(2, 0)``.
    try:
        format_version = node._v_file.format_version
    except AttributeError:
        parsed_version = None
    else:
        if format_version == "unknown":
            parsed_version = None
        else:
            parsed_version = tuple(map(int, format_version.split(".")))
    dict_["_v__format_version"] = parsed_version
    # Split the attribute list in system and user lists
    dict_["_v_attrnamessys"] = []
    dict_["_v_attrnamesuser"] = []
    for attr in self._v_attrnames:
        # put the attributes on the local dictionary to allow
        # tab-completion (``__getattr__`` stores the value it reads in
        # ``self.__dict__``; its return value is not needed here)
        self.__getattr__(attr)
        if issysattrname(attr):
            self._v_attrnamessys.append(attr)
        else:
            self._v_attrnamesuser.append(attr)

    # Sort the attributes
    self._v_attrnames.sort()
    self._v_attrnamessys.sort()
    self._v_attrnamesuser.sort()
def _f_list(
    self, attrset: Literal["all", "sys", "user"] = "user"
) -> list[str]:
    """Return the names of the attributes in the selected set.

    Parameters
    ----------
    attrset
        ``'user'`` (the default) selects only user attributes, ``'sys'``
        only system attributes and ``'all'`` both of them.

    Returns
    -------
    list
        A fresh copy of the selected name list.  Any other `attrset`
        value selects nothing and ``None`` is returned.

    """
    if attrset == "all":
        names = self._v_attrnames
    elif attrset == "sys":
        names = self._v_attrnamessys
    elif attrset == "user":
        names = self._v_attrnamesuser
    else:
        return None
    # Hand out a copy so callers cannot disturb the internal bookkeeping.
    return names[:]
+ retval = value + elif ( + maybe_pickled + and _field_fill_re.match(name) + and format_version == (1, 5) + ): + # This format was used during the first 1.2 releases, just + # for string defaults. + try: + retval = pickle.loads(value) + retval = np.array(retval) + except ImportError: + retval = None # signal error avoiding exception + elif ( + maybe_pickled + and name == "FILTERS" + and format_version is not None + and format_version < (2, 0) + ): + # This is a big hack, but we don't have other way to recognize + # pickled filters of PyTables 1.x files. + value = _old_filters_re.sub(_new_filters_sub, value, 1) + retval = pickle.loads(value) # pass unpickling errors through + elif maybe_pickled: + try: + retval = pickle.loads(value) + # except cPickle.UnpicklingError: + # It seems that pickle may raise other errors than UnpicklingError + # Perhaps it would be better just an "except:" clause? + # except (cPickle.UnpicklingError, ImportError): + # Definitely (see SF bug #1254636) + except UnicodeDecodeError: + # Object maybe pickled on python 2 and unpickled on python 3. + # encoding='bytes' was added in python 3.4 to resolve this. + # However 'bytes' mangles class attributes as they are + # unplicked as bytestrings. Hence try 'latin1' first. + # Ref: http://bugs.python.org/issue6784 + try: + retval = pickle.loads(value, encoding="latin1") + except TypeError: + try: + retval = pickle.loads(value, encoding="bytes") + except Exception: + retval = value + except Exception: + retval = value + except Exception: + # catch other unpickling errors: + # ivb (2005-09-07): It is too hard to tell + # whether the unpickling failed + # because of the string not being a pickle one at all, + # because of a malformed pickle string, + # or because of some other problem in object reconstruction, + # thus making inconvenient even the issuing of a warning here. + # The documentation contains a note on this issue, + # explaining how the user can tell where the problem was. 
def _g__setattr(self, name: str, value: Any) -> None:
    """Set a PyTables attribute.

    Sets a (maybe new) PyTables attribute with the specified `name`
    and `value`.  If the attribute already exists, it is simply
    replaced.

    It does not log the change.

    Parameters
    ----------
    name
        Name of the attribute to set.
    value
        Value to store.  Two forms are tracked below: ``stvalue`` is what
        gets handed to ``_g_setattr()``, while ``value`` is what is kept
        in the instance dictionary.

    """
    # Save this attribute to disk
    # (overwriting an existing one if needed)
    stvalue = value
    if issysattrname(name):
        # Some system attributes are forced to a fixed integer type.
        # ``stvalue[()]`` extracts the scalar from the 0-d array.
        if name in ["EXTDIM", "AUTO_INDEX", "DIRTY", "NODE_TYPE_VERSION"]:
            stvalue = np.array(value, dtype=np.int32)
            value = stvalue[()]
        elif name == "NROWS":
            stvalue = np.array(value, dtype=SizeType)
            value = stvalue[()]
        elif (
            name == "FILTERS"
            and self._v__format_version is not None
            and self._v__format_version >= (2, 0)
        ):
            stvalue = value._pack()
            # value will remain as a Filters instance here
    # Convert value from a Python scalar into a NumPy scalar
    # (only in case it has not been converted yet)
    # Fixes ticket #59
    # NOTE: the membership test uses the exact type, so instances of
    # subclasses of the listed types are not converted here.
    if stvalue is value and type(value) in (
        bool,
        bytes,
        int,
        float,
        complex,
        str,
        np.str_,
    ):
        # Additional check for allowing a workaround for #307
        if isinstance(value, str) and len(value) == 0:
            stvalue = np.array("")
        else:
            stvalue = np.array(value)
        value = stvalue[()]

    self._g_setattr(self._v_node, name, stvalue)

    # New attribute or value. Introduce it into the local
    # directory
    self.__dict__[name] = value

    # Finally, add this attribute to the list if not present
    # (each list is re-sorted after the insertion)
    attrnames = self._v_attrnames
    if name not in attrnames:
        attrnames.append(name)
        attrnames.sort()
        if issysattrname(name):
            attrnamessys = self._v_attrnamessys
            attrnamessys.append(name)
            attrnamessys.sort()
        else:
            attrnamesuser = self._v_attrnamesuser
            attrnamesuser.append(name)
            attrnamesuser.sort()
def _g__delattr(self, name: str) -> None:
    """Delete a PyTables attribute.

    Removes the existing attribute `name` from the node and from the
    bookkeeping kept by this attribute set.

    It does not log the change.

    """
    # Remove it from the node first.
    self._g_remove(self._v_node, name)

    # Then forget the name: once in the global list and once in
    # whichever of the system/user lists holds it.
    self._v_attrnames.remove(name)
    sys_names = self._v_attrnamessys
    holder = sys_names if name in sys_names else self._v_attrnamesuser
    holder.remove(name)

    # Finally drop the value kept in the local dictionary
    # (closes #1049285).
    del self.__dict__[name]
def __getitem__(self, name: str) -> Any:
    """Implement a dictionary like interface for `__getattr__()`.

    Raises
    ------
    KeyError
        If there is no attribute called `name`.  The originating
        ``AttributeError`` is attached as the explicit cause.

    """
    try:
        return self.__getattr__(name)
    except AttributeError as err:
        # Translate into the exception a mapping is expected to raise,
        # chaining explicitly so the traceback shows a deliberate
        # conversion rather than an error inside the handler.
        raise KeyError(
            "Attribute ('%s') does not exist in node '%s'"
            % (name, self._v__nodepath)
        ) from err

def __setitem__(self, name: str, value: Any) -> None:
    """Implement a dictionary like interface for `__setattr__()`."""
    self.__setattr__(name, value)

def __delitem__(self, name: str) -> None:
    """Implement a dictionary like interface for `__delattr__()`.

    Raises
    ------
    KeyError
        If `__delattr__()` raises ``AttributeError`` for `name`.  That
        error is attached as the explicit cause.

    """
    try:
        self.__delattr__(name)
    except AttributeError as err:
        # Same translation (and explicit chaining) as in `__getitem__()`.
        raise KeyError(
            "Attribute ('%s') does not exist in node '%s'"
            % (name, self._v__nodepath)
        ) from err
def _f_rename(self, oldattrname: str, newattrname: str) -> None:
    """Rename an attribute from oldattrname to newattrname.

    Nothing happens when both names are equal.  Otherwise the value is
    stored under the new name before the old attribute is removed.

    """
    if oldattrname != newattrname:
        # The old value is fetched before the new attribute is created,
        # and the old attribute is only deleted once that has succeeded.
        setattr(self, newattrname, getattr(self, oldattrname))
        delattr(self, oldattrname)
def __str__(self) -> str:
    """Return the string representation for the object.

    The result has the form
    ``<node path>._v_attrs (<class name>), <count> attributes``.

    """
    pathname = self._v__nodepath
    classname = self.__class__.__name__
    # The attribute names are kept in a list, so its length is available
    # directly; there is no need to iterate just to count the entries.
    attrnumber = len(self._v_attrnames)
    return f"{pathname}._v_attrs ({classname}), {attrnumber} attributes"
file mode 100644 index 0000000..58d940f --- /dev/null +++ b/venv/Lib/site-packages/tables/carray.py @@ -0,0 +1,316 @@ +"""Here is defined the CArray class.""" + +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING +from collections.abc import Sequence + +import numpy as np +import numpy.typing as npt + +from .atom import Atom +from .array import Array +from .utils import correct_byteorder, SizeType + +if TYPE_CHECKING: + from .group import Group + from .filters import Filters + +# default version for CARRAY objects +# obversion = "1.0" # Support for time & enumerated datatypes. +obversion = "1.1" # Numeric and numarray flavors are gone. + + +class CArray(Array): + """This class represents homogeneous datasets in an HDF5 file. + + The difference between a CArray and a normal Array (see + :ref:`ArrayClassDescr`), from which it inherits, is that a CArray + has a chunked layout and, as a consequence, it supports compression. + You can use datasets of this class to easily save or load arrays to + or from disk, with compression support included. + + CArray includes all the instance variables and methods of Array. + Only those with different behavior are mentioned here. + + Parameters + ---------- + parentnode + The parent :class:`Group` object. + + .. versionchanged:: 3.0 + Renamed from *parentNode* to *parentnode*. + + name : str + The name of this node in its parent group. + atom + An `Atom` instance representing the *type* and *shape* of + the atomic objects to be saved. + + shape + The shape of the new array. + + title + A description for this node (it sets the ``TITLE`` HDF5 + attribute on disk). + + filters + An instance of the `Filters` class that provides + information about the desired I/O filters to be applied + during the life of this object. + + chunkshape + The shape of the data chunk to be read or written in a + single HDF5 I/O operation. Filters are applied to those + chunks of data. 
The dimensionality of `chunkshape` must + be the same as that of `shape`. If ``None``, a sensible + value is calculated (which is recommended). + + byteorder + The byteorder of the data *on disk*, specified as 'little' + or 'big'. If this is not specified, the byteorder is that + of the platform. + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, object + birth time); default True. Semantics of these times depend on their + implementation in the HDF5 library: refer to documentation of the + H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata + change time) is implemented. + + .. versionadded:: 3.4.3 + + Examples + -------- + See below a small example of the use of the `CArray` class. + The code is available in ``examples/carray1.py``:: + + import numpy as np + import tables as tb + + fileName = 'carray1.h5' + shape = (200, 300) + atom = tb.UInt8Atom() + filters = tb.Filters(complevel=5, complib='zlib') + + h5f = tb.open_file(fileName, 'w') + ca = h5f.create_carray(h5f.root, 'carray', atom, shape, + filters=filters) + + # Fill a hyperslab in ``ca``. + ca[10:60, 20:70] = np.ones((50, 50)) + h5f.close() + + # Re-open a read another hyperslab + h5f = tb.open_file(fileName) + print(h5f) + print(h5f.root.carray[8:12, 18:22]) + h5f.close() + + The output for the previous script is something like:: + + carray1.h5 (File) '' + Last modif.: 'Thu Apr 12 10:15:38 2007' + Object Tree: + / (RootGroup) '' + /carray (CArray(200, 300), shuffle, zlib(5)) '' + + [[0 0 0 0] + [0 0 0 0] + [0 0 1 1] + [0 0 1 1]] + + """ + + # Class identifier. + _c_classid = "CARRAY" + + def __init__( + self, + parentnode: Group, + name: str, + atom: Atom | None = None, + shape: Sequence[int] | None = None, + title: str = "", + filters: Filters | None = None, + chunkshape: tuple[int, ...] 
| None = None, + byteorder: str | None = None, + _log: bool = True, + track_times: bool = True, + ) -> None: + + self.atom = atom + """An `Atom` instance representing the shape, type of the atomic + objects to be saved. + """ + self.shape: tuple[int, ...] | None = None + """The shape of the stored array.""" + self.extdim = -1 # `CArray` objects are not enlargeable by default + """The index of the enlargeable dimension.""" + + # Other private attributes + self._v_version: str | None = None + """The object version of this array.""" + self._v_new = new = atom is not None + """Is this the first time the node has been created?""" + self._v_new_title = title + """New title for this node.""" + self._v_convert = True + """Whether the ``Array`` object must be converted or not.""" + self._v_chunkshape = chunkshape + """Private storage for the `chunkshape` property of the leaf.""" + + # Miscellaneous iteration rubbish. + self._start: int | None = None + """Starting row for the current iteration.""" + self._stop: int | None = None + """Stopping row for the current iteration.""" + self._step: int | None = None + """Step size for the current iteration.""" + self._nrowsread: int | None = None + """Number of rows read up to the current state of iteration.""" + self._startb: int | None = None + """Starting row for current buffer.""" + self._stopb: int | None = None + """Stopping row for current buffer. """ + self._row: int | None = None + """Current row in iterators (sentinel).""" + self._init = False + """Whether we are in the middle of an iteration or not (sentinel).""" + self.listarr: npt.ArrayLike | None = None + """Current buffer in iterators.""" + + if new: + if not isinstance(atom, Atom): + raise ValueError( + "atom parameter should be an instance of " + "tables.Atom and you passed a %s." 
% type(atom) + ) + if shape is None: + raise ValueError("you must specify a non-empty shape") + try: + shape = tuple(shape) + except TypeError: + raise TypeError( + "`shape` parameter must be a sequence " + "and you passed a %s" % type(shape) + ) + self.shape = tuple(SizeType(s) for s in shape) + + if chunkshape is not None: + try: + chunkshape = tuple(chunkshape) + except TypeError: + raise TypeError( + "`chunkshape` parameter must be a sequence " + "and you passed a %s" % type(chunkshape) + ) + if len(shape) != len(chunkshape): + raise ValueError( + f"the shape ({shape}) and chunkshape " + f"({chunkshape}) ranks must be equal." + ) + elif min(chunkshape) < 1: + raise ValueError( + "chunkshape parameter cannot have " "zero-dimensions." + ) + self._v_chunkshape = tuple(SizeType(s) for s in chunkshape) + + # The `Array` class is not abstract enough! :( + super(Array, self).__init__( + parentnode, name, new, filters, byteorder, _log, track_times + ) + + def _g_create(self) -> int: + """Create a new array in file (specific part).""" + if min(self.shape) < 1: + raise ValueError("shape parameter cannot have zero-dimensions.") + # Finish the common part of creation process + return self._g_create_common(self.nrows) + + def _g_create_common(self, expectedrows: int) -> int: + """Create a new array in file (common part).""" + self._v_version = obversion + + if self._v_chunkshape is None: + # Compute the optimal chunk size + self._v_chunkshape = self._calc_chunkshape( + expectedrows, self.rowsize, self.atom.size + ) + # Compute the optimal nrowsinbuf + self.nrowsinbuf = self._calc_nrowsinbuf() + # Correct the byteorder if needed + if self.byteorder is None: + self.byteorder = correct_byteorder(self.atom.type, sys.byteorder) + + try: + # ``self._v_objectid`` needs to be set because would be + # needed for setting attributes in some descendants later + # on + self._v_objectid = self._create_carray(self._v_new_title) + except Exception: # XXX + # Problems creating the Array on 
disk. Close node and re-raise. + self.close(flush=0) + raise + + return self._v_objectid + + def _g_copy_with_stats( + self, + group: Group, + name: str, + start: int, + stop: int, + step: int, + title: str, + filters: Filters | None, + chunkshape: tuple[int, ...] | None, + _log: bool, + **kwargs, + ) -> tuple[CArray, int]: + """Private part of Leaf.copy() for each kind of leaf.""" + start, stop, step = self._process_range_read(start, stop, step) + maindim = self.maindim + shape = list(self.shape) + shape[maindim] = len(range(start, stop, step)) + # Now, fill the new carray with values from source + nrowsinbuf = self.nrowsinbuf + # The slices parameter for self.__getitem__ + slices = [slice(0, dim, 1) for dim in self.shape] + # This is a hack to prevent doing unnecessary conversions + # when copying buffers + self._v_convert = False + # Build the new CArray object + obj = CArray( + group, + name, + atom=self.atom, + shape=shape, + title=title, + filters=filters, + chunkshape=chunkshape, + _log=_log, + ) + # Start the copy itself + for start2 in range(start, stop, step * nrowsinbuf): + # Save the records on disk + stop2 = start2 + step * nrowsinbuf + if stop2 > stop: + stop2 = stop + # Set the proper slice in the main dimension + slices[maindim] = slice(start2, stop2, step) + start3 = (start2 - start) // step + stop3 = start3 + nrowsinbuf + if stop3 > shape[maindim]: + stop3 = shape[maindim] + # The next line should be generalised if, in the future, + # maindim is designed to be different from 0 in CArrays. + # See ticket #199. 
_no_matching_opcode = re.compile(r"[^a-z]([a-z]+)_([a-z]+)[^a-z]")
# E.g. "gt" and "bfc" from "couldn't find matching opcode for 'gt_bfc'".


def _unsupported_operation_error(exception: Exception) -> Exception:
    r"""Make the \"no matching opcode\" Numexpr `exception` more clear.

    Parameters
    ----------
    exception
        The exception raised by Numexpr.  Its first argument is expected
        to be a message containing an ``<op>_<typecodes>`` token such as
        ``'gt_bfc'``.

    Returns
    -------
    Exception
        A new exception of the same class whose message names the
        operation and the kinds of its operands.  When the message does
        not have the expected form (no arguments, a non-string first
        argument, or no ``<op>_<typecodes>`` token), `exception` itself
        is returned unchanged so that callers doing
        ``raise _unsupported_operation_error(err)`` re-raise the
        original error instead of failing with an unrelated
        ``AttributeError``/``IndexError``.
    """
    message = exception.args[0] if exception.args else None
    match = (
        _no_matching_opcode.search(message)
        if isinstance(message, str)
        else None
    )
    if match is None:
        # Not a message we know how to rewrite: keep the original error.
        return exception

    op, types = match.groups()
    # The first typecode is skipped, as in the original implementation
    # (presumably it is the result type of the opcode -- TODO confirm
    # against Numexpr); the remaining ones are reported as operand kinds.
    operand_kinds = ", ".join(
        ne.necompiler.typecode_to_kind[t] for t in types[1:]
    )
    newmessage = "unsupported operand types for *%s*: " % op
    return exception.__class__(newmessage + operand_kinds)
@_check_indexable_cmp
def _get_indexable_cmp(
    exprnode: ne.expressions.ExpressionNode,
    indexedcols: frozenset[str],
) -> tuple[Any, str, Any] | tuple[None, None, None]:
    """Get the indexable variable-constant comparison in `exprnode`.

    The result is a ``(variable, operation, constant)`` tuple when
    `exprnode` is one of:

    * a boolean variable in `indexedcols` (``(name, "eq", True)``);
    * the negation of such a variable (``(name, "eq", False)``);
    * the negation of any other boolean expression, reported as
      ``(child_node, "invert", None)`` so the caller can decide later
      whether the negated expression is indexable;
    * a comparison (``lt``, ``le``, ``eq``, ``ge``, ``gt``) between a
      variable in `indexedcols` and a constant or another variable, in
      either operand order.  When the other operand is a variable, a
      one-element tuple with its name takes the place of the value.

    In every other case ``(None, None, None)`` is returned.
    """
    not_indexable = (None, None, None)
    # Comparison obtained when the two operands are swapped.
    swapped_op = {
        "lt": "gt",
        "le": "ge",
        "eq": "eq",
        "ge": "le",
        "gt": "lt",
    }

    def is_indexed_boolean(node: ne.expressions.ExpressionNode) -> bool:
        if node.astType != "variable":
            return False
        return node.astKind == "bool" and node.value in indexedcols

    def var_vs_limit(
        var: ne.expressions.ExpressionNode,
        limit: ne.expressions.ExpressionNode,
        op: str,
    ) -> tuple[Any, str, Any] | None:
        # The left-hand side must be an indexed variable.
        if var.astType != "variable" or var.value not in indexedcols:
            return None
        if limit.astType == "constant":
            return (var.value, op, limit.value)
        if limit.astType == "variable":
            # A variable limit is reported by name, wrapped in a tuple.
            return (var.value, op, (limit.value,))
        return None

    # Boolean variables are indexable by themselves.
    if is_indexed_boolean(exprnode):
        return (exprnode.value, "eq", True)

    # From here on only operation nodes can be indexable.
    if exprnode.astType != "op":
        return not_indexable

    opname = exprnode.value
    if opname == "invert":
        negated = exprnode.children[0]
        # Negation of an indexed boolean variable.
        if is_indexed_boolean(negated):
            return (negated.value, "eq", False)
        # Negation of a boolean expression: hand back the child so that
        # its indexability can be decided later on.
        if negated.astKind == "bool":
            return (negated, "invert", None)

    if opname not in swapped_op:
        return not_indexable

    # Try ``variable <op> limit`` first, then the mirrored form.
    left, right = exprnode.children
    direct = var_vs_limit(left, right, opname)
    if direct:
        return direct
    mirrored = var_vs_limit(right, left, swapped_op[opname])
    if mirrored:
        return mirrored

    return not_indexable
def _get_idx_expr_recurse(
    exprnode: ne.expressions.ExpressionNode,
    indexedcols: frozenset[str],
    idxexprs: list,
    strexpr: list[str],
) -> (
    list[tuple[Any, tuple[str], tuple[Any]]]
    | list[
        tuple[ne.expressions.ExpressionNode, tuple[str, str], tuple[Any, Any]]
    ]
    | tuple[list, list[str]]
):
    """Actual implementation of the `get_idx_expr()` wrapper.

    'idxexprs' is a list of expressions in the form ``(var, (ops),
    (limits))``.  'strexpr' is the indexable expression in string format.
    These parameters will be received empty (i.e. [], ['']) for the
    first time and populated during the different recursive calls.
    Finally, they are returned in the last level to the original
    wrapper.  If 'exprnode' is not indexable, it will return the tuple
    ([], ['']) so as to signal this.

    A single indexable comparison is returned as a one-element list
    ``[(var, (op,), (limit,))]``; a range such as ``(a < x) & (x < b)``
    as ``[(var, (op1, op2), (limit1, limit2))]``.

    A negated comparison is rewritten into its complement (``~(x < c)``
    becomes ``x >= c``; ``~(b == True)`` becomes ``b == False``).  A
    negated equality against anything that is not an actual boolean
    value (``~(x == 5)``, ``~(x == y)``) is equivalent to ``x != ...``,
    which cannot be expressed as an index range, so it is reported as
    not indexable.

    """
    not_indexable = ([], [""])
    op_conv = {
        "and": "&",
        "or": "|",
        "not": "~",
    }
    # Complement of each ordering comparison.  Equality has no indexable
    # complement and is therefore deliberately absent.
    negcmp = {
        "lt": "ge",
        "le": "gt",
        "ge": "lt",
        "gt": "le",
    }

    def fix_invert(
        idxcmp: tuple[Any, str, Any] | tuple[None, None, None],
        exprnode: ne.expressions.ExpressionNode,
        indexedcols: frozenset[str],
    ) -> tuple[
        tuple[Any, str, Any] | tuple[None, None, None],
        ne.expressions.ExpressionNode,
        bool,
    ]:
        """Strip leading negations, tracking whether their count is odd."""
        invert = False
        # Loop until all leading negations have been dealt with
        while idxcmp[1] == "invert":
            invert ^= True
            # The information about the negated node is in first position
            exprnode = idxcmp[0]
            idxcmp = _get_indexable_cmp(exprnode, indexedcols)
        return idxcmp, exprnode, invert

    # Indexable variable-constant comparison.
    idxcmp = _get_indexable_cmp(exprnode, indexedcols)
    idxcmp, exprnode, invert = fix_invert(idxcmp, exprnode, indexedcols)
    if idxcmp[0]:
        if invert:
            var, op, value = idxcmp
            if op == "eq" and isinstance(value, (bool, np.bool_)):
                # ``var`` must be a boolean index.  Flip its value.
                # Only genuine booleans qualify: ``1 == True`` in Python,
                # so a membership test against ``[True, False]`` would
                # also (wrongly) turn ``~(x == 1)`` into ``x == 0``.
                value ^= True
            elif op in negcmp:
                op = negcmp[op]
            else:
                # ``~(x == limit)`` is ``x != limit``: not indexable.
                return not_indexable
            expr = (var, (op,), (value,))
            invert = False
        else:
            expr = (idxcmp[0], (idxcmp[1],), (idxcmp[2],))
        return [expr]

    # For now negations of complex expressions will be not supported as
    # forming part of an indexable condition.  This might be supported in
    # the future.
    if invert:
        return not_indexable

    # Only conjunctions and disjunctions of comparisons are considered
    # for the moment.
    if exprnode.astType != "op" or exprnode.value not in ["and", "or"]:
        return not_indexable

    left, right = exprnode.children
    # Get the expression at left
    lcolvar, lop, llim = _get_indexable_cmp(left, indexedcols)
    # Get the expression at right
    rcolvar, rop, rlim = _get_indexable_cmp(right, indexedcols)

    # Use conjunction of indexable VC comparisons like
    # ``(a <[=] x) & (x <[=] b)`` or ``(a >[=] x) & (x >[=] b)``
    # as ``a <[=] x <[=] b``, for the moment.
    op = exprnode.value
    if (
        lcolvar is not None
        and rcolvar is not None
        and _equiv_expr_node(lcolvar, rcolvar)
        and op == "and"
    ):
        if lop in ["gt", "ge"] and rop in ["lt", "le"]:  # l <= x <= r
            expr = (lcolvar, (lop, rop), (llim, rlim))
            return [expr]
        if lop in ["lt", "le"] and rop in ["gt", "ge"]:  # l >= x >= r
            expr = (rcolvar, (rop, lop), (rlim, llim))
            return [expr]

    # Recursively get the expressions at the left and the right
    lexpr = _get_idx_expr_recurse(left, indexedcols, idxexprs, strexpr)
    rexpr = _get_idx_expr_recurse(right, indexedcols, idxexprs, strexpr)

    def add_expr(expr, idxexprs: list, strexpr: list[str]) -> None:
        """Add a single expression to the list."""
        if isinstance(expr, list):
            # expr is a single expression
            idxexprs.append(expr[0])
            lenexprs = len(idxexprs)
            # Mutate the strexpr string
            if lenexprs == 1:
                strexpr[:] = ["e0"]
            else:
                strexpr[:] = [
                    "(%s %s e%d)" % (strexpr[0], op_conv[op], lenexprs - 1)
                ]

    # Add expressions to the indexable list when they are and'ed, or
    # they are both indexable.
    if lexpr != not_indexable and (op == "and" or rexpr != not_indexable):
        add_expr(lexpr, idxexprs, strexpr)
        if rexpr != not_indexable:
            add_expr(rexpr, idxexprs, strexpr)
        return (idxexprs, strexpr)
    if rexpr != not_indexable and op == "and":
        add_expr(rexpr, idxexprs, strexpr)
        return (idxexprs, strexpr)

    # Can not use indexed column.
    return not_indexable
+ + Expressions such as ``0 < c1 <= 1`` do not work as expected. + + Right now only some of the *indexable comparisons* are considered: + + * ``a <[=] x``, ``a == x`` and ``a >[=] x`` + * ``(a <[=] x) & (y <[=] b)`` and ``(a == x) | (b == y)`` + * ``~(~c_bool)``, ``~~c_bool`` and ``~(~c_bool) & (c_extra != 2)`` + + (where ``a``, ``b`` and ``c_bool`` are indexed columns, but + ``c_extra`` is not) + + Particularly, the ``!=`` operator and negations of complex boolean + expressions are *not considered* as valid candidates: + + * ``a != 1`` and ``c_bool != False`` + * ``~((a > 0) & (c_bool))`` + + """ + return _get_idx_expr_recurse(expr, indexedcols, [], [""]) + + +class CompiledCondition: + """Container for a compiled condition.""" + + @lazyattr + def index_variables(self) -> frozenset: + """Columns participating in the index expression.""" + idxexprs = self.index_expressions + idxvars = [] + for expr in idxexprs: + idxvar = expr[0] + if idxvar not in idxvars: + idxvars.append(idxvar) + return frozenset(idxvars) + + def __init__( + self, + func: ne.interpreter.NumExpr, + params: list[str], + idxexprs: list[tuple[Any, tuple[str, ...], Any]], + strexpr: str, + **kwargs, + ) -> None: + self.function = func + """The compiled function object corresponding to this condition.""" + self.parameters = params + """A list of parameter names for this condition.""" + self.index_expressions = idxexprs + """A list of expressions in the form ``(var, (ops), (limits))``.""" + self.string_expression = strexpr + """The indexable expression in string format.""" + self.kwargs = kwargs + """NumExpr kwargs (used to pass ex_uses_vml to numexpr)""" + + def __repr__(self) -> str: + return f"""idxexprs: {self.index_expressions} +strexpr: {self.string_expression} +idxvars: {self.index_variables}""" + + def with_replaced_vars( + self, condvars: dict[str, Column | np.ndarray] + ) -> CompiledCondition: + """Replace index limit variables with their values in-place. 
def _get_variable_names(
    expression: ne.expressions.ExpressionNode,
) -> list[str]:
    """Return the list of variable names in the Numexpr `expression`.

    The expression tree is walked depth-first, left to right.  Each
    name appears once, in order of first appearance, so the result is
    the same on every run (a plain ``set`` would order the names by
    their randomised string hashes).

    Parameters
    ----------
    expression
        Root node of the tree.  Nodes are inspected through their
        ``astType``, ``value`` and (when present) ``children``
        attributes.

    Returns
    -------
    list[str]
        The distinct variable names used in `expression`.
    """
    # A dict is used as an insertion-ordered set.
    names: dict[str, None] = {}
    stack = [expression]
    while stack:
        node = stack.pop()
        if node.astType == "variable":
            names.setdefault(node.value, None)
        elif hasattr(node, "children"):
            # Push children reversed so the leftmost one is popped first.
            stack.extend(reversed(tuple(node.children)))
    return list(names)
def call_on_recarr(
    func: Callable,
    params: Iterable,
    recarr: np.ndarray,
    param2arg: Callable[[Any], Any] | None = None,
    **kwargs,
) -> Any:
    """Call `func` with `params` over `recarr`.

    Parameters
    ----------
    func
        The callable to invoke.
    params
        The parameters from which the positional arguments are built,
        in order.
    recarr
        The structured array fields are taken from.
    param2arg
        When specified, it is called with each parameter to obtain the
        corresponding argument; otherwise the parameter itself is used
        as the argument.
    **kwargs
        Passed through to `func` unchanged.

    Returns
    -------
    Any
        Whatever ``func(*args, **kwargs)`` returns.

    Notes
    -----
    An argument exposing a ``pathname`` attribute is treated as a column
    and replaced by ``get_nested_field(recarr, arg.pathname)``.
    """
    args = []
    for param in params:
        arg = param2arg(param) if param2arg else param
        if hasattr(arg, "pathname"):  # looks like a column
            arg = get_nested_field(recarr, arg.pathname)
        args.append(arg)
    return func(*args, **kwargs)
H5P_FILE_CREATE, H5P_FILE_ACCESS + int H5FD_LOG_LOC_WRITE, H5FD_LOG_ALL + int H5I_INVALID_HID + int H5E_DEFAULT + int H5T_STD_REF_OBJ + int H5R_OBJ_REF_BUF_SIZE + unsigned HADDR_UNDEF + + # Library types + cdef enum H5I_type_t: + H5I_UNINIT = -2 # uninitialized type + H5I_BADID = -1 # invalid Type + H5I_FILE = 1 # File objects + H5I_GROUP = 0 # Group objects + H5I_DATATYPE = 1 # Datatype objects + H5I_DATASPACE = 2 # Dataspace objects + H5I_DATASET = 3 # Dataset objects + H5I_ATTR = 4 # Attribute objects + H5I_REFERENCE = 5 # Reference objects + H5I_VFL = 6 # virtual file layer + H5I_GENPROP_CLS = 7 # generic property list classes + H5I_GENPROP_LST = 8 # generic property lists + H5I_ERROR_CLASS = 9 # error classes + H5I_ERROR_MSG = 10 # error messages + H5I_ERROR_STACK = 11 # error stacks + H5I_NTYPES # Sentinel value - must be last + + # Reference types + cdef enum H5R_type_t: + H5R_BADTYPE = -1 # Invalid Reference Type + H5R_OBJECT = 0 # Object reference + H5R_DATASET_REGION = 1 # Dataset Region Reference + H5R_MAXTYPE # Sentinel value - must be last + + # The difference between a single file and a set of mounted files + cdef enum H5F_scope_t: + H5F_SCOPE_LOCAL = 0 # specified file handle only + H5F_SCOPE_GLOBAL = 1 # entire virtual file + H5F_SCOPE_DOWN = 2 # for internal use only + + cdef enum H5FD_mem_t: + H5FD_MEM_NOLIST = -1, # Data should not appear in the free list. + # Must be negative. + H5FD_MEM_DEFAULT = 0, # Value not yet set. Can also be the + # datatype set in a larger allocation + # that will be suballocated by the library. + # Must be zero. + H5FD_MEM_SUPER = 1, # Superblock data + H5FD_MEM_BTREE = 2, # B-tree data + H5FD_MEM_DRAW = 3, # Raw data (content of datasets, etc.) 
+ H5FD_MEM_GHEAP = 4, # Global heap data + H5FD_MEM_LHEAP = 5, # Local heap data + H5FD_MEM_OHDR = 6, # Object header data + H5FD_MEM_NTYPES # Sentinel value - must be last + + cdef enum H5O_type_t: + H5O_TYPE_UNKNOWN = -1 # Unknown object type + H5O_TYPE_GROUP # Object is a group + H5O_TYPE_DATASET # Object is a dataset + H5O_TYPE_NAMED_DATATYPE # Object is a named data type + + cdef enum H5L_type_t: + H5L_TYPE_ERROR = -1 # Invalid link type id + H5L_TYPE_HARD = 0 # Hard link id + H5L_TYPE_SOFT = 1 # Soft link id + H5L_TYPE_EXTERNAL = 64, # External link id + + # Values for fill value status + cdef enum H5D_fill_value_t: + H5D_FILL_VALUE_ERROR = -1 + H5D_FILL_VALUE_UNDEFINED = 0 + H5D_FILL_VALUE_DEFAULT = 1 + H5D_FILL_VALUE_USER_DEFINED = 2 + + # HDF5 layouts + cdef enum H5D_layout_t: + H5D_LAYOUT_ERROR = -1 + H5D_COMPACT = 0 # raw data is very small + H5D_CONTIGUOUS = 1 # the default + H5D_CHUNKED = 2 # slow and fancy + H5D_NLAYOUTS = 3 # this one must be last! + + # Byte orders + cdef enum H5T_order_t: + H5T_ORDER_ERROR = -1 # error + H5T_ORDER_LE = 0 # little endian + H5T_ORDER_BE = 1 # bit endian + H5T_ORDER_VAX = 2 # VAX mixed endian + H5T_ORDER_NONE = 3 # no particular order (strings, bits,..) + + # HDF5 signed enums + cdef enum H5T_sign_t: + H5T_SGN_ERROR = -1 # error + H5T_SGN_NONE = 0 # this is an unsigned type + H5T_SGN_2 = 1 # two's complement + H5T_NSGN = 2 # this must be last! 
+ + # HDF5 type classes + cdef enum H5T_class_t: + H5T_NO_CLASS = -1 # error + H5T_INTEGER = 0 # integer types + H5T_FLOAT = 1 # floating-point types + H5T_TIME = 2 # date and time types + H5T_STRING = 3 # character string types + H5T_BITFIELD = 4 # bit field types + H5T_OPAQUE = 5 # opaque types + H5T_COMPOUND = 6 # compound types + H5T_REFERENCE = 7 # reference types + H5T_ENUM = 8 # enumeration types + H5T_VLEN = 9 # variable-length types + H5T_ARRAY = 10 # array types + H5T_NCLASSES # this must be last + + # Native types + hid_t H5T_C_S1 + hid_t H5T_NATIVE_B8 + hid_t H5T_NATIVE_CHAR + hid_t H5T_NATIVE_SCHAR + hid_t H5T_NATIVE_UCHAR + hid_t H5T_NATIVE_SHORT + hid_t H5T_NATIVE_USHORT + hid_t H5T_NATIVE_INT + hid_t H5T_NATIVE_UINT + hid_t H5T_NATIVE_LONG + hid_t H5T_NATIVE_ULONG + hid_t H5T_NATIVE_LLONG + hid_t H5T_NATIVE_ULLONG + hid_t H5T_NATIVE_FLOAT + hid_t H5T_NATIVE_DOUBLE + hid_t H5T_NATIVE_LDOUBLE + + # "Standard" types + hid_t H5T_STD_I8LE + hid_t H5T_STD_I16LE + hid_t H5T_STD_I32LE + hid_t H5T_STD_I64LE + hid_t H5T_STD_U8LE + hid_t H5T_STD_U16LE + hid_t H5T_STD_U32LE + hid_t H5T_STD_U64LE + hid_t H5T_STD_B8LE + hid_t H5T_STD_B16LE + hid_t H5T_STD_B32LE + hid_t H5T_STD_B64LE + hid_t H5T_IEEE_F32LE + hid_t H5T_IEEE_F64LE + hid_t H5T_STD_I8BE + hid_t H5T_STD_I16BE + hid_t H5T_STD_I32BE + hid_t H5T_STD_I64BE + hid_t H5T_STD_U8BE + hid_t H5T_STD_U16BE + hid_t H5T_STD_U32BE + hid_t H5T_STD_U64BE + hid_t H5T_STD_B8BE + hid_t H5T_STD_B16BE + hid_t H5T_STD_B32BE + hid_t H5T_STD_B64BE + hid_t H5T_IEEE_F32BE + hid_t H5T_IEEE_F64BE + + # Types which are particular to UNIX (for Time types) + hid_t H5T_UNIX_D32LE + hid_t H5T_UNIX_D64LE + hid_t H5T_UNIX_D32BE + hid_t H5T_UNIX_D64BE + + # The order to retrieve atomic native datatype + cdef enum H5T_direction_t: + H5T_DIR_DEFAULT = 0 # default direction is inscendent + H5T_DIR_ASCEND = 1 # in inscendent order + H5T_DIR_DESCEND = 2 # in descendent order + + # Codes for defining selections + cdef enum H5S_seloper_t: + 
H5S_SELECT_NOOP = -1 + H5S_SELECT_SET = 0 + H5S_SELECT_OR + H5S_SELECT_AND + H5S_SELECT_XOR + H5S_SELECT_NOTB + H5S_SELECT_NOTA + H5S_SELECT_APPEND + H5S_SELECT_PREPEND + H5S_SELECT_INVALID # Must be the last one + + # Character set to use for text strings + cdef enum H5T_cset_t: + H5T_CSET_ERROR = -1 # error + H5T_CSET_ASCII = 0 # US ASCII + H5T_CSET_UTF8 = 1 # UTF-8 Unicode encoding + H5T_CSET_RESERVED_2 = 2 + H5T_CSET_RESERVED_3 = 3 + H5T_CSET_RESERVED_4 = 4 + H5T_CSET_RESERVED_5 = 5 + H5T_CSET_RESERVED_6 = 6 + H5T_CSET_RESERVED_7 = 7 + H5T_CSET_RESERVED_8 = 8 + H5T_CSET_RESERVED_9 = 9 + H5T_CSET_RESERVED_10 = 10 + H5T_CSET_RESERVED_11 = 11 + H5T_CSET_RESERVED_12 = 12 + H5T_CSET_RESERVED_13 = 13 + H5T_CSET_RESERVED_14 = 14 + H5T_CSET_RESERVED_15 = 15 + + # Error stack traversal direction + cdef enum H5E_direction_t: + H5E_WALK_UPWARD = 0 # begin deep, end at API function + H5E_WALK_DOWNWARD = 1 # begin at API function, end deep + + cdef enum H5E_type_t: + H5E_MAJOR + H5E_MINOR + + ctypedef struct H5E_error_t: + hid_t cls_id # class ID + hid_t maj_num # major error ID + hid_t min_num # minor error number + unsigned line # line in file where error occurs + const char *func_name # function in which error occurred + const char *file_name # file in which error occurred + const char *desc # optional supplied description + + ctypedef herr_t (*H5E_walk_t)(unsigned n, H5E_error_t *err, void *data) + ctypedef herr_t (*H5E_auto_t)(hid_t estack, void *data) + + # object info + ctypedef struct H5O_info_t: + unsigned long fileno # Number of file where object is located + haddr_t addr # Object address in file + H5O_type_t type # Basic object type + unsigned rc # Reference count of object + time_t atime # Access time + time_t mtime # Modification time + time_t ctime # Change time + time_t btime # Birth time + hsize_t num_attrs # number of attributes attached to object + #H5O_hdr_info_t hdr # Object header information + #struct { + # H5_ih_info_t obj + # H5_ih_info_t attr + #} 
meta_size + + + #------------------------------------------------------------------ + + # HDF5 API + + # Version functions + herr_t H5get_libversion(unsigned *majnum, unsigned *minnum, + unsigned *relnum ) + herr_t H5check_version(unsigned majnum, unsigned minnum, + unsigned relnum ) + + # misc + herr_t H5free_memory(void *buf) + + # Operations with files + hid_t H5Fcreate(char *filename, unsigned int flags, + hid_t create_plist, hid_t access_plist) + hid_t H5Fopen(char *name, unsigned flags, hid_t access_id) + herr_t H5Fclose (hid_t file_id) + htri_t H5Fis_hdf5(char *name) + herr_t H5Fflush(hid_t object_id, H5F_scope_t scope) + herr_t H5Fget_vfd_handle(hid_t file_id, hid_t fapl_id, void **file_handle) + ssize_t H5Fget_file_image(hid_t file_id, void *buf_ptr, size_t buf_len) + herr_t H5Fget_filesize(hid_t file_id, hsize_t *size) + hid_t H5Fget_create_plist(hid_t file_id) + + # Operations with groups + hid_t H5Gcreate(hid_t loc_id, char *name, hid_t lcpl_id, hid_t gcpl_id, + hid_t gapl_id) + hid_t H5Gopen(hid_t loc_id, char *name, hid_t gapl_id) + herr_t H5Gclose(hid_t group_id) + + # Operations with links + herr_t H5Ldelete(hid_t file_id, char *name, hid_t lapl_id) + herr_t H5Lmove(hid_t src_loc_id, char *src_name, + hid_t dst_loc_id, char *dst_name, hid_t lcpl, hid_t lap) + + # For dealing with datasets + hid_t H5Dopen(hid_t file_id, char *name, hid_t dapl_id) + herr_t H5Dclose(hid_t dset_id) + herr_t H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, + hid_t file_space_id, hid_t plist_id, void *buf) + herr_t H5Dwrite(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, + hid_t file_space_id, hid_t plist_id, void *buf) + hid_t H5Dget_type(hid_t dset_id) + hid_t H5Dget_space(hid_t dset_id) + herr_t H5Dvlen_reclaim(hid_t type_id, hid_t space_id, hid_t plist_id, + void *buf) + hid_t H5Dget_create_plist(hid_t dataset_id) + hsize_t H5Dget_storage_size(hid_t dataset_id) + herr_t H5Dvlen_get_buf_size(hid_t dataset_id, hid_t type_id, hid_t space_id, + hsize_t 
*size) + herr_t H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, + unsigned *filter_mask, + haddr_t *addr, + hsize_t *size) + herr_t H5Dread_chunk(hid_t dset_id, hid_t dxpl_id, const hsize_t *offset, + uint32_t *filters, void *buf) + herr_t H5Dwrite_chunk(hid_t dset_id, hid_t dxpl_id, uint32_t filters, + const hsize_t *offset, size_t data_size, + const void *buf) + + # Functions for dealing with dataspaces + hid_t H5Screate_simple(int rank, hsize_t dims[], hsize_t maxdims[]) + int H5Sget_simple_extent_ndims(hid_t space_id) + int H5Sget_simple_extent_dims(hid_t space_id, hsize_t dims[], + hsize_t maxdims[]) + herr_t H5Sselect_all(hid_t spaceid) + herr_t H5Sselect_hyperslab(hid_t space_id, H5S_seloper_t op, + hsize_t start[], hsize_t _stride[], + hsize_t count[], hsize_t _block[]) + herr_t H5Sselect_elements(hid_t space_id, H5S_seloper_t op, + size_t num_elements, hsize_t *coord) + herr_t H5Sclose(hid_t space_id) + + + # Functions for dealing with datatypes + H5T_class_t H5Tget_class(hid_t type_id) + hid_t H5Tget_super(hid_t type) + H5T_sign_t H5Tget_sign(hid_t type_id) + H5T_order_t H5Tget_order(hid_t type_id) + size_t H5Tget_size(hid_t type_id) + herr_t H5Tset_size(hid_t type_id, size_t size) + size_t H5Tget_precision(hid_t dtype_id) + herr_t H5Tset_precision(hid_t type_id, size_t prec) + hid_t H5Tcreate(H5T_class_t type, size_t size) + hid_t H5Tvlen_create(hid_t base_type_id) + hid_t H5Tcopy(hid_t type_id) + herr_t H5Tclose(hid_t type_id) + htri_t H5Tequal(hid_t dtype_id1, hid_t dtype_id2) + + # Operations defined on string data types + htri_t H5Tis_variable_str(hid_t dtype_id) + + # Operations for compound data types + int H5Tget_nmembers(hid_t type_id) + char *H5Tget_member_name(hid_t type_id, unsigned membno) + hid_t H5Tget_member_type(hid_t type_id, unsigned membno) + hid_t H5Tget_native_type(hid_t type_id, H5T_direction_t direction) + herr_t H5Tget_member_value(hid_t type_id, int membno, void *value) + size_t H5Tget_member_offset(hid_t 
type_id, unsigned memb_no) + int H5Tget_offset(hid_t type_id) + herr_t H5Tinsert(hid_t parent_id, char *name, size_t offset, + hid_t member_id) + herr_t H5Tpack(hid_t type_id) + + # Operations for enumerated data types + hid_t H5Tenum_create(hid_t base_id) + herr_t H5Tenum_insert(hid_t type, char *name, void *value) + + # Operations for array data types + hid_t H5Tarray_create(hid_t base_id, int ndims, hsize_t dims[]) + int H5Tget_array_ndims(hid_t type_id) + int H5Tget_array_dims(hid_t type_id, hsize_t dims[]) + + # Operations with attributes + herr_t H5Adelete(hid_t loc_id, char *name) + int H5Aget_num_attrs(hid_t loc_id) + size_t H5Aget_name(hid_t attr_id, size_t buf_size, char *buf) + hid_t H5Aopen_idx(hid_t loc_id, unsigned int idx) + herr_t H5Aread(hid_t attr_id, hid_t mem_type_id, void *buf) + herr_t H5Aclose(hid_t attr_id) + + # Operations with properties + hid_t H5Pcreate(hid_t plist_id) + herr_t H5Pclose(hid_t plist_id) + herr_t H5Pset_cache(hid_t plist_id, int mdc_nelmts, int rdcc_nelmts, + size_t rdcc_nbytes, double rdcc_w0) + herr_t H5Pset_sieve_buf_size(hid_t fapl_id, hsize_t size) + H5D_layout_t H5Pget_layout(hid_t plist) + int H5Pget_chunk(hid_t plist, int max_ndims, hsize_t *dims) + + hid_t H5Pget_driver(hid_t plist_id) + herr_t H5Pset_fapl_sec2(hid_t fapl_id) + #herr_t H5Pget_fapl_direct(hid_t fapl_id, size_t *alignment, + # size_t *block_size, size_t *cbuf_size) + #herr_t H5Pset_fapl_direct(hid_t fapl_id, size_t alignment, + # size_t block_size, size_t cbuf_size) + herr_t H5Pset_fapl_log(hid_t fapl_id, const char *logfile, + unsigned long long flags, size_t buf_size) + #herr_t H5Pset_fapl_windows(hid_t fapl_id) + herr_t H5Pset_fapl_stdio(hid_t fapl_id) + #herr_t H5Pget_fapl_core(hid_t fapl_id, size_t *increment, + # hbool_t *backing_store) + herr_t H5Pset_fapl_core(hid_t fapl_id, size_t increment, + hbool_t backing_store) + #herr_t H5Pget_fapl_family(hid_t fapl_id, hsize_t *memb_size, + # hid_t *memb_fapl_id) + herr_t H5Pset_fapl_family(hid_t 
fapl_id, hsize_t memb_size, + hid_t memb_fapl_id) + #herr_t H5Pget_fapl_multi(hid_t fapl_id, H5FD_mem_t *memb_map, + # hid_t *memb_fapl, const char **memb_name, + # haddr_t *memb_addr, hbool_t *relax) + herr_t H5Pset_fapl_multi(hid_t fapl_id, H5FD_mem_t *memb_map, + hid_t *memb_fapl, char **memb_name, + haddr_t *memb_addr, hbool_t relax) + herr_t H5Pset_fapl_split(hid_t fapl_id, char *meta_ext, + hid_t meta_plist_id, char *raw_ext, + hid_t raw_plist_id) + #herr_t H5Pget_fapl_mpio(hid_t fapl_id, MPI_Comm *comm, MPI_Info *info) + #herr_t H5Pset_fapl_mpio(hid_t fapl_id, MPI_Comm comm, MPI_Info info) + + #herr_t H5Pget_fapl_mpiposix(hid_t fapl_id, MPI_Comm *comm, + # hbool_t *use_gpfs_hints) + #herr_t H5Pset_fapl_mpiposix(hid_t fapl_id, MPI_Comm comm, + # hbool_t use_gpfs_hints) + herr_t H5Pset_file_image(hid_t fapl_id, void *buf_ptr, size_t buf_len) + herr_t H5Pget_userblock(hid_t plist, hsize_t *size) + herr_t H5Pset_userblock(hid_t plist, hsize_t size) + herr_t H5Pget_obj_track_times(hid_t ocpl_id, hbool_t *track_times) + + # Error Handling Interface + #herr_t H5Eget_auto(hid_t estack_id, H5E_auto_t *func, void** data) + herr_t H5Eset_auto(hid_t estack_id, H5E_auto_t func, void *data) + herr_t H5Eprint(hid_t estack_id, FILE *stream) + herr_t H5Ewalk(hid_t estack_id, H5E_direction_t dir, H5E_walk_t func, + void *data) + #hid_t H5Eget_current_stack(void) + #herr_t H5Eclose_stack(hid_t estack_id) + #ssize_t H5Eget_num(hid_t estack_id) + ssize_t H5Eget_msg(hid_t mesg_id, H5E_type_t* mesg_type, char* mesg, + size_t size) + #herr_t H5Eclose_msg(hid_t mesg_id) + #ssize_t H5Eget_class_name(hid_t class_id, char* name, size_t size) + + # Onject interface + herr_t H5Oget_info(hid_t object_id, H5O_info_t *object_info) + + # Operations with filters and compression interface + ctypedef int H5Z_filter_t + + #herr_t H5Zregister(const void *cls) + herr_t H5Zunregister(H5Z_filter_t id) + #htri_t H5Zfilter_avail(H5Z_filter_t id) + #herr_t H5Zget_filter_info(H5Z_filter_t, unsigned 
# Specific HDF5 functions for PyTables
#
# Attribute helper routines declared in the project's ``H5ATTR.h`` header.
# The block-level ``nogil`` tells Cython that every function declared here may
# be called without holding the GIL.
cdef extern from "H5ATTR.h" nogil:
    # Read accessors: the caller passes the output location (``data``,
    # ``attr_value``, ``cset``) by pointer.
    herr_t H5ATTRget_attribute(hid_t loc_id, char *attr_name,
                               hid_t type_id, void *data)
    # NOTE(review): the two string readers return ``hsize_t`` rather than
    # ``herr_t`` -- presumably a size/count; confirm against H5ATTR.c.
    hsize_t H5ATTRget_attribute_string(hid_t loc_id, char *attr_name,
                                       char **attr_value, int *cset)
    hsize_t H5ATTRget_attribute_vlen_string_array(hid_t loc_id, char *attr_name,
                                                  char ***attr_value, int *cset)
    # Write accessors.
    herr_t H5ATTRset_attribute(hid_t obj_id, char *attr_name,
                               hid_t type_id, size_t rank, hsize_t *dims,
                               char *attr_data)
    herr_t H5ATTRset_attribute_string(hid_t loc_id, char *attr_name,
                                      char *attr_data, hsize_t attr_size,
                                      int cset)
    # Lookup / introspection helpers.
    herr_t H5ATTRfind_attribute(hid_t loc_id, char *attr_name)
    herr_t H5ATTRget_type_ndims(hid_t loc_id, char *attr_name,
                                hid_t *type_id, H5T_class_t *class_id,
                                size_t *type_size, int *rank)
    herr_t H5ATTRget_dims(hid_t loc_id, char *attr_name, hsize_t *dims)
# These ``utils.h`` helpers are declared as returning ``object`` (a Python
# object), so this extern block deliberately carries no ``nogil`` qualifier:
# they must be called with the GIL held.
cdef extern from "utils.h":
    object Giterate(hid_t parent_id, hid_t loc_id, char *name)
    object Aiterate(hid_t loc_id)
    object H5UIget_info(hid_t loc_id, char *name, char *byteorder)
def same_position(
    oldmethod: Callable[[Col, Col], bool],
) -> Callable[[Col, Col], bool]:
    """Wrap a comparison method so that it also requires equal ``_v_pos``.

    The returned function answers ``False`` when `other` has no ``_v_pos``
    attribute (it is not a column definition).  Otherwise it compares the
    ``_v_pos`` of both operands and only delegates to `oldmethod` when that
    comparison is truthy.  The wrapper takes over the name and docstring of
    `oldmethod`.
    """

    def newmethod(self: Col, other: Col) -> bool:
        if not hasattr(other, "_v_pos"):
            # Not a column definition at all.
            return False
        positions_match = self._v_pos == other._v_pos
        if not positions_match:
            # Hand back the (falsy) comparison result itself.
            return positions_match
        return oldmethod(self, other)

    for copied_attr in ("__name__", "__doc__"):
        setattr(newmethod, copied_attr, getattr(oldmethod, copied_attr))
    return newmethod
@classmethod
def from_atom(
    cls,
    atom: atom.Atom,
    pos: int | None = None,
    _offset: int | None = None,
) -> Col:
    """Build a column definition equivalent to the given PyTables atom.

    The column class is looked up in the prefix registry using the atom's
    prefix, and it is instantiated with the atom's own constructor
    arguments plus the optional `pos` (column position) and `_offset`.

    """
    registry_key = atom.prefix()
    atom_init_args = atom._get_init_args()
    column_class = cls._class_from_prefix[registry_key]
    return column_class(pos=pos, _offset=_offset, **atom_init_args)
@classmethod
def _subclass_from_prefix(cls, prefix: str) -> type[Col]:
    """Get the column subclass for the given `prefix`, creating it if needed.

    The new class derives from `cls` and from the ``<prefix>Atom`` class of
    the ``atom`` module, is named ``<prefix>Col`` and is recorded in the
    shared ``_class_from_prefix`` registry under `prefix`.

    The registry is keyed by *prefix* (that is how entries are stored below
    and how ``NewCol.__init__`` reads them), so the cache must be probed
    with `prefix` as well.  Probing with the class name, as was done
    before, could never hit, and every call built a brand new class.

    Raises
    ------
    AttributeError
        If the ``atom`` module has no ``<prefix>Atom`` class.

    """
    class_from_prefix = cls._class_from_prefix
    if prefix in class_from_prefix:
        # Already created: hand back the registered class.
        return class_from_prefix[prefix]

    cname = "%sCol" % prefix
    atombase = getattr(atom, "%sAtom" % prefix)

    class NewCol(cls, atombase):
        """Defines a non-nested column of a particular type.

        The constructor accepts the same arguments as the equivalent
        `Atom` class, plus an additional ``pos`` argument for
        position information, which is assigned to the `_v_pos`
        attribute and an ``attrs`` argument for storing additional metadata
        similar to `table.attrs`, which is assigned to the `_v_col_attrs`
        attribute.

        """

        def __init__(self, *args, **kwargs) -> None:
            # Strip the column-only keywords before delegating to the atom
            # constructor, which does not know about them.
            pos = kwargs.pop("pos", None)
            col_attrs = kwargs.pop("attrs", {})
            offset = kwargs.pop("_offset", None)
            class_from_prefix = self._class_from_prefix
            atombase.__init__(self, *args, **kwargs)
            # The constructor of an abstract atom may have changed
            # the class of `self` to something different of `NewCol`
            # and `atombase` (that's why the prefix map is saved).
            if self.__class__ is not NewCol:
                colclass = class_from_prefix[self.prefix()]
                self.__class__ = colclass
            self._v_pos = pos
            self._v_offset = offset
            self._v_col_attrs = col_attrs

        # Comparisons additionally require both columns to share `_v_pos`.
        __eq__ = same_position(atombase.__eq__)
        _is_equal_to_atom = same_position(atombase._is_equal_to_atom)

        # XXX: API incompatible change for PyTables 3 line
        # Overriding __eq__ blocks inheritance of __hash__ in 3.x
        # def __hash__(self):
        #    return hash((self._v_pos, self.atombase))

        if prefix == "Enum":
            _is_equal_to_enumatom = same_position(
                atombase._is_equal_to_enumatom
            )

    NewCol.__name__ = cname

    class_from_prefix[prefix] = NewCol
    return NewCol
+ atomrepr = super().__repr__() + lpar = atomrepr.index("(") + rpar = atomrepr.rindex(")") + atomargs = atomrepr[lpar + 1 : rpar] + classname = self.__class__.__name__ + if self._v_col_attrs: + return ( + f"{classname}({atomargs}, pos={self._v_pos}" + f", attrs={self._v_col_attrs})" + ) + return f"{classname}({atomargs}, pos={self._v_pos})" + + def _get_init_args(self) -> dict[str, Any]: + """Get a dictionary of instance constructor arguments.""" + kwargs = {arg: getattr(self, arg) for arg in ("shape", "dflt")} + kwargs["pos"] = getattr(self, "_v_pos", None) + return kwargs + + +def _generate_col_classes() -> Generator[type[Col]]: + """Generate all column classes.""" + # Abstract classes are not in the class map. + cprefixes = ["Int", "UInt", "Float", "Time"] + for kind, kdata in atom.atom_map.items(): + if hasattr(kdata, "kind"): # atom class: non-fixed item size + atomclass = kdata + cprefixes.append(atomclass.prefix()) + else: # dictionary: fixed item size + for atomclass in kdata.values(): + cprefixes.append(atomclass.prefix()) + + # Bottom-level complex classes are not in the type map, of course. + # We still want the user to get the compatibility warning, though. + cprefixes.extend(["Complex32", "Complex64", "Complex128"]) + if hasattr(atom, "Complex192Atom"): + cprefixes.append("Complex192") + if hasattr(atom, "Complex256Atom"): + cprefixes.append("Complex256") + + for cprefix in cprefixes: + newclass = Col._subclass_from_prefix(cprefix) + yield newclass + + +# Create all column classes. 
+# for _newclass in _generate_col_classes(): +# exec('%s = _newclass' % _newclass.__name__) +# del _newclass + +StringCol = Col._subclass_from_prefix("String") +BoolCol = Col._subclass_from_prefix("Bool") +EnumCol = Col._subclass_from_prefix("Enum") +IntCol = Col._subclass_from_prefix("Int") +Int8Col = Col._subclass_from_prefix("Int8") +Int16Col = Col._subclass_from_prefix("Int16") +Int32Col = Col._subclass_from_prefix("Int32") +Int64Col = Col._subclass_from_prefix("Int64") +UIntCol = Col._subclass_from_prefix("UInt") +UInt8Col = Col._subclass_from_prefix("UInt8") +UInt16Col = Col._subclass_from_prefix("UInt16") +UInt32Col = Col._subclass_from_prefix("UInt32") +UInt64Col = Col._subclass_from_prefix("UInt64") + +FloatCol = Col._subclass_from_prefix("Float") +if hasattr(atom, "Float16Atom"): + Float16Col = Col._subclass_from_prefix("Float16") +Float32Col = Col._subclass_from_prefix("Float32") +Float64Col = Col._subclass_from_prefix("Float64") +if hasattr(atom, "Float96Atom"): + Float96Col = Col._subclass_from_prefix("Float96") +if hasattr(atom, "Float128Atom"): + Float128Col = Col._subclass_from_prefix("Float128") + +ComplexCol = Col._subclass_from_prefix("Complex") +Complex32Col = Col._subclass_from_prefix("Complex32") +Complex64Col = Col._subclass_from_prefix("Complex64") +Complex128Col = Col._subclass_from_prefix("Complex128") +if hasattr(atom, "Complex192Atom"): + Complex192Col = Col._subclass_from_prefix("Complex192") +if hasattr(atom, "Complex256Atom"): + Complex256Col = Col._subclass_from_prefix("Complex256") + +TimeCol = Col._subclass_from_prefix("Time") +Time32Col = Col._subclass_from_prefix("Time32") +Time64Col = Col._subclass_from_prefix("Time64") + + +# Table description classes +# ========================= +class Description: + """This class represents descriptions of the structure of tables. + + An instance of this class is automatically bound to Table (see + :ref:`TableClassDescr`) objects when they are created. 
It provides a + browseable representation of the structure of the table, made of non-nested + (Col - see :ref:`ColClassDescr`) and nested (Description) columns. + + Column definitions under a description can be accessed as attributes of it + (*natural naming*). For instance, if table.description is a Description + instance with a column named col1 under it, the later can be accessed as + table.description.col1. If col1 is nested and contains a col2 column, this + can be accessed as table.description.col1.col2. Because of natural naming, + the names of members start with special prefixes, like in the Group class + (see :ref:`GroupClassDescr`). + + .. rubric:: Description attributes + + .. attribute:: _v_colobjects + + A dictionary mapping the names of the columns hanging + directly from the associated table or nested column to their + respective descriptions (Col - see :ref:`ColClassDescr` or + Description - see :ref:`DescriptionClassDescr` instances). + + .. versionchanged:: 3.0 + The *_v_colObjects* attribute has been renamed into + *_v_colobjects*. + + .. attribute:: _v_dflts + + A dictionary mapping the names of non-nested columns + hanging directly from the associated table or nested column + to their respective default values. + + .. attribute:: _v_dtype + + The NumPy type which reflects the structure of this + table or nested column. You can use this as the + dtype argument of NumPy array factories. + + .. attribute:: _v_dtypes + + A dictionary mapping the names of non-nested columns + hanging directly from the associated table or nested column + to their respective NumPy types. + + .. attribute:: _v_is_nested + + Whether the associated table or nested column contains + further nested columns or not. + + .. attribute:: _v_itemsize + + The size in bytes of an item in this table or nested column. + + .. attribute:: _v_name + + The name of this description group. The name of the + root group is '/'. + + .. 
attribute:: _v_names + + A list of the names of the columns hanging directly + from the associated table or nested column. The order of the + names matches the order of their respective columns in the + containing table. + + .. attribute:: _v_nested_descr + + A nested list of pairs of (name, format) tuples for all the columns + under this table or nested column. You can use this as the dtype and + descr arguments of NumPy array factories. + + .. versionchanged:: 3.0 + The *_v_nestedDescr* attribute has been renamed into + *_v_nested_descr*. + + .. attribute:: _v_nested_formats + + A nested list of the NumPy string formats (and shapes) of all the + columns under this table or nested column. You can use this as the + formats argument of NumPy array factories. + + .. versionchanged:: 3.0 + The *_v_nestedFormats* attribute has been renamed into + *_v_nested_formats*. + + .. attribute:: _v_nestedlvl + + The level of the associated table or nested column in the nested + datatype. + + .. attribute:: _v_nested_names + + A nested list of the names of all the columns under this table or + nested column. You can use this as the names argument of NumPy array + factories. + + .. versionchanged:: 3.0 + The *_v_nestedNames* attribute has been renamed into + *_v_nested_names*. + + .. attribute:: _v_pathname + + Pathname of the table or nested column. + + .. attribute:: _v_pathnames + + A list of the pathnames of all the columns under this table or nested + column (in preorder). If it does not contain nested columns, this is + exactly the same as the :attr:`Description._v_names` attribute. + + .. attribute:: _v_types + + A dictionary mapping the names of non-nested columns hanging directly + from the associated table or nested column to their respective PyTables + types. + + .. attribute:: _v_offsets + + A list of offsets for all the columns. If the list is empty, means + that there are no padding in the data structure. 
def __init__(
    self,
    classdict: dict[str, Any],
    nestedlvl: int = -1,
    validate: bool = True,
    ptparams: dict[str, Any] | None = None,
) -> None:
    """Build the description from a mapping of names to column definitions.

    Parameters
    ----------
    classdict
        Maps names to column definitions.  Names starting with ``_v_`` are
        treated as special variables and copied onto the instance.  Other
        values may be ``IsDescription`` subclasses or instances, or
        dictionaries (all converted to nested ``Description`` objects);
        anything else is shallow-copied and must turn out to be a ``Col``.
        The mapping is updated in place with the converted objects.
    nestedlvl
        Nesting level of the parent; ``nestedlvl + 1`` is stored as
        ``_v_nestedlvl``.
    validate
        Whether column names are checked with ``check_name_validity``.
    ptparams
        Optional parameters; only the ``"ALLOW_PADDING"`` key is read here.
        When ``None``, padding is allowed.

    Raises
    ------
    ValueError
        If `classdict` is empty, or if a ``_v_byteorder`` attribute is
        present on a top-level description.
    TypeError
        If a column value is neither a ``Col`` nor a ``Description``.

    """

    if not classdict:
        raise ValueError("cannot create an empty data type")

    # ``newdict`` is an alias of the instance ``__dict__`` (no copy is
    # made), so every assignment below sets an attribute on ``self``.
    # Note that ``classdict`` itself is updated in place further down.
    newdict = self.__dict__
    newdict["_v_name"] = "/"  # The name for root descriptor
    newdict["_v_names"] = []
    newdict["_v_dtypes"] = {}
    newdict["_v_types"] = {}
    newdict["_v_dflts"] = {}
    newdict["_v_colobjects"] = {}
    newdict["_v_is_nested"] = False
    nested_formats = []
    nested_dtype = []

    # NOTE(review): ``newdict`` is a dict, so ``hasattr`` looks for a dict
    # *attribute* rather than a key; this condition looks always true and
    # the level is therefore always assigned.  Confirm whether a key test
    # was intended.
    if not hasattr(newdict, "_v_nestedlvl"):
        newdict["_v_nestedlvl"] = nestedlvl + 1

    cols_with_pos = []  # column (position, name) pairs
    cols_no_pos = []  # just column names
    cols_offsets = []  # the offsets of the columns
    valid_offsets = False  # by default there are no valid offsets

    # Check for special variables and convert column descriptions
    for name, descr in classdict.items():
        if name.startswith("_v_"):
            if name in newdict:
                # The special variable clashes with an attribute that is
                # already set on the instance: keep ours and warn.
                warnings.warn(
                    f"Can't set attr {name!r} in description "
                    f"class {self!r}"
                )
            else:
                # Special variable: copy it onto the instance.
                newdict[name] = descr
            continue  # This variable is not needed anymore

        columns = None
        if type(descr) is type(IsDescription) and issubclass(
            descr, IsDescription
        ):
            # Nested object (type I): an ``IsDescription`` subclass.
            columns = descr().columns
        elif type(descr.__class__) is type(IsDescription) and issubclass(
            descr.__class__, IsDescription
        ):
            # Nested object (type II): an ``IsDescription`` instance.
            columns = descr.columns
        elif isinstance(descr, dict):
            # Nested object (type III): a plain dictionary.
            columns = descr
        else:
            # Type IV: anything else (expected to be a ``Col``).
            descr = copy.copy(descr)
        # The copies above and below ensure that the structures
        # provided by the user will remain unchanged even if we
        # tamper with the values of ``_v_pos`` here.
        if columns is not None:
            # NOTE(review): ``validate`` is not forwarded, so nested
            # descriptions use its default -- confirm this is intended.
            descr = Description(
                copy.copy(columns), self._v_nestedlvl, ptparams=ptparams
            )
        # Replacing the value of an existing key is safe while iterating.
        classdict[name] = descr

        pos = getattr(descr, "_v_pos", None)
        if pos is None:
            cols_no_pos.append(name)
        else:
            cols_with_pos.append((pos, name))
        offset = getattr(descr, "_v_offset", None)
        if offset is not None:
            cols_offsets.append(offset)

    # Sort field names:
    #
    # 1. Fields with explicit positions, according to their
    #    positions (and their names if coincident).
    # 2. Fields with no position, in alphabetical order.
    cols_with_pos.sort()
    cols_no_pos.sort()
    keys = [name for (pos, name) in cols_with_pos] + cols_no_pos

    pos = 0
    nested = False
    # Get properties for compound types
    for k in keys:
        if validate:
            # Check for key name validity
            check_name_validity(k)
        # Class variables
        obj = classdict[k]
        newdict[k] = obj  # To allow natural naming
        if not isinstance(obj, (Col, Description)):
            raise TypeError(
                f"Passing an incorrect value to a table column."
                f" Expected a Col (or subclass) instance and "
                f'got: "{obj}". Please make use of the Col(), or '
                f"descendant, constructor to properly "
                f"initialize columns."
            )
        # Positions are renumbered consecutively in the sorted order.
        obj._v_pos = pos  # Set the position of this object
        obj._v_parent = self  # The parent description
        pos += 1
        newdict["_v_colobjects"][k] = obj
        newdict["_v_names"].append(k)
        obj.__dict__["_v_name"] = k

        if not isinstance(k, str):
            # numpy only accepts "str" for field names
            # Python 3.x: bytes --> str (unicode)
            kk = k.decode()
        else:
            kk = k

        if isinstance(obj, Col):
            dtype = obj.dtype
            newdict["_v_dtypes"][k] = dtype
            newdict["_v_types"][k] = obj.type
            newdict["_v_dflts"][k] = obj.dflt
            nested_formats.append(obj.recarrtype)
            # Drop the leading character of the base dtype string.
            baserecarrtype = dtype.base.str[1:]
            nested_dtype.append((kk, baserecarrtype, dtype.shape))
        else:  # A description
            nested_formats.append(obj._v_nested_formats)
            nested_dtype.append((kk, obj._v_dtype))
            nested = True

    # Check whether we are gonna use padding or not.  Two possibilities:
    # 1) Make padding True by default (except if ALLOW_PADDING is set
    #    to False)
    # 2) Make padding False by default (except if ALLOW_PADDING is set
    #    to True)
    # Currently we choose 1) because it favours honoring padding even on
    # unhandled situations (should be very few).
    # However, for development, option 2) is recommended as it catches
    # most of the unhandled situations.
    allow_padding = ptparams is None or ptparams["ALLOW_PADDING"]
    # allow_padding = ptparams is not None and ptparams['ALLOW_PADDING']
    # Offsets are only honoured when every column carries both an explicit
    # position and an offset, and no column is nested.
    if (
        allow_padding
        and len(cols_offsets) > 1
        and len(keys) == len(cols_with_pos)
        and len(keys) == len(cols_offsets)
        and not nested
    ):  # TODO: support offsets with nested types
        # We have to sort the offsets too, as they must follow the column
        # order.  As the offsets and the pos should be place in the same
        # order, a single sort is enough here.
        cols_offsets.sort()
        valid_offsets = True
    else:
        # This value is overwritten unconditionally at the end of the
        # constructor.
        newdict["_v_offsets"] = []

    # Assign the format list to _v_nested_formats
    newdict["_v_nested_formats"] = nested_formats

    if self._v_nestedlvl == 0:
        # Get recursively nested _v_nested_names and _v_nested_descr attrs
        self._g_set_nested_names_descr()
        # Get pathnames for nested groups
        self._g_set_path_names()
        # Check whether _v_byteorder has been used and raise an error
        if hasattr(self, "_v_byteorder"):
            raise ValueError(
                "Using a ``_v_byteorder`` in the description is obsolete. "
                "Use the byteorder parameter in the constructor instead."
            )

    # Compute the dtype with offsets or without
    if valid_offsets:
        # TODO: support offsets within nested types
        dtype_fields = {
            "names": newdict["_v_names"],
            "formats": nested_formats,
            "offsets": cols_offsets,
        }
        # A ``_v_itemsize`` special variable, if given, fixes the total size.
        itemsize = newdict.get("_v_itemsize", None)
        if itemsize is not None:
            dtype_fields["itemsize"] = itemsize
        dtype = np.dtype(dtype_fields)
    else:
        dtype = np.dtype(nested_dtype)
    newdict["_v_dtype"] = dtype
    newdict["_v_itemsize"] = dtype.itemsize
    # The offsets finally stored are the ones NumPy reports for each field.
    newdict["_v_offsets"] = [dtype.fields[name][1] for name in dtype.names]
def _g_set_path_names(self) -> None:
    """Compute the pathnames for arbitrary nested descriptions.

    This method sets the ``_v_pathname`` and ``_v_pathnames``
    attributes of all the elements (both descriptions and columns)
    in this nested description.

    The traversal is iterative: an explicit stack replaces recursion, and
    each stack entry pairs a description with the list of its children
    still to be processed.

    """

    def get_cols_in_order(description: Description) -> list[Col]:
        # Children of `description`, in the order given by ``_v_names``.
        return [
            description._v_colobjects[colname]
            for colname in description._v_names
        ]

    def join_paths(path1: str, path2: str) -> str:
        # An empty `path1` (the root) contributes no leading separator.
        if not path1:
            return path2
        return f"{path1}/{path2}"

    # The top of the stack always has a nested description
    # and a list of its child columns
    # (be they nested ``Description`` or non-nested ``Col`` objects).
    # In the end, the list contains only a list of column paths
    # under this one.
    #
    # For instance, given this top of the stack::
    #
    #   (<Description X>, [<Column A>, <Column B>])
    #
    # After computing the rest of the stack, the top is::
    #
    #   (<Description X>, ['a', 'a/m', 'a/n', ... , 'b', ...])

    stack: list[tuple[Description, list[Col]]] = []

    # We start by pushing the top-level description
    # and its child columns.
    self._v_pathname = ""
    stack.append((self, get_cols_in_order(self)))

    while stack:
        desc, cols = stack.pop()
        # NOTE(review): assumes `cols` is never empty here, i.e. that every
        # description has at least one column -- confirm.
        head = cols[0]

        # What's the first child in the list?
        if isinstance(head, Description):
            # A nested description.  We remove it from the list and
            # push it with its child columns.  This will be the next
            # handled description.
            head._v_pathname = join_paths(desc._v_pathname, head._v_name)
            stack.append((desc, cols[1:]))  # alter the top
            stack.append((head, get_cols_in_order(head)))  # new top
        elif isinstance(head, Col):
            # A non-nested column.  We simply remove it from the
            # list and append its name to it.
            head._v_pathname = join_paths(desc._v_pathname, head._v_name)
            cols.append(head._v_name)  # alter the top
            stack.append((desc, cols[1:]))  # alter the top
        else:
            # Since paths and names are appended *to the end* of
            # children lists, a string signals that no more children
            # remain to be processed, so we are done with the
            # description at the top of the stack.
            assert isinstance(head, str)
            # Assign the computed set of descendent column paths.
            desc._v_pathnames = cols
            if len(stack) > 0:
                # Compute the paths with respect to the parent node
                # (including the path of the current description)
                # and append them to its list.
                desc_name = desc._v_name
                col_paths = [join_paths(desc_name, path) for path in cols]
                col_paths.insert(0, desc_name)
                parent_cols = stack[-1][1]
                parent_cols.extend(col_paths)
            # (Nothing is pushed, we are done with this description.)
+ ) + + stack: list[Description] = [self] + while stack: + obj = stack.pop(0) # pop at the front so as to ensure the order + if type in ["All", "Description"]: + yield obj # yield description + for name in obj._v_names: + new_object = obj._v_colobjects[name] + if isinstance(new_object, Description): + stack.append(new_object) + else: + if type in ["All", "Col"]: + yield new_object # yield column + + def __repr__(self) -> str: + """Give a detailed Description column representation.""" + rep = [ + f'{" " * self._v_nestedlvl}"{k}": {self._v_colobjects[k]!r}' + for k in self._v_names + ] + return "{\n %s}" % (",\n ".join(rep)) + + def __str__(self) -> str: + """Give a brief Description representation.""" + return f"Description({self._v_nested_descr})" + + +class MetaIsDescription(type): + """Helper metaclass to return the class variables as a dictionary.""" + + def __new__( + cls, classname: str, bases: Sequence, classdict: dict[str, Any] + ) -> MetaIsDescription: + """Return a new class with a "columns" attribute filled.""" + newdict = { + "columns": {}, + } + if "__doc__" in classdict: + newdict["__doc__"] = classdict["__doc__"] + for b in bases: + if "columns" in b.__dict__: + newdict["columns"].update(b.__dict__["columns"]) + for k in classdict: + # if not (k.startswith('__') or k.startswith('_v_')): + # We let pass _v_ variables to configure class behaviour + if not (k.startswith("__")): + newdict["columns"][k] = classdict[k] + + # Return a new class with the "columns" attribute filled + return type.__new__(cls, classname, bases, newdict) + + +class IsDescription(metaclass=MetaIsDescription): + """Description of the structure of a table or nested column. + + This class is designed to be used as an easy, yet meaningful way to + describe the structure of new Table (see :ref:`TableClassDescr`) datasets + or nested columns through the definition of *derived classes*. 
In order to + define such a class, you must declare it as descendant of IsDescription, + with as many attributes as columns you want in your table. The name of each + attribute will become the name of a column, and its value will hold a + description of it. + + Ordinary columns can be described using instances of the Col class (see + :ref:`ColClassDescr`). Nested columns can be described by using classes + derived from IsDescription, instances of it, or name-description + dictionaries. Derived classes can be declared in place (in which case the + column takes the name of the class) or referenced by name. + + Nested columns can have a _v_pos special attribute which sets the + *relative* position of the column among sibling columns *also having + explicit positions*. The pos constructor argument of Col instances is used + for the same purpose. Columns with no explicit position will be placed + afterwards in alphanumeric order. + + Once you have created a description object, you can pass it to the Table + constructor, where all the information it contains will be used to define + the table structure. + + .. rubric:: IsDescription attributes + + .. attribute:: _v_pos + + Sets the position of a possible nested column description among its + sibling columns. This attribute can be specified *when declaring* + an IsDescription subclass to complement its *metadata*. + + .. attribute:: columns + + Maps the name of each column in the description to its own descriptive + object. This attribute is *automatically created* when an IsDescription + subclass is declared. Please note that declared columns can no longer + be accessed as normal class variables after its creation. 
+ + """ + + +def descr_from_dtype( + dtype_: npt.DTypeLike, ptparams: dict[str, Any] | None = None +) -> tuple[Description, str]: + """Get a description instance and byteorder from a (nested) NumPy dtype.""" + fields = {} + fbyteorder = "|" + for name in dtype_.names: + dtype, offset = dtype_.fields[name][:2] + kind = dtype.base.kind + byteorder = dtype.base.byteorder + if byteorder in "><=": + if fbyteorder not in ["|", byteorder]: + raise NotImplementedError( + "structured arrays with mixed byteorders " + "are not supported yet, sorry" + ) + fbyteorder = byteorder + # Non-nested column + if kind in "biufSUc": + col = Col.from_dtype(dtype, pos=offset, _offset=offset) + # Nested column + elif kind == "V" and dtype.shape in [(), (1,)]: + if dtype.shape != (): + warnings.warn( + "nested descriptions will be converted to scalar" + ) + col, _ = descr_from_dtype(dtype.base, ptparams=ptparams) + col._v_pos = offset + col._v_offset = offset + else: + raise NotImplementedError( + "structured arrays with columns with type description ``%s`` " + "are not supported yet, sorry" % dtype + ) + fields[name] = col + + return Description(fields, ptparams=ptparams), fbyteorder + + +def dtype_from_descr( + descr: dict | type[IsDescription] | IsDescription, + byteorder: str | None = None, + ptparams: dict[str, Any] | None = None, +) -> np.dtype: + """Get a (nested) NumPy dtype from a description instance and byteorder. + + The descr parameter can be a Description or IsDescription + instance, sub-class of IsDescription or a dictionary. 
+ + """ + if isinstance(descr, dict): + descr = Description(descr, ptparams=ptparams) + elif type(descr) is type(IsDescription) and issubclass( + descr, IsDescription + ): + descr = Description(descr().columns, ptparams=ptparams) + elif isinstance(descr, IsDescription): + descr = Description(descr.columns, ptparams=ptparams) + elif not isinstance(descr, Description): + raise ValueError(f"invalid description: {descr!r}") + + dtype_ = descr._v_dtype + + if byteorder and byteorder != "|": + dtype_ = dtype_.newbyteorder(byteorder) + + return dtype_ + + +if __name__ == "__main__": + """Test code.""" + + class Info(IsDescription): # noqa: D101 + _v_pos = 2 + Name = UInt32Col() + Value = Float64Col() + + class Test(IsDescription): + """A description that has several columns.""" + + x = Col.from_type("int32", 2, 0, pos=0) + y = Col.from_kind("float", dflt=1, shape=(2, 3)) + z = UInt8Col(dflt=1) + color = StringCol(2, dflt=" ") + # color = UInt32Col(2) + Info = Info() + + class LInfo(IsDescription): # noqa: D106 + _v_pos = 1 + name = UInt32Col() + value = Float64Col(pos=0) + y2 = Col.from_kind("float", dflt=1, shape=(2, 3), pos=1) + z2 = UInt8Col(dflt=1) + + class LInfo2(IsDescription): # noqa: D106 + y3 = Col.from_kind("float", dflt=1, shape=(2, 3)) + z3 = UInt8Col(dflt=1) + name = UInt32Col() + value = Float64Col() + + class LInfo3(IsDescription): # noqa: D106 + name = UInt32Col() + value = Float64Col() + y4 = Col.from_kind("float", dflt=1, shape=(2, 3)) + z4 = UInt8Col(dflt=1) + + # class Info(IsDescription): + # _v_pos = 2 + # Name = StringCol(itemsize=2) + # Value = ComplexCol(itemsize=16) + + # class Test(IsDescription): + # """A description that has several columns""" + # x = Col.from_type("int32", 2, 0, pos=0) + # y = Col.from_kind('float', dflt=1, shape=(2,3)) + # z = UInt8Col(dflt=1) + # color = StringCol(2, dflt=" ") + # Info = Info() + # class info(IsDescription): + # _v_pos = 1 + # name = StringCol(itemsize=2) + # value = ComplexCol(itemsize=16, pos=0) + # y2 = 
Col.from_kind('float', dflt=1, shape=(2,3), pos=1) + # z2 = UInt8Col(dflt=1) + # class info2(IsDescription): + # y3 = Col.from_kind('float', dflt=1, shape=(2,3)) + # z3 = UInt8Col(dflt=1) + # name = StringCol(itemsize=2) + # value = ComplexCol(itemsize=16) + # class info3(IsDescription): + # name = StringCol(itemsize=2) + # value = ComplexCol(itemsize=16) + # y4 = Col.from_kind('float', dflt=1, shape=(2,3)) + # z4 = UInt8Col(dflt=1) + + # example cases of class Test + klass = Test() + # klass = Info() + desc = Description(klass.columns) + print("Description representation (short) ==>", desc) + print("Description representation (long) ==>", repr(desc)) + print("Column names ==>", desc._v_names) + print("Column x ==>", desc.x) + print("Column Info ==>", desc.Info) + print("Column Info.value ==>", desc.Info.Value) + print("Nested column names ==>", desc._v_nested_names) + print("Defaults ==>", desc._v_dflts) + print("Nested Formats ==>", desc._v_nested_formats) + print("Nested Descriptions ==>", desc._v_nested_descr) + print("Nested Descriptions (info) ==>", desc.info._v_nested_descr) + print("Total size ==>", desc._v_dtype.itemsize) + + # check _f_walk + for obj in desc._f_walk(): + if isinstance(obj, Description): + print("******begin object*************", end=" ") + print("name -->", obj._v_name) + # print("name -->", object._v_dtype.name) + # print("object childs-->", object._v_names) + # print("object nested childs-->", object._v_nested_names) + print("totalsize-->", obj._v_dtype.itemsize) + else: + # pass + print("leaf -->", obj._v_name, obj.dtype) + + class TestDescParent(IsDescription): # noqa: D101 + c = Int32Col() + + class TestDesc(TestDescParent): # noqa: D101 + pass + + assert "c" in TestDesc.columns diff --git a/venv/Lib/site-packages/tables/earray.py b/venv/Lib/site-packages/tables/earray.py new file mode 100644 index 0000000..7f7ce50 --- /dev/null +++ b/venv/Lib/site-packages/tables/earray.py @@ -0,0 +1,293 @@ +"""Here is defined the EArray class.""" + 
+from __future__ import annotations + +from typing import TYPE_CHECKING +from collections.abc import Sequence + +import numpy as np +import numpy.typing as npt + +from .utils import convert_to_np_atom2, SizeType +from .carray import CArray + +if TYPE_CHECKING: + from .atom import Atom + from .group import Group + from .filters import Filters + +# default version for EARRAY objects +# obversion = "1.0" # initial version +# obversion = "1.1" # support for complex datatypes +# obversion = "1.2" # This adds support for time datatypes. +# obversion = "1.3" # This adds support for enumerated datatypes. +obversion = "1.4" # Numeric and numarray flavors are gone. + + +class EArray(CArray): + r"""This class represents extendable, homogeneous datasets in an HDF5 file. + + The main difference between an EArray and a CArray (see + :ref:`CArrayClassDescr`), from which it inherits, is that the former + can be enlarged along one of its dimensions, the *enlargeable + dimension*. That means that the :attr:`Leaf.extdim` attribute (see + :class:`Leaf`) of any EArray instance will always be non-negative. + Multiple enlargeable dimensions might be supported in the future. + + New rows can be added to the end of an enlargeable array by using the + :meth:`EArray.append` method. + + Parameters + ---------- + parentnode + The parent :class:`Group` object. + + .. versionchanged:: 3.0 + Renamed from *parentNode* to *parentnode*. + + name : str + The name of this node in its parent group. + + atom + An `Atom` instance representing the *type* and *shape* + of the atomic objects to be saved. + + shape + The shape of the new array. One (and only one) of + the shape dimensions *must* be 0. The dimension being 0 + means that the resulting `EArray` object can be extended + along it. Multiple enlargeable dimensions are not supported + right now. + + title + A description for this node (it sets the ``TITLE`` + HDF5 attribute on disk). 
+ + filters + An instance of the `Filters` class that provides information + about the desired I/O filters to be applied during the life + of this object. + + expectedrows + A user estimate about the number of row elements that will + be added to the growable dimension in the `EArray` node. + If not provided, the default value is ``EXPECTED_ROWS_EARRAY`` + (see ``tables/parameters.py``). If you plan to create either + a much smaller or a much bigger `EArray` try providing a guess; + this will optimize the HDF5 B-Tree creation and management + process time and the amount of memory used. + + chunkshape + The shape of the data chunk to be read or written in a single + HDF5 I/O operation. Filters are applied to those chunks of data. + The dimensionality of `chunkshape` must be the same as that of + `shape` (beware: no dimension should be 0 this time!). + If ``None``, a sensible value is calculated based on the + `expectedrows` parameter (which is recommended). + + byteorder + The byteorder of the data *on disk*, specified as 'little' or + 'big'. If this is not specified, the byteorder is that of the + platform. + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, object + birth time); default True. Semantics of these times depend on their + implementation in the HDF5 library: refer to documentation of the + H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata + change time) is implemented. + + .. versionadded:: 3.4.3 + + Examples + -------- + See below a small example of the use of the `EArray` class. The + code is available in ``examples/earray1.py``:: + + import numpy as np + import tables as tb + + fileh = tb.open_file('earray1.h5', mode='w') + a = tb.StringAtom(itemsize=8) + + # Use ``a`` as the object type for the enlargeable array. 
+ array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,), + \"Chars\") + array_c.append(np.array(['a'*2, 'b'*4], dtype='S8')) + array_c.append(np.array(['a'*6, 'b'*8, 'c'*10], dtype='S8')) + + # Read the string ``EArray`` we have created on disk. + for s in array_c: + print('array_c[%s] => %r' % (array_c.nrow, s)) + # Close the file. + fileh.close() + + The output for the previous script is something like:: + + array_c[0] => 'aa' + array_c[1] => 'bbbb' + array_c[2] => 'aaaaaa' + array_c[3] => 'bbbbbbbb' + array_c[4] => 'cccccccc' + + """ + + # Class identifier. + _c_classid = "EARRAY" + + def __init__( + self, + parentnode: Group, + name: str, + atom: Atom | None = None, + shape: Sequence[int] | None = None, + title: str = "", + filters: Filters | None = None, + expectedrows: int | None = None, + chunkshape: tuple[int, ...] | None = None, + byteorder: str | None = None, + _log: bool = True, + track_times: bool = True, + ) -> None: + + # Specific of EArray + if expectedrows is None: + expectedrows = parentnode._v_file.params["EXPECTED_ROWS_EARRAY"] + self._v_expectedrows = int(expectedrows) + """The expected number of rows to be stored in the array.""" + + # Call the parent (CArray) init code + super().__init__( + parentnode, + name, + atom, + shape, + title, + filters, + chunkshape, + byteorder, + _log, + track_times, + ) + + def _g_create(self) -> int: + """Create a new array in file (specific part).""" + # Pre-conditions and extdim computation + zerodims = np.sum(np.array(self.shape) == 0) + if zerodims > 0: + if zerodims == 1: + self.extdim = list(self.shape).index(0) + else: + raise NotImplementedError( + "Multiple enlargeable (0-)dimensions are not " "supported." + ) + else: + raise ValueError( + "When creating EArrays, you need to set one of " + "the dimensions of the Atom instance to zero." 
+ ) + + # Finish the common part of the creation process + return self._g_create_common(self._v_expectedrows) + + def _check_shape_append(self, nparr: np.ndarray) -> None: + """Test that nparr shape is consistent with underlying EArray.""" + # Does the array conform to self expandibility? + myrank = len(self.shape) + narank = len(nparr.shape) - len(self.atom.shape) + if myrank != narank: + raise ValueError( + ( + "the ranks of the appended object (%d) and the " + "``%s`` EArray (%d) differ" + ) + % (narank, self._v_pathname, myrank) + ) + for i in range(myrank): + if i != self.extdim and self.shape[i] != nparr.shape[i]: + raise ValueError( + ( + "the shapes of the appended object and the " + "``%s`` EArray differ in non-enlargeable " + "dimension %d" + ) + % (self._v_pathname, i) + ) + + def append(self, sequence: npt.ArrayLike) -> None: + """Add a sequence of data to the end of the dataset. + + The sequence must have the same type as the array; otherwise a + TypeError is raised. In the same way, the dimensions of the + sequence must conform to the shape of the array, that is, all + dimensions must match, with the exception of the enlargeable + dimension, which can be of any length (even 0!). If the shape + of the sequence is invalid, a ValueError is raised. + + """ + self._g_check_open() + self._v_file._check_writable() + + # Convert the sequence into a NumPy object + nparr = convert_to_np_atom2(sequence, self.atom) + # Check if it has a consistent shape with underlying EArray + self._check_shape_append(nparr) + # If the size of the nparr is zero, don't do anything else + if nparr.size > 0: + self._append(nparr) + + def _g_copy_with_stats( + self, + group: Group, + name: str, + start: int, + stop: int, + step: int, + title: str, + filters: Filters | None, + chunkshape: tuple[int, ...] 
| None, + _log: bool, + **kwargs, + ) -> tuple[EArray, int]: + """Private part of Leaf.copy() for each kind of leaf.""" + start, stop, step = self._process_range_read(start, stop, step) + # Build the new EArray object + maindim = self.maindim + shape = list(self.shape) + shape[maindim] = 0 + # The number of final rows + nrows = len(range(start, stop, step)) + # Build the new EArray object + obj = EArray( + group, + name, + atom=self.atom, + shape=shape, + title=title, + filters=filters, + expectedrows=nrows, + chunkshape=chunkshape, + _log=_log, + ) + # Now, fill the new earray with values from source + nrowsinbuf = self.nrowsinbuf + # The slices parameter for self.__getitem__ + slices = [slice(0, dim, 1) for dim in self.shape] + # This is a hack to prevent doing unnecessary conversions + # when copying buffers + self._v_convert = False + # Start the copy itself + for start2 in range(start, stop, step * nrowsinbuf): + # Save the records on disk + stop2 = start2 + step * nrowsinbuf + if stop2 > stop: + stop2 = stop + # Set the proper slice in the extensible dimension + slices[maindim] = slice(start2, stop2, step) + obj._append(self.__getitem__(tuple(slices))) + # Active the conversion again (default) + self._v_convert = True + nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.itemsize + + return (obj, nbytes) diff --git a/venv/Lib/site-packages/tables/exceptions.py b/venv/Lib/site-packages/tables/exceptions.py new file mode 100644 index 0000000..6409020 --- /dev/null +++ b/venv/Lib/site-packages/tables/exceptions.py @@ -0,0 +1,467 @@ +"""Declare exceptions and warnings that are specific to PyTables.""" + +from __future__ import annotations + +import os +import warnings +import traceback +from collections.abc import Callable + +__all__ = [ + "ChunkError", + "ClosedFileError", + "ClosedNodeError", + "DataTypeWarning", + "ExperimentalFeatureWarning", + "FileModeError", + "FiltersWarning", + "FlavorError", + "FlavorWarning", + "HDF5ExtError", + 
"NaturalNameWarning", + "NoSuchChunkError", + "NoSuchNodeError", + "NodeError", + "NotChunkedError", + "NotChunkAlignedError", + "OldIndexWarning", + "PerformanceWarning", + "UnclosedFileWarning", + "UndoRedoError", + "UndoRedoWarning", +] + + +__docformat__ = "reStructuredText" +"""The format of documentation strings in this module.""" + + +class HDF5ExtError(RuntimeError): + """A low level HDF5 operation failed. + + This exception is raised the low level PyTables components used for + accessing HDF5 files. It usually signals that something is not + going well in the HDF5 library or even at the Input/Output level. + + Errors in the HDF5 C library may be accompanied by an extensive + HDF5 back trace on standard error (see also + :func:`tables.silence_hdf5_messages`). + + .. versionchanged:: 2.4 + + Parameters + ---------- + message + error message + h5bt + This parameter (keyword only) controls the HDF5 back trace + handling. Any keyword arguments other than h5bt is ignored. + + * if set to False the HDF5 back trace is ignored and the + :attr:`HDF5ExtError.h5backtrace` attribute is set to None + * if set to True the back trace is retrieved from the HDF5 + library and stored in the :attr:`HDF5ExtError.h5backtrace` + attribute as a list of tuples + * if set to "VERBOSE" (default) the HDF5 back trace is + stored in the :attr:`HDF5ExtError.h5backtrace` attribute + and also included in the string representation of the + exception + * if not set (or set to None) the default policy is used + (see :attr:`HDF5ExtError.DEFAULT_H5_BACKTRACE_POLICY`) + + """ + + # NOTE: in order to avoid circular dependencies between modules the + # _dump_h5_backtrace method is set at initialization time in + # the utilsextension.pyx. 
+ _dump_h5_backtrace: ( + Callable[[], list[tuple[str, int, str, str]]] | None + ) = None + + DEFAULT_H5_BACKTRACE_POLICY = "VERBOSE" + """Default policy for HDF5 backtrace handling + + * if set to False the HDF5 back trace is ignored and the + :attr:`HDF5ExtError.h5backtrace` attribute is set to None + * if set to True the back trace is retrieved from the HDF5 + library and stored in the :attr:`HDF5ExtError.h5backtrace` + attribute as a list of tuples + * if set to "VERBOSE" (default) the HDF5 back trace is + stored in the :attr:`HDF5ExtError.h5backtrace` attribute + and also included in the string representation of the + exception + + This parameter can be set using the + :envvar:`PT_DEFAULT_H5_BACKTRACE_POLICY` environment variable. + Allowed values are "IGNORE" (or "FALSE"), "SAVE" (or "TRUE") and + "VERBOSE" to set the policy to False, True and "VERBOSE" + respectively. The special value "DEFAULT" can be used to reset + the policy to the default value + + .. versionadded:: 2.4 + """ + + @classmethod + def set_policy_from_env(cls) -> str: + """Set the policy from environment variables.""" + envmap = { + "IGNORE": False, + "FALSE": False, + "SAVE": True, + "TRUE": True, + "VERBOSE": "VERBOSE", + "DEFAULT": "VERBOSE", + } + oldvalue = cls.DEFAULT_H5_BACKTRACE_POLICY + envvalue = os.environ.get("PT_DEFAULT_H5_BACKTRACE_POLICY", "DEFAULT") + try: + newvalue = envmap[envvalue.upper()] + except KeyError: + warnings.warn( + "Invalid value for the environment variable " + "'PT_DEFAULT_H5_BACKTRACE_POLICY'. The default " + "policy for HDF5 back trace management in PyTables " + "will be: '%s'" % oldvalue + ) + else: + cls.DEFAULT_H5_BACKTRACE_POLICY = newvalue + + return oldvalue + + def __init__(self, *args, **kargs) -> None: + + super().__init__(*args) + + self._h5bt_policy = kargs.get("h5bt", self.DEFAULT_H5_BACKTRACE_POLICY) + + if self._h5bt_policy and self._dump_h5_backtrace is not None: + self.h5backtrace = self._dump_h5_backtrace() + """HDF5 back trace. 
+ + Contains the HDF5 back trace as a (possibly empty) list of + tuples. Each tuple has the following format:: + + (filename, line number, function name, text) + + Depending on the value of the *h5bt* parameter passed to the + initializer the h5backtrace attribute can be set to None. + This means that the HDF5 back trace has been simply ignored + (not retrieved from the HDF5 C library error stack) or that + there has been an error (silently ignored) during the HDF5 back + trace retrieval. + + .. versionadded:: 2.4 + + See Also + -------- + traceback.format_list : :func:`traceback.format_list` + + """ + + # XXX: check _dump_h5_backtrace failures + else: + self.h5backtrace = None + + def __str__(self) -> str: + """Return a sting representation of the exception. + + The actual result depends on policy set in the initializer + :meth:`HDF5ExtError.__init__`. + + .. versionadded:: 2.4 + + """ + verbose = bool(self._h5bt_policy in ("VERBOSE", "verbose")) + + if verbose and self.h5backtrace: + bt = "\n".join( + [ + "HDF5 error back trace\n", + self.format_h5_backtrace(), + "End of HDF5 error back trace", + ] + ) + + if len(self.args) == 1 and isinstance(self.args[0], str): + msg = super().__str__() + msg = f"{bt}\n\n{msg}" + elif self.h5backtrace[-1][-1]: + msg = f"{bt}\n\n{self.h5backtrace[-1][-1]}" + else: + msg = bt + else: + msg = super().__str__() + + return msg + + def format_h5_backtrace( + self, backtrace: list[tuple[str, int, str, str]] | None = None + ) -> str: + """Convert the HDF5 trace back into a string. + + The HDF5 trace back is represented as a list of tuples. + + See :attr:`HDF5ExtError.h5backtrace`. + + .. 
versionadded:: 2.4 + + """ + if backtrace is None: + backtrace = self.h5backtrace + + if backtrace is None: + return "No HDF5 back trace available" + else: + return "".join(traceback.format_list(backtrace)) + + +# Initialize the policy for HDF5 back trace handling +HDF5ExtError.set_policy_from_env() + + +# The following exceptions are concretions of the ``ValueError`` exceptions +# raised by ``file`` objects on certain operations. + + +class ClosedNodeError(ValueError): + """The operation can not be completed because the node is closed. + + For instance, listing the children of a closed group is not allowed. + + """ + + pass + + +class ClosedFileError(ValueError): + """The operation can not be completed because the hosting file is closed. + + For instance, getting an existing node from a closed file is not + allowed. + + """ + + pass + + +class FileModeError(ValueError): + """FIle mode error. + + The operation can not be carried out because the mode in which the + hosting file is opened is not adequate. + + For instance, removing an existing leaf from a read-only file is not + allowed. + + """ + + pass + + +class NodeError(AttributeError, LookupError): + """Invalid hierarchy manipulation operation requested. + + This exception is raised when the user requests an operation on the + hierarchy which can not be run because of the current layout of the + tree. This includes accessing nonexistent nodes, moving or copying + or creating over an existing node, non-recursively removing groups + with children, and other similarly invalid operations. + + A node in a PyTables database cannot be simply overwritten by + replacing it. Instead, the old node must be removed explicitly + before another one can take its place. This is done to protect + interactive users from inadvertently deleting whole trees of data by + a single erroneous command. + + """ + + pass + + +class NoSuchNodeError(NodeError): + """An operation was requested on a node that does not exist. 
+ + This exception is raised when an operation gets a path name or a + ``(where, name)`` pair leading to a nonexistent node. + + """ + + pass + + +class UndoRedoError(Exception): + """Problems with doing/redoing actions with Undo/Redo feature. + + This exception indicates a problem related to the Undo/Redo + mechanism, such as trying to undo or redo actions with this + mechanism disabled, or going to a nonexistent mark. + + """ + + pass + + +class UndoRedoWarning(Warning): + """Issued when an action not supporting Undo/Redo is run. + + This warning is only shown when the Undo/Redo mechanism is enabled. + + """ + + pass + + +class NaturalNameWarning(Warning): + """Issued when a non-pythonic name is given for a node. + + This is not an error and may even be very useful in certain + contexts, but one should be aware that such nodes cannot be + accessed using natural naming (instead, ``getattr()`` must be + used explicitly). + """ + + pass + + +class PerformanceWarning(Warning): + """Warning for operations which may cause a performance drop. + + This warning is issued when an operation is made on the database + which may cause it to slow down on future operations (i.e. making + the node tree grow too much). + + """ + + pass + + +class FlavorError(ValueError): + """Unsupported or unavailable flavor or flavor conversion. + + This exception is raised when an unsupported or unavailable flavor + is given to a dataset, or when a conversion of data between two + given flavors is not supported nor available. + + """ + + pass + + +class FlavorWarning(Warning): + """Unsupported or unavailable flavor conversion. + + This warning is issued when a conversion of data between two given + flavors is not supported nor available, and raising an error would + render the data inaccessible (e.g. on a dataset of an unavailable + flavor in a read-only file). + + See the `FlavorError` class for more information. + + """ + + pass + + +class FiltersWarning(Warning): + """Unavailable filters. 
+ + This warning is issued when a valid filter is specified but it is + not available in the system. It may mean that an available default + filter is to be used instead. + + """ + + pass + + +class OldIndexWarning(Warning): + """Unsupported index format. + + This warning is issued when an index in an unsupported format is + found. The index will be marked as invalid and will behave as if + it doesn't exist. + + """ + + pass + + +class DataTypeWarning(Warning): + """Unsupported data type. + + This warning is issued when an unsupported HDF5 data type is found + (normally in a file created with other tool than PyTables). + + """ + + pass + + +class ExperimentalFeatureWarning(Warning): + """Generic warning for experimental features. + + This warning is issued when using a functionality that is still + experimental and that users have to use with care. + + """ + + pass + + +class UnclosedFileWarning(Warning): + """Warning raised when there are still open files at program exit. + + PyTables will close remaining open files at exit, but raise this warning. + """ + + pass + + +class ChunkError(Exception): + """An operation related to direct chunk access failed. + + This exception may be related with the properties of the dataset or the + chunk being accessed, or with how the chunk is being accessed. It is a + base for more specific exceptions. + + """ + + pass + + +class NotChunkedError(ChunkError): + """A direct chunking operation was attempted on a non-chunked dataset. + + For instance, chunk information was requested for a plain ``Array`` + instance. + + """ + + pass + + +class NotChunkAlignedError(ChunkError): + """Coordinate not aligned to the chunks. + + A direct chunk read/write operation was given coordinates that do not + match the chunk's start. + + These operations require coordinates that are integer multiples of the + dataset's chunksize. 
+ + """ + + pass + + +class NoSuchChunkError(ChunkError): + """The chunk with the given coordinates does not exist in storage. + + The coordinates are within the dataset's shape, though. + + This is only an error when the chunk is to be read. Such a missing chunk + can be written, in which case it is created in storage. + + """ + + pass diff --git a/venv/Lib/site-packages/tables/expression.py b/venv/Lib/site-packages/tables/expression.py new file mode 100644 index 0000000..c28cb38 --- /dev/null +++ b/venv/Lib/site-packages/tables/expression.py @@ -0,0 +1,785 @@ +"""Here is defined the Expr class.""" + +from __future__ import annotations + +import sys +import warnings +from typing import Any, Union, TYPE_CHECKING +from collections.abc import Iterator + +import numpy as np +import numexpr as ne + +import tables as tb + +from .exceptions import PerformanceWarning +from .parameters import IO_BUFFER_SIZE, BUFFER_TIMES + +if TYPE_CHECKING: + from .array import Array + from .table import Column + from .tableextension import Row + +ContainerType = Union[np.ndarray, "Array", "Column"] + + +class Expr: + """A class for evaluating expressions with arbitrary array-like objects. + + Expr is a class for evaluating expressions containing array-like objects. + With it, you can evaluate expressions (like "3 * a + 4 * b") that + operate on arbitrary large arrays while optimizing the resources + required to perform them (basically main memory and CPU cache memory). + It is similar to the Numexpr package (see :ref:`[NUMEXPR] `), + but in addition to NumPy objects, it also accepts disk-based homogeneous + arrays, like the Array, CArray, EArray and Column PyTables objects. + + .. warning:: + + Expr class only offers a subset of the Numexpr features due to the + complexity of implement some of them when dealing with huge amount of + data. + + All the internal computations are performed via the Numexpr package, + so all the broadcast and upcasting rules of Numexpr applies here too. 
+ These rules are very similar to the NumPy ones, but with some exceptions + due to the particularities of having to deal with potentially very large + disk-based arrays. Be sure to read the documentation of the Expr + constructor and methods as well as that of Numexpr, if you want to fully + grasp these particularities. + + + Parameters + ---------- + expr : str + This specifies the expression to be evaluated, such as "2 * a + 3 * b". + uservars : dict + This can be used to define the variable names appearing in *expr*. + This mapping should consist of identifier-like strings pointing to any + `Array`, `CArray`, `EArray`, `Column` or NumPy ndarray instances (or + even others which will tried to be converted to ndarrays). When + `uservars` is not provided or `None`, the current local and global + namespace is sought instead of `uservars`. It is also possible to pass + just some of the variables in expression via the `uservars` mapping, + and the rest will be retrieved from the current local and global + namespaces. + kwargs : dict + This is meant to pass additional parameters to the Numexpr kernel. + This is basically the same as the kwargs argument in + Numexpr.evaluate(), and is mainly meant for advanced use. + + Examples + -------- + The following shows an example of using Expr:: + + >>> f = tb.open_file('/tmp/test_expr.h5', 'w') + >>> a = f.create_array('/', 'a', np.array([1,2,3])) + >>> b = f.create_array('/', 'b', np.array([3,4,5])) + >>> c = np.array([4,5,6]) + >>> expr = tb.Expr("2 * a + b * c") # initialize the expression + >>> expr.eval() # evaluate it + array([14, 24, 36], dtype=int64) + >>> sum(expr) # use as an iterator + 74 + + where you can see that you can mix different containers in + the expression (whenever shapes are consistent). 
+ + You can also work with multidimensional arrays:: + + >>> a2 = f.create_array('/', 'a2', np.array([[1,2],[3,4]])) + >>> b2 = f.create_array('/', 'b2', np.array([[3,4],[5,6]])) + >>> c2 = np.array([4,5]) # This will be broadcasted + >>> expr = tb.Expr("2 * a2 + b2-c2") + >>> expr.eval() + array([[1, 3], + [7, 9]], dtype=int64) + >>> sum(expr) + array([ 8, 12], dtype=int64) + >>> f.close() + + .. rubric:: Expr attributes + + .. attribute:: append_mode + + The append mode for user-provided output containers. + + .. attribute:: maindim + + Common main dimension for inputs in expression. + + .. attribute:: names + + The names of variables in expression (list). + + .. attribute:: out + + The user-provided container (if any) for the expression outcome. + + .. attribute:: o_start + + The start range selection for the user-provided output. + + .. attribute:: o_stop + + The stop range selection for the user-provided output. + + .. attribute:: o_step + + The step range selection for the user-provided output. + + .. attribute:: shape + + Common shape for the arrays in expression. + + .. attribute:: values + + The values of variables in expression (list). + + """ + + _exprvars_cache = {} + """Cache of variables participating in expressions. + + .. 
versionadded:: 3.0 + + """ + + def __init__( + self, + expr: str, + uservars: dict[str, Any] | None = None, + **kwargs, + ) -> None: + + self.append_mode = False + """The append mode for user-provided output containers.""" + self.maindim = 0 + """Common main dimension for inputs in expression.""" + self.names: list[str] = [] + """The names of variables in expression (list).""" + self.out: ContainerType | None = None + """The user-provided container (if any) for the expression outcome.""" + self.o_start: int | None = None + """The start range selection for the user-provided output.""" + self.o_stop: int | None = None + """The stop range selection for the user-provided output.""" + self.o_step: int | None = None + """The step range selection for the user-provided output.""" + self.shape: tuple[int, ...] | None = None + """Common shape for the arrays in expression.""" + self.start, self.stop, self.step = (None,) * 3 + self.start: int | None = None + """The start range selection for the input.""" + self.stop: int | None = None + """The stop range selection for the input.""" + self.step: int | None = None + """The step range selection for the input.""" + self.values: list = [] + """The values of variables in expression (list).""" + + self._compiled_expr: ne.interpreter.NumExpr | None = None + """The compiled expression.""" + self._single_row_out: np.ndarray | None = None + """A sample of the output with just a single row.""" + + # First, get the signature for the arrays in expression + vars_ = self._required_expr_vars(expr, uservars) + context = ne.necompiler.getContext(kwargs) + self.names, _ = ne.necompiler.getExprNames(expr, context) + + # Raise a ValueError in case we have unsupported objects + for name, var in vars_.items(): + if type(var) in (int, float, str): + continue + if not isinstance(var, (tb.Leaf, tb.Column)): + if hasattr(var, "dtype"): + # Quacks like a NumPy object + continue + raise TypeError("Unsupported variable type: %r" % var) + objname = 
var.__class__.__name__ + if objname not in ("Array", "CArray", "EArray", "Column"): + raise TypeError("Unsupported variable type: %r" % var) + + # NumPy arrays to be copied? (we don't need to worry about + # PyTables objects, as the reads always return contiguous and + # aligned objects, or at least I think so). + for name, var in vars_.items(): + if isinstance(var, np.ndarray): + # See numexpr.necompiler.evaluate for a rational + # of the code below + if not var.flags.aligned: + if var.ndim != 1: + # Do a copy of this variable + var = var.copy() + # Update the vars_ dictionary + vars_[name] = var + + # Get the variables and types + values = self.values + types_ = [] + for name in self.names: + value = vars_[name] + if hasattr(value, "atom"): + types_.append(value.atom) + elif hasattr(value, "dtype"): + types_.append(value) + else: + # try to convert into a NumPy array + value = np.array(value) + types_.append(value) + values.append(value) + + # Create a signature for the expression + signature = [ + (name, ne.necompiler.getType(type_)) + for (name, type_) in zip(self.names, types_) + ] + + # Compile the expression + self._compiled_expr = ne.necompiler.NumExpr(expr, signature, **kwargs) + + # Guess the shape for the outcome and the maindim of inputs + self.shape, self.maindim = self._guess_shape() + + # The next method is similar to their counterpart in `Table`, but + # adapted to the `Expr` own requirements. + def _required_expr_vars( + self, + expression: str, + uservars: dict[str, Any] | None, + depth: int = 2, + ) -> dict[str, Any]: + """Get the variables required by the `expression`. + + A new dictionary defining the variables used in the `expression` + is returned. Required variables are first looked up in the + `uservars` mapping, then in the set of top-level columns of the + table. Unknown variables cause a `NameError` to be raised. 
+ + When `uservars` is `None`, the local and global namespace where + the API callable which uses this method is called is sought + instead. To disable this mechanism, just specify a mapping as + `uservars`. + + Nested columns and variables with an ``uint64`` type are not + allowed (`TypeError` and `NotImplementedError` are raised, + respectively). + + `depth` specifies the depth of the frame in order to reach local + or global variables. + + """ + # Get the names of variables used in the expression. + exprvars_cache = self._exprvars_cache + if expression not in exprvars_cache: + # Protection against growing the cache too much + if len(exprvars_cache) > 256: + # Remove 10 (arbitrary) elements from the cache + for k in list(exprvars_cache)[:10]: + del exprvars_cache[k] + cexpr = compile(expression, "", "eval") + exprvars = [ + var + for var in cexpr.co_names + if var not in ["None", "False", "True"] + and var not in ne.expressions.functions + ] + exprvars_cache[expression] = exprvars + else: + exprvars = exprvars_cache[expression] + + # Get the local and global variable mappings of the user frame + # if no mapping has been explicitly given for user variables. + user_locals, user_globals = {}, {} + if uservars is None: + user_frame = sys._getframe(depth) + user_locals = user_frame.f_locals + user_globals = user_frame.f_globals + + # Look for the required variables first among the ones + # explicitly provided by the user. + reqvars = {} + for var in exprvars: + # Get the value. + if uservars is not None and var in uservars: + val = uservars[var] + elif uservars is None and var in user_locals: + val = user_locals[var] + elif uservars is None and var in user_globals: + val = user_globals[var] + else: + raise NameError("name ``%s`` is not defined" % var) + + # Check the value. 
+ if hasattr(val, "dtype") and val.dtype.str[1:] == "u8": + raise NotImplementedError( + "variable ``%s`` refers to " + "a 64-bit unsigned integer object, that is " + "not yet supported in expressions, sorry; " % var + ) + elif hasattr(val, "_v_colpathnames"): # nested column + # This branch is never reached because the compile step + # above already raise a ``TypeError`` for nested + # columns, but that could change in the future. So it + # is best to let this here. + raise TypeError( + "variable ``%s`` refers to a nested column, " + "not allowed in expressions" % var + ) + reqvars[var] = val + return reqvars + + def set_inputs_range( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> None: + """Define a range for all inputs in expression. + + The computation will only take place for the range defined by + the start, stop and step parameters in the main dimension of + inputs (or the leading one, if the object lacks the concept of + main dimension, like a NumPy container). If not a common main + dimension exists for all inputs, the leading dimension will be + used instead. + + """ + self.start = start + self.stop = stop + self.step = step + + def set_output( + self, out: ContainerType, append_mode: bool = False + ) -> None: + """Set out as container for output as well as the append_mode. + + The out must be a container that is meant to keep the outcome of + the expression. It should be an homogeneous type container and + can typically be an Array, CArray, EArray, Column or a NumPy ndarray. + + The append_mode specifies the way of which the output is filled. + If true, the rows of the outcome are *appended* to the out container. + Of course, for doing this it is necessary that out would have an + append() method (like an EArray, for example). + + If append_mode is false, the output is set via the __setitem__() + method (see the Expr.set_output_range() for info on how to select + the rows to be updated). 
If out is smaller than what is required + by the expression, only the computations that are needed to fill + up the container are carried out. If it is larger, the excess + elements are unaffected. + + """ + if not (hasattr(out, "shape") and hasattr(out, "__setitem__")): + raise ValueError( + "You need to pass a settable multidimensional container " + "as output" + ) + self.out = out + if append_mode and not hasattr(out, "append"): + raise ValueError( + "For activating the ``append`` mode, you need a container " + "with an `append()` method (like the `EArray`)" + ) + self.append_mode = append_mode + + def set_output_range( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> None: + """Define a range for user-provided output object. + + The output object will only be modified in the range specified by the + start, stop and step parameters in the main dimension of output (or the + leading one, if the object does not have the concept of main dimension, + like a NumPy container). + + """ + if self.out is None: + raise IndexError( + "You need to pass an output object to `setOut()` first" + ) + self.o_start = start + self.o_stop = stop + self.o_step = step + + # Although the next code is similar to the method in `Leaf`, it + # allows the use of pure NumPy objects. 
+ def _calc_nrowsinbuf(self, obj: np.ndarray) -> int: + """Calculate the number of rows that will fit in a buffer.""" + # Compute the rowsize for the *leading* dimension + shape_ = list(obj.shape) + if shape_: + shape_[0] = 1 + + rowsize = np.prod(shape_) * obj.dtype.itemsize + + # Compute the nrowsinbuf + # Multiplying the I/O buffer size by 4 gives optimal results + # in my benchmarks with `tables.Expr` (see ``bench/poly.py``) + buffersize = IO_BUFFER_SIZE * 4 + nrowsinbuf = buffersize // rowsize + + # Safeguard against row sizes being extremely large + if nrowsinbuf == 0: + nrowsinbuf = 1 + # If rowsize is too large, issue a Performance warning + maxrowsize = BUFFER_TIMES * buffersize + if rowsize > maxrowsize: + warnings.warn( + f"""\ +The object ``{obj}`` is exceeding the maximum recommended rowsize ({maxrowsize} +bytes); be ready to see PyTables asking for *lots* of memory and +possibly slow I/O. You may want to reduce the rowsize by trimming the +value of dimensions that are orthogonal (and preferably close) to the +*leading* dimension of this object.""", + PerformanceWarning, + stacklevel=2, + ) + + return nrowsinbuf + + def _guess_shape( + self, + ) -> tuple[list[tuple[int, int]], int] | tuple[tuple, None]: + """Guess the shape of the output of the expression.""" + # First, compute the maximum dimension of inputs and maindim + # (if it exists) + maxndim = 0 + maindims = [] + for val in self.values: + # Get the minimum of the lengths + if len(val.shape) > maxndim: + maxndim = len(val.shape) + if hasattr(val, "maindim"): + maindims.append(val.maindim) + if maxndim == 0: + self._single_row_out = out = self._compiled_expr(*self.values) + return (), None + if maindims and [maindims[0]] * len(maindims) == maindims: + # If all maindims detected are the same, use this as maindim + maindim = maindims[0] + else: + # If not, the main dimension will be the default one + maindim = 0 + + # The slices parameter for inputs + slices = (slice(None),) * maindim + (0,) + + # 
Now, collect the values in first row of arrays with maximum dims + vals = [] + lens = [] + for val in self.values: + shape = val.shape + # Warning: don't use len(val) below or it will raise an + # `Overflow` error on 32-bit platforms for large enough arrays. + if shape != () and shape[maindim] == 0: + vals.append(val[:]) + lens.append(0) + elif len(shape) < maxndim: + vals.append(val) + else: + vals.append(val.__getitem__(slices)) + lens.append(shape[maindim]) + minlen = min(lens) + self._single_row_out = out = self._compiled_expr(*vals) + shape = list(out.shape) + if minlen > 0: + shape.insert(maindim, minlen) + return shape, maindim + + def _get_info( + self, shape: list[int], maindim: int | None, itermode: bool = False + ) -> ( + tuple[int, list[int], int, int, int | None, int] + | tuple[ + int, + list[int], + int, + int, + int | None, + int, + ContainerType, + int, + int, + int, + int, + ] + ): + """Return various info needed for evaluating the computation loop.""" + # Compute the shape of the resulting container having + # in account new possible values of start, stop and step in + # the inputs range + if maindim is not None: + start, stop, step = slice( + self.start, self.stop, self.step + ).indices(shape[maindim]) + shape[maindim] = min(shape[maindim], len(range(start, stop, step))) + i_nrows = shape[maindim] + else: + start, stop, step = 0, 0, None + i_nrows = 0 + + if not itermode: + # Create a container for output if not defined yet + o_maindim = 0 # Default maindim + if self.out is None: + out = np.empty(shape, dtype=self._single_row_out.dtype) + # Get the trivial values for start, stop and step + if maindim is not None: + o_start, o_stop, o_step = (0, shape[maindim], 1) + else: + o_start, o_stop, o_step = (0, 0, 1) + else: + out = self.out + # Out container already provided. Do some sanity checks. 
+ if hasattr(out, "maindim"): + o_maindim = out.maindim + + # Refine the shape of the resulting container having in + # account new possible values of start, stop and step in + # the output range + o_shape = list(out.shape) + s = slice(self.o_start, self.o_stop, self.o_step) + o_start, o_stop, o_step = s.indices(o_shape[o_maindim]) + o_shape[o_maindim] = min( + o_shape[o_maindim], len(range(o_start, o_stop, o_step)) + ) + + # Check that the shape of output is consistent with inputs + tr_oshape = list(o_shape) # this implies a copy + olen_ = tr_oshape.pop(o_maindim) + tr_shape = list(shape) # do a copy + if maindim is not None: + len_ = tr_shape.pop(o_maindim) + else: + len_ = 1 + if tr_oshape != tr_shape: + raise ValueError( + "Shape for out container does not match expression" + ) + # Force the input length to fit in `out` + if not self.append_mode and olen_ < len_: + shape[o_maindim] = olen_ + stop = start + olen_ + + # Get the positions of inputs that should be sliced (the others + # will be broadcasted) + ndim = len(shape) + slice_pos = [ + i for i, val in enumerate(self.values) if len(val.shape) == ndim + ] + + # The size of the I/O buffer + nrowsinbuf = 1 + for i, val in enumerate(self.values): + # Skip scalar values in variables + if i in slice_pos: + nrows = self._calc_nrowsinbuf(val) + if nrows > nrowsinbuf: + nrowsinbuf = nrows + + if not itermode: + return ( + i_nrows, + slice_pos, + start, + stop, + step, + nrowsinbuf, + out, + o_maindim, + o_start, + o_stop, + o_step, + ) + else: + # For itermode, we don't need the out info + return (i_nrows, slice_pos, start, stop, step, nrowsinbuf) + + def eval(self) -> ContainerType: # noqa: A003 + """Evaluate the expression and return the outcome. + + Because of performance reasons, the computation order tries to go along + the common main dimension of all inputs. If not such a common main + dimension is found, the iteration will go along the leading dimension + instead. 
+ + For non-consistent shapes in inputs (i.e. shapes having a different + number of dimensions), the regular NumPy broadcast rules applies. + There is one exception to this rule though: when the dimensions + orthogonal to the main dimension of the expression are consistent, but + the main dimension itself differs among the inputs, then the shortest + one is chosen for doing the computations. This is so because trying to + expand very large on-disk arrays could be too expensive or simply not + possible. + + Also, the regular Numexpr casting rules (which are similar to those of + NumPy, although you should check the Numexpr manual for the exceptions) + are applied to determine the output type. + + Finally, if the set_output() method specifying a user container has + already been called, the output is sent to this user-provided + container. If not, a fresh NumPy container is returned instead. + + .. warning:: + + When dealing with large on-disk inputs, failing to specify an + on-disk container may consume all your available memory. 
+ + """ + values, shape, maindim = self.values, self.shape, self.maindim + + # Get different info we need for the main computation loop + ( + i_nrows, + slice_pos, + start, + stop, + step, + nrowsinbuf, + out, + o_maindim, + o_start, + o_stop, + o_step, + ) = self._get_info(shape, maindim) + + if i_nrows == 0: + # No elements to compute + if start >= stop and self.start is not None: + return out + else: + return self._single_row_out + + # Create a key that selects every element in inputs and output + # (including the main dimension) + i_slices = [slice(None)] * (maindim + 1) + o_slices = [slice(None)] * (o_maindim + 1) + + # This is a hack to prevent doing unnecessary flavor conversions + # while reading buffers + for val in values: + if hasattr(val, "maindim"): + val._v_convert = False + + # Start the computation itself + for start2 in range(start, stop, step * nrowsinbuf): + stop2 = start2 + step * nrowsinbuf + if stop2 > stop: + stop2 = stop + # Set the proper slice for inputs + i_slices[maindim] = slice(start2, stop2, step) + # Get the input values + vals = [] + for i, val in enumerate(values): + if i in slice_pos: + vals.append(val.__getitem__(tuple(i_slices))) + else: + # A read of values is not apparently needed, as PyTables + # leaves seems to work just fine inside Numexpr + vals.append(val) + # Do the actual computation for this slice + rout = self._compiled_expr(*vals) + # Set the values into the out buffer + if self.append_mode: + out.append(rout) + else: + # Compute the slice to be filled in output + start3 = o_start + (start2 - start) // step + stop3 = start3 + nrowsinbuf * o_step + if stop3 > o_stop: + stop3 = o_stop + o_slices[o_maindim] = slice(start3, stop3, o_step) + # Set the slice + out[tuple(o_slices)] = rout + + # Activate the conversion again (default) + for val in values: + if hasattr(val, "maindim"): + val._v_convert = True + + return out + + def __iter__(self) -> Iterator[Row]: + """Iterate over the rows of the outcome of the expression. 
+ + This iterator always returns rows as NumPy objects, so a possible out + container specified in :meth:`Expr.set_output` method is ignored here. + + """ + values, shape, maindim = self.values, self.shape, self.maindim + + # Get different info we need for the main computation loop + i_nrows, slice_pos, start, stop, step, nrowsinbuf = self._get_info( + shape, maindim, itermode=True + ) + + if i_nrows == 0: + # No elements to compute + return + + # Create a key that selects every element in inputs + # (including the main dimension) + i_slices = [slice(None)] * (maindim + 1) + + # This is a hack to prevent doing unnecessary flavor conversions + # while reading buffers + for val in values: + if hasattr(val, "maindim"): + val._v_convert = False + + # Start the computation itself + for start2 in range(start, stop, step * nrowsinbuf): + stop2 = start2 + step * nrowsinbuf + if stop2 > stop: + stop2 = stop + # Set the proper slice in the main dimension + i_slices[maindim] = slice(start2, stop2, step) + # Get the values for computing the buffer + vals = [] + for i, val in enumerate(values): + if i in slice_pos: + vals.append(val.__getitem__(tuple(i_slices))) + else: + # A read of values is not apparently needed, as PyTables + # leaves seems to work just fine inside Numexpr + vals.append(val) + # Do the actual computation + rout = self._compiled_expr(*vals) + # Return one row per call + yield from rout + + # Activate the conversion again (default) + for val in values: + if hasattr(val, "maindim"): + val._v_convert = True + + +if __name__ == "__main__": + + # shape = (10000,10000) + shape = (10, 10_000) + + f = tb.open_file("/tmp/expression.h5", "w") + + # Create some arrays + a = f.create_carray(f.root, "a", atom=tb.Float32Atom(dflt=1), shape=shape) + b = f.create_carray(f.root, "b", atom=tb.Float32Atom(dflt=2), shape=shape) + c = f.create_carray(f.root, "c", atom=tb.Float32Atom(dflt=3), shape=shape) + out = f.create_carray( + f.root, "out", atom=tb.Float32Atom(dflt=3), 
shape=shape + ) + + expr = Expr("a * b + c") + expr.set_output(out) + d = expr.eval() + + print("returned-->", repr(d)) + # print(`d[:]`) + + f.close() diff --git a/venv/Lib/site-packages/tables/file.py b/venv/Lib/site-packages/tables/file.py new file mode 100644 index 0000000..021139d --- /dev/null +++ b/venv/Lib/site-packages/tables/file.py @@ -0,0 +1,3037 @@ +"""Create PyTables files and the object tree. + +This module support importing generic HDF5 files, on top of which +PyTables files are created, read or extended. If a file exists, an +object tree mirroring their hierarchical structure is created in memory. +File class offer methods to traverse the tree, as well as to create new +nodes. + +""" + +from __future__ import annotations + +import os +import atexit +import weakref +import datetime +import warnings +from typing import Any, Literal +from pathlib import Path +from collections import defaultdict +from collections.abc import Callable, Generator, Iterator + +import numpy as np +import numexpr as ne +import numpy.typing as npt + +from . 
import ( + hdf5extension, + utilsextension, + parameters, + undoredo, + linkextension, + lrucacheextension, +) +from .atom import Atom +from .leaf import Leaf +from .link import SoftLink, ExternalLink +from .node import Node, NotLoggedMixin +from .path import join_path, split_path +from .array import Array +from .group import Group, RootGroup, TransactionGroupG, TransactionG, MarkG +from .table import Table +from .utils import detect_number_of_cores +from .carray import CArray +from .earray import EArray +from .flavor import flavor_of, array_as_internal +from .filters import Filters +from .vlarray import VLArray +from .registry import get_class_by_name +from .exceptions import ( + ClosedFileError, + FileModeError, + NodeError, + NoSuchNodeError, + UnclosedFileWarning, + UndoRedoError, + ClosedNodeError, + PerformanceWarning, +) +from .description import ( + Description, + IsDescription, + UInt8Col, + StringCol, + descr_from_dtype, + dtype_from_descr, +) + +# format_version = "1.0" # Initial format +# format_version = "1.1" # Changes in ucl compression +# format_version = "1.2" # Support for enlargeable arrays and VLA's +# # 1.2 was introduced in PyTables 0.8 +# format_version = "1.3" # Support for indexes in Tables +# # 1.3 was introduced in PyTables 0.9 +# format_version = "1.4" # Support for multidimensional attributes +# # 1.4 was introduced in PyTables 1.1 +# format_version = "1.5" # Support for persistent defaults in tables +# # 1.5 was introduced in PyTables 1.2 +# format_version = "1.6" # Support for NumPy objects and new flavors for +# # objects. +# # 1.6 was introduced in pytables 1.3 +# format_version = "2.0" # Pickles are not used anymore in system attrs +# # 2.0 was introduced in PyTables 2.0 +format_version = "2.1" # Numeric and numarray flavors are gone. 
+ +compatible_formats = [] # Old format versions we can read +# # Empty means that we support all the old formats + + +class _FileRegistry: + def __init__(self) -> None: + self._name_mapping: dict[str, set[File]] = defaultdict(set) + self._handlers: set[File] = set() + + @property + def filenames(self) -> list[str]: + return list(self._name_mapping) + + @property + def handlers(self) -> set[File]: + # return set(self._handlers) # return a copy + return self._handlers + + def __len__(self) -> int: + return len(self._handlers) + + def __contains__(self, filename: str) -> bool: + return filename in self.filenames + + def add(self, handler: File) -> None: + self._name_mapping[handler.filename].add(handler) + self._handlers.add(handler) + + def remove(self, handler: File) -> None: + filename = handler.filename + self._name_mapping[filename].remove(handler) + # remove enpty keys + if not self._name_mapping[filename]: + del self._name_mapping[filename] + self._handlers.remove(handler) + + def get_handlers_by_name(self, filename: str) -> set[File]: + # return set(self._name_mapping[filename]) # return a copy + return self._name_mapping[filename] + + def close_all(self) -> None: + handlers = list(self._handlers) # make a copy + for fileh in handlers: + msg = f"Closing remaining open file: {fileh.filename}" + warnings.warn(UnclosedFileWarning(msg)) + fileh.close() + + +# Dict of opened files (keys are filenames and values filehandlers) +_open_files = _FileRegistry() + +# Opcodes for do-undo actions +_op_to_code = { + "MARK": 0, + "CREATE": 1, + "REMOVE": 2, + "MOVE": 3, + "ADDATTR": 4, + "DELATTR": 5, +} + +_code_to_op = ["MARK", "CREATE", "REMOVE", "MOVE", "ADDATTR", "DELATTR"] + + +# Paths and names for hidden nodes related with transactions. 
+_trans_version = "1.0" + +_trans_group_parent = "/" +_trans_group_name = "_p_transactions" +_trans_group_path = join_path(_trans_group_parent, _trans_group_name) + +_action_log_parent = _trans_group_path +_action_log_name = "actionlog" +_action_log_path = join_path(_action_log_parent, _action_log_name) + +_trans_parent = _trans_group_path +_trans_name = "t%d" # %d -> transaction number +_trans_path = join_path(_trans_parent, _trans_name) + +_mark_parent = _trans_path +_mark_name = "m%d" # %d -> mark number +_mark_path = join_path(_mark_parent, _mark_name) + +_shadow_parent = _mark_path +_shadow_name = "a%d" # %d -> action number +_shadow_path = join_path(_shadow_parent, _shadow_name) + + +def _checkfilters(filters: Filters) -> None: + if not (filters is None or isinstance(filters, Filters)): + raise TypeError( + f"filter parameter has to be None or a Filter " + f"instance and the passed type is: '{type(filters)}'" + ) + + +def copy_file( + srcfilename: str, dstfilename: str, overwrite: bool = False, **kwargs +) -> None: + """Copy the content of a PyTables file to another. + + This function allows you to copy an existing PyTables file named + srcfilename to another file called dstfilename. The source file + must exist and be readable. The destination file can be + overwritten in place if existing by asserting the overwrite + argument. + + This function is a shorthand for the :meth:`File.copy_file` method, + which acts on an already opened file. kwargs takes keyword + arguments used to customize the copying process. See the + documentation of :meth:`File.copy_file` for a description of those + arguments. + + """ + # Open the source file. + srcfileh = open_file(srcfilename, mode="r") + + try: + # Copy it to the destination file. + srcfileh.copy_file(dstfilename, overwrite=overwrite, **kwargs) + finally: + # Close the source file. 
# The open policy depends on the HDF5 library in use: releases older than
# 1.8.7 get the "strict" policy, every other release the "default" one.
hdf5_version_str = utilsextension.get_hdf5_version()
hdf5_version_tup = tuple(
    int(field) for field in hdf5_version_str.split("-")[0].split(".")
)
_FILE_OPEN_POLICY = "default" if hdf5_version_tup >= (1, 8, 7) else "strict"


def open_file(
    filename: str,
    mode: Literal["r", "w", "a", "r+"] = "r",
    title: str = "",
    root_uep: str = "/",
    filters: Filters | None = None,
    **kwargs,
) -> File:
    """Open a PyTables (or generic HDF5) file and return a File object.

    Before the file is opened, the handlers already registered for the
    same file name are inspected and the request is rejected when it is
    not compatible with them.

    Parameters
    ----------
    filename : str
        The name of the file (supports environment variable expansion).
        It is suggested that file names have any of the .h5, .hdf or
        .hdf5 extensions, although this is not mandatory.
    mode : str
        The mode to open the file. It can be one of the
        following:

        * *'r'*: Read-only; no data can be modified.
        * *'w'*: Write; a new file is created (an existing file
          with the same name would be deleted).
        * *'a'*: Append; an existing file is opened for reading and
          writing, and if the file does not exist it is created.
        * *'r+'*: It is similar to 'a', but the file must already
          exist.

    title : str
        If the file is to be created, a TITLE string attribute will be
        set on the root group with the given value. Otherwise, the
        title will be read from disk, and this will not have any effect.
    root_uep : str
        The root User Entry Point. This is a group in the HDF5 hierarchy
        which will be taken as the starting point to create the object
        tree. It can be whatever existing group in the file, named by
        its HDF5 path. If it does not exist, an HDF5ExtError is issued.
        Use this if you do not want to build the *entire* object tree,
        but rather only a *subtree* of it.

        .. versionchanged:: 3.0
            The *rootUEP* parameter has been renamed into *root_uep*.

    filters : Filters
        An instance of the Filters (see :ref:`FiltersClassDescr`) class
        that provides information about the desired I/O filters
        applicable to the leaves that hang directly from the *root group*,
        unless other filter properties are specified for these leaves.
        Besides, if you do not specify filter properties for child groups,
        they will inherit these ones, which will in turn propagate to
        child nodes.

    Raises
    ------
    ValueError
        If the file is already opened and the requested `mode` cannot
        coexist with the existing handlers (under the "strict" policy,
        whenever the file is already opened).

    Notes
    -----
    In addition, it recognizes the (lowercase) names of parameters
    present in :file:`tables/parameters.py` as additional keyword
    arguments.
    See :ref:`parameter_files` for a detailed info on the supported
    parameters.

    .. note::

        If you need to deal with a large number of nodes in an
        efficient way, please see :ref:`LRUOptim` for more info and
        advices about the integrated node cache engine.

    """
    filename = os.fspath(filename)
    # XXX filename normalization ??

    if _FILE_OPEN_POLICY == "strict":
        # The strict policy never allows a second handler on the same
        # file, not even when both of them are read-only.
        if filename in _open_files:
            details = (
                filename,
                utilsextension.get_hdf5_version(),
                _FILE_OPEN_POLICY,
            )
            raise ValueError(
                "The file '%s' is already opened. "
                "Please close it before reopening. "
                "HDF5 v.%s, FILE_OPEN_POLICY = '%s'" % details
            )
    else:
        for handler in _open_files.get_handlers_by_name(filename):
            opened_mode = handler.mode
            if mode == "r":
                # 'r' is incompatible with everything except 'r' itself
                if opened_mode != "r":
                    raise ValueError(
                        "The file '%s' is already opened, but "
                        "not in read-only mode (as requested)." % filename
                    )
            elif mode in ("a", "r+"):
                # 'a' and 'r+' are compatible with everything except 'r'
                if opened_mode == "r":
                    raise ValueError(
                        "The file '%s' is already opened, but "
                        "in read-only mode. Please close it before "
                        "reopening in append mode." % filename
                    )
            elif mode == "w":
                # 'w' means that we want to destroy existing contents
                raise ValueError(
                    "The file '%s' is already opened. Please "
                    "close it before reopening in write mode." % filename
                )

    # Every check passed: build the File instance and hand it back.
    return File(filename, mode, title, root_uep, filters, **kwargs)


class _NoCache:
    """Cache stand-in that never keeps anything.

    It always looks empty: assignments are discarded and ``pop`` can
    only hand back the default supplied by the caller.
    """

    # Private sentinel telling "no default given" apart from ``None``.
    __marker = object()

    def __len__(self) -> int:
        return 0

    def __contains__(self, key: Any) -> bool:
        return False

    def __iter__(self) -> Iterator:
        return iter(())

    def __setitem__(self, key: Any, value: Any) -> None:
        """Discard the assignment."""

    def pop(self, key, d=__marker):
        """Return `d` when it is given, raise ``KeyError`` otherwise."""
        if d is self.__marker:
            raise KeyError(key)
        return d
class _DictCache(dict):
    """Plain dictionary used as a node cache that never evicts entries.

    `nslots` is only the size above which a ``PerformanceWarning`` is
    issued on insertion; nothing is ever dropped from the dictionary.
    """

    def __init__(self, nslots: int) -> None:
        if nslots < 1:
            raise ValueError("Invalid number of slots: %d" % nslots)
        self.nslots = nslots
        super().__init__()

    def __setitem__(self, key: Any, value: Any) -> None:
        # Check if we are running out of space
        if len(self) > self.nslots:
            warnings.warn(
                "the dictionary of node cache is exceeding the recommended "
                "maximum number (%d); be ready to see PyTables asking for "
                "*lots* of memory and possibly slow I/O." % (self.nslots),
                PerformanceWarning,
            )
        super().__setitem__(key, value)


class NodeManager:
    """Node manager.

    Nodes are tracked in two containers, both keyed by node path:

    * ``registry``: a :class:`weakref.WeakValueDictionary`, so an entry
      disappears as soon as its node is garbage collected;
    * ``cache``: chosen from `nslots`: an LRU ``NodeCache`` when it is
      positive, a ``_NoCache`` when it is zero and a ``_DictCache`` of
      ``-nslots`` slots when it is negative.

    Parameters
    ----------
    nslots : int
        Selects the kind and the size of the cache (see above).
    node_factory : callable, optional
        Called as ``node_factory(node_path)`` by :meth:`get_node` to load
        a node that is neither in the cache nor in the registry.

    """

    def __init__(self, nslots: int = 64, node_factory=None) -> None:
        super().__init__()

        self.registry = weakref.WeakValueDictionary()

        if nslots > 0:
            cache = lrucacheextension.NodeCache(nslots)
        elif nslots == 0:
            cache = _NoCache()
        else:
            # nslots < 0
            cache = _DictCache(-nslots)

        self.cache = cache

        # node_factory(node_path)
        self.node_factory = node_factory

    def register_node(self, node: Node, key: str | None = None) -> None:
        """Register a node.

        `key` defaults to the path name of the node.  An entry that
        points to a closed node is silently replaced.

        Raises
        ------
        RuntimeError
            If `key` is already bound to a different node that is
            still open.

        """
        if key is None:
            key = node._v_pathname

        if key in self.registry:
            if not self.registry[key]._v_isopen:
                del self.registry[key]
                self.registry[key] = node
            elif self.registry[key] is not node:
                raise RuntimeError(
                    "trying to register a node with an "
                    "existing key: ``%s``" % key
                )
        else:
            self.registry[key] = node

    def cache_node(self, node: Node, key: str | None = None) -> None:
        """Register a node and put it into the cache.

        `key` defaults to the path name of the node.

        Raises
        ------
        RuntimeError
            If the cache already holds a different, still open node
            under `key` (or if the registration fails).

        """
        if key is None:
            key = node._v_pathname

        self.register_node(node, key)
        if key in self.cache:
            oldnode = self.cache.pop(key)
            if oldnode is not node and oldnode._v_isopen:
                raise RuntimeError(
                    "trying to cache a node with an "
                    "existing key: ``%s``" % key
                )

        self.cache[key] = node

    def get_node(self, key: str) -> Node:
        """Return a node matching the input key.

        The cache is searched first, then the registry; as a last resort
        the node factory (if any) is asked to load the node.  ``None`` is
        returned when the node is not found and there is no factory.
        """
        node = self.cache.pop(key, None)
        if node is not None:
            if node._v_isopen:
                self.cache_node(node, key)
                return node
            # this should not happen
            warnings.warn("a closed node found in the cache: ``%s``" % key)
            # Forget the stale node, otherwise it could be returned below
            # when it is not in the registry and there is no node factory.
            node = None

        if key in self.registry:
            node = self.registry[key]
            if node is None:
                # this should not happen since WeakValueDictionary drops all
                # dead weakrefs
                warnings.warn(
                    "None is stored in the registry for key: ``%s``" % key
                )
            elif node._v_isopen:
                self.cache_node(node, key)
                return node
            else:
                # this should not happen
                warnings.warn(
                    "a closed node found in the registry: ``%s``" % key
                )
                del self.registry[key]
                node = None

        if self.node_factory:
            node = self.node_factory(key)
            self.cache_node(node, key)

        return node

    def rename_node(self, oldkey: str, newkey: str) -> None:
        """Rename a node: move its entries from `oldkey` to `newkey`."""
        for cache in (self.cache, self.registry):
            if oldkey in cache:
                node = cache.pop(oldkey)
                cache[newkey] = node

    def drop_from_cache(self, nodepath: str) -> None:
        """Remove the node from cache (the registry is left untouched)."""
        self.cache.pop(nodepath, None)

    def drop_node(self, node: Node, check_unregistered: bool = True) -> None:
        """Drop the `node`.

        Remove the node from the cache and, if it is already closed, from
        the registry too.  When `check_unregistered` is true, an open
        node that is missing from the registry is closed forcibly.

        """
        # Remove all references to the node.
        nodepath = node._v_pathname

        self.drop_from_cache(nodepath)

        if nodepath in self.registry:
            if not node._v_isopen:
                del self.registry[nodepath]
        elif check_unregistered:
            # If the node is not in the registry (this should never happen)
            # we close it forcibly since it is not ensured that the __del__
            # method is called for object that are still alive when the
            # interpreter is shut down
            if node._v_isopen:
                warnings.warn(
                    "dropping a node that is not in the registry: "
                    "``%s``" % nodepath
                )

                node._g_pre_kill_hook()
                node._f_close()

    def flush_nodes(self) -> None:
        """Flush all open leaves and forget the nodes found closed."""
        # Only iter on the nodes in the registry since nodes in the cache
        # should always have an entry in the registry
        closed_keys = []
        for path, node in list(self.registry.items()):
            if not node._v_isopen:
                closed_keys.append(path)
            elif "/_i_" not in path:  # Indexes are not necessary to be flushed
                if isinstance(node, Leaf):
                    node.flush()

        for path in closed_keys:
            if path in self.cache:
                warnings.warn("closed node in the cache: ``%s``" % path)
                self.cache.pop(path, None)
            # The cache may have been holding the last strong reference to
            # the node; in that case the weak registry has already dropped
            # the entry by itself, so a missing key must be tolerated.
            self.registry.pop(path, None)

    @staticmethod
    def _close_nodes(
        nodepaths: list[str], get_node: Callable[[str], Group | Node]
    ) -> None:
        """Close the nodes that `get_node` returns for `nodepaths`.

        Paths for which `get_node` raises ``KeyError`` are skipped, and
        so are the nodes already closed or being deleted.
        """
        for nodepath in nodepaths:
            try:
                node = get_node(nodepath)
            except KeyError:
                pass
            else:
                if not node._v_isopen or node._v__deleting:
                    continue

                try:
                    # Avoid descendent nodes to also iterate over
                    # their descendents, which are already to be
                    # closed by this loop.
                    if hasattr(node, "_f_get_child"):
                        node._g_close()
                    else:
                        node._f_close()
                    del node
                except ClosedNodeError:
                    # The node got closed in the meantime: nothing to do.
                    pass

    def close_subtree(self, prefix: str = "/") -> None:
        """Close a sub-tree of nodes.

        All cached and registered nodes whose path starts with `prefix`
        (a trailing ``/`` is appended when missing) are popped from their
        container and closed.
        """
        if not prefix.endswith("/"):
            prefix = prefix + "/"

        # The cache is emptied first, the registry afterwards.
        for container in (self.cache, self.registry):
            # Ensure tables are closed before their indices
            paths = [
                path
                for path in container
                if path.startswith(prefix) and "/_i_" not in path
            ]
            self._close_nodes(paths, container.pop)

            # Close everything else (i.e. indices)
            paths = [path for path in container if path.startswith(prefix)]
            self._close_nodes(paths, container.pop)

    def shutdown(self) -> None:
        """Shutdown the node manager: close every node still open."""
        registry = self.registry
        cache = self.cache

        # self.close_subtree('/')

        keys = list(cache)  # copy
        for key in keys:
            node = cache.pop(key)
            if node._v_isopen:
                registry.pop(node._v_pathname, None)
                node._f_close()

        while registry:
            key, node = registry.popitem()
            if node._v_isopen:
                node._f_close()
+ + Note that data handling operations can not be undone nor redone by + now. Also, hierarchy manipulation operations on nodes that do not + support the Undo/Redo mechanism issue an UndoRedoWarning *before* + changing the database. + + The Undo/Redo mechanism is persistent between sessions and can + only be disabled by calling the :meth:`File.disable_undo` method. + + File objects can also act as context managers when using the with + statement introduced in Python 2.5. When exiting a context, the + file is automatically closed. + + Parameters + ---------- + filename : str + The name of the file (supports environment variable expansion). + It is suggested that file names have any of the .h5, .hdf or + .hdf5 extensions, although this is not mandatory. + + mode : str + The mode to open the file. It can be one of the + following: + + * *'r'*: Read-only; no data can be modified. + * *'w'*: Write; a new file is created (an existing file + with the same name would be deleted). + * *'a'*: Append; an existing file is opened for reading + and writing, and if the file does not exist it is created. + * *'r+'*: It is similar to 'a', but the file must already + exist. + + title : str + If the file is to be created, a TITLE string attribute will be + set on the root group with the given value. Otherwise, the + title will be read from disk, and this will not have any effect. + + root_uep : str + The root User Entry Point. This is a group in the HDF5 hierarchy + which will be taken as the starting point to create the object + tree. It can be whatever existing group in the file, named by + its HDF5 path. If it does not exist, an HDF5ExtError is issued. + Use this if you do not want to build the *entire* object tree, + but rather only a *subtree* of it. + + .. versionchanged:: 3.0 + The *rootUEP* parameter has been renamed into *root_uep*. 
+ + filters : Filters + An instance of the Filters (see :ref:`FiltersClassDescr`) class that + provides information about the desired I/O filters applicable to the + leaves that hang directly from the *root group*, unless other filter + properties are specified for these leaves. Besides, if you do not + specify filter properties for child groups, they will inherit these + ones, which will in turn propagate to child nodes. + + Notes + ----- + In addition, it recognizes the (lowercase) names of parameters + present in :file:`tables/parameters.py` as additional keyword + arguments. + See :ref:`parameter_files` for a detailed info on the supported + parameters. + + + .. rubric:: File attributes + + .. attribute:: filename + + The name of the opened file. + + .. attribute:: format_version + + The PyTables version number of this file. + + .. attribute:: isopen + + True if the underlying file is open, false otherwise. + + .. attribute:: mode + + The mode in which the file was opened. + + .. attribute:: root + + The *root* of the object tree hierarchy (a Group instance). + + .. attribute:: root_uep + + The UEP (user entry point) group name in the file (see + the :func:`open_file` function). + + .. versionchanged:: 3.0 + The *rootUEP* attribute has been renamed into *root_uep*. + + """ + + # The top level kinds. Group must go first! + _node_kinds = ("Group", "Leaf", "Link", "Unknown") + + @property + def title(self) -> str: + """Title of the root group in the file.""" + return self.root._v_title + + @title.setter + def title(self, title: str) -> None: + self.root._v_title = title + + @title.deleter + def title(self) -> None: + del self.root._v_title + + @property + def filters(self) -> Filters: + """Filter properties for the root group. + + See :ref:`FiltersClassDescr`). 
+ """ + return self.root._v_filters + + @filters.setter + def filters(self, filters: Filters) -> None: + self.root._v_filters = filters + + @filters.deleter + def filters(self) -> None: + del self.root._v_filters + + def __init__( + self, + filename: str, + mode: Literal["r", "w", "a", "r+"] = "r", + title: str = "", + root_uep: str = "/", + filters: Filters | None = None, + **kwargs, + ) -> None: + + self.filename = os.fspath(filename) + """The name of the opened file.""" + + self.mode = mode + """The mode in which the file was opened.""" + + if mode not in ("r", "r+", "a", "w"): + raise ValueError( + "invalid mode string ``%s``. Allowed modes are: " + "'r', 'r+', 'a' and 'w'" % mode + ) + + # Get all the parameters in parameter file(s) + params = { + k: v + for k, v in parameters.__dict__.items() + if k.isupper() and not k.startswith("_") + } + # Update them with possible keyword arguments + if [k for k in kwargs if k.isupper()]: + warnings.warn( + "The use of uppercase keyword parameters is " "deprecated", + DeprecationWarning, + ) + + kwargs = {k.upper(): v for k, v in kwargs.items()} + params.update(kwargs) + + # If MAX_ * _THREADS is not set yet, set it to the number of cores + # on this machine. + + if params["MAX_NUMEXPR_THREADS"] is None: + params["MAX_NUMEXPR_THREADS"] = detect_number_of_cores() + + if params["MAX_BLOSC_THREADS"] is None: + params["MAX_BLOSC_THREADS"] = detect_number_of_cores() + + self.params = params + + # Now, it is time to initialize the File extension + self._g_new(filename, mode, **params) + + # Check filters and set PyTables format version for new files. + new = self._v_new + if new: + _checkfilters(filters) + self.format_version = format_version + """The PyTables version number of this file.""" + + # The node manager must be initialized before the root group + # initialization but the node_factory attribute is set onl later + # because it is a bound method of the root grop itself. 
+ node_cache_slots = params["NODE_CACHE_SLOTS"] + self._node_manager = NodeManager(nslots=node_cache_slots) + + # For the moment Undo/Redo is not enabled. + self._undoEnabled = False + + # Set the flag to indicate that the file has been opened. + # It must be set before opening the root group + # to allow some basic access to its attributes. + self.isopen = 1 + """True if the underlying file os open, False otherwise.""" + + # Append the name of the file to the global dict of files opened. + _open_files.add(self) + + # Set the number of times this file has been opened to 1 + self._open_count = 1 + + # Get the root group from this file + self.root = root = self.__get_root_group(root_uep, title, filters) + """The *root* of the object tree hierarchy (a Group instance).""" + # Complete the creation of the root node + # (see the explanation in ``RootGroup.__init__()``). + root._g_post_init_hook() + self._node_manager.node_factory = self.root._g_load_child + + # Save the PyTables format version for this file. + if new: + if params["PYTABLES_SYS_ATTRS"]: + root._v_attrs._g__setattr( + "PYTABLES_FORMAT_VERSION", format_version + ) + + # If the file is old, and not opened in "read-only" mode, + # check if it has a transaction log + if not new and self.mode != "r" and _trans_group_path in self: + # It does. Enable the undo. + self.enable_undo() + + # Set the maximum number of threads for Numexpr + ne.set_vml_num_threads(params["MAX_NUMEXPR_THREADS"]) + + def __get_root_group( + self, root_uep: str | None, title: str, filters: Filters + ) -> RootGroup: + """Return a Group instance which can act as the root group in a tree. + + If file is opened in "r", "r+" or "a" mode, and the file already + exists, this method dynamically builds a python object tree + emulating the structure present on file. 
+ + """ + self._v_objectid = self._get_file_id() + + if root_uep in [None, ""]: + root_uep = "/" + # Save the User Entry Point in a variable class + self.root_uep = root_uep + + new = self._v_new + + # Get format version *before* getting the object tree + if not new: + # Firstly, get the PyTables format version for this file + self.format_version = utilsextension.read_f_attr( + self._v_objectid, "PYTABLES_FORMAT_VERSION" + ) + if not self.format_version: + # PYTABLES_FORMAT_VERSION attribute is not present + self.format_version = "unknown" + self._isPTFile = False + elif not isinstance(self.format_version, str): + # system attributes should always be str + self.format_version = self.format_version.decode("utf-8") + + # Create new attributes for the root Group instance and + # create the object tree + return RootGroup(self, root_uep, title=title, new=new, filters=filters) + + def _get_or_create_path( + self, path: Node | str, create: bool + ) -> Group | Node | RootGroup: + """Get the given `path` or create it if `create` is true. + + If `create` is true, `path` *must* be a string path and not a + node, otherwise a `TypeError`will be raised. + + """ + if create: + path = path._v_pathname if hasattr(path, "_v_pathname") else path + return self._create_path(path) + else: + return self.get_node(path) + + def _create_path(self, path: str) -> Group: + """Create the groups needed for the `path` to exist. + + The group associated with the given `path` is returned. 
+ + """ + if not hasattr(path, "split"): + raise TypeError("when creating parents, parent must be a path") + + if path == "/": + return self.root + + parent, create_group = self.root, self.create_group + for pcomp in path.split("/")[1:]: + try: + child = parent._f_get_child(pcomp) + except NoSuchNodeError: + child = create_group(parent, pcomp) + parent = child + return parent + + def create_group( + self, + where: Group | str, + name: str, + title: str = "", + filters: Filters | None = None, + createparents: bool = False, + ) -> Group: + """Create a new group. + + Parameters + ---------- + where : str or Group + The parent group from which the new group will hang. It can be a + path string (for example '/level1/leaf5'), or a Group instance + (see :ref:`GroupClassDescr`). + name : str + The name of the new group. + title : str, optional + A description for this node (it sets the TITLE HDF5 attribute on + disk). + filters : Filters + An instance of the Filters class (see :ref:`FiltersClassDescr`) + that provides information about the desired I/O filters applicable + to the leaves that hang directly from this new group (unless other + filter properties are specified for these leaves). Besides, if you + do not specify filter properties for its child groups, they will + inherit these ones. + createparents : bool + Whether to create the needed groups for the parent + path to exist (not done by default). 
+ + See Also + -------- + Group : for more information on groups + + """ + parentnode = self._get_or_create_path(where, createparents) + _checkfilters(filters) + return Group(parentnode, name, title=title, new=True, filters=filters) + + def create_table( + self, + where: Group | str, + name: str, + description: ( + dict | type[IsDescription] | Description | npt.DTypeLike | None + ) = None, + title: str = "", + filters: Filters | None = None, + expectedrows: int = 10_000, + chunkshape: int | tuple[int] | None = None, + byteorder: str | None = None, + createparents: bool = False, + obj: np.ndarray | None = None, + track_times: bool = True, + ) -> Table: + """Create a new table with the given name in where location. + + Parameters + ---------- + where : str or Group + The parent group from which the new table will hang. It can be a + path string (for example '/level1/leaf5'), or a Group instance + (see :ref:`GroupClassDescr`). + name : str + The name of the new table. + description : Description + This is an object that describes the table, i.e. how + many columns it has, their names, types, shapes, etc. It + can be any of the following: + + * *A user-defined class*: This should inherit from the + IsDescription class (see :ref:`IsDescriptionClassDescr`) + where table fields are specified. + * *A dictionary*: For example, when you do not know + beforehand which structure your table will have). + * *A Description instance*: You can use the description + attribute of another table to create a new one with the + same structure. + * *A NumPy dtype*: A completely general structured NumPy + dtype. + * *A NumPy (structured) array instance*: The dtype of + this structured array will be used as the description. + Also, in case the array has actual data, it will be + injected into the newly created table. + + .. versionchanged:: 3.0 + The *description* parameter can be None (default) if *obj* is + provided. In that case the structure of the table is deduced + by *obj*. 
+ + title : str + A description for this node (it sets the TITLE HDF5 attribute + on disk). + filters : Filters + An instance of the Filters class (see :ref:`FiltersClassDescr`) + that provides information about the desired I/O filters to be + applied during the life of this object. + expectedrows : int + A user estimate of the number of records that will be in the table. + If not provided, the default value is EXPECTED_ROWS_TABLE (see + :file:`tables/parameters.py`). If you plan to create a bigger + table try providing a guess; this will optimize the HDF5 B-Tree + creation and management process time and memory used. + chunkshape + The shape of the data chunk to be read or written in a + single HDF5 I/O operation. Filters are applied to those + chunks of data. The rank of the chunkshape for tables must + be 1. If None, a sensible value is calculated based on the + expectedrows parameter (which is recommended). + byteorder : str + The byteorder of data *on disk*, specified as 'little' or 'big'. + If this is not specified, the byteorder is that of the platform, + unless you passed an array as the description, in which case + its byteorder will be used. + createparents : bool + Whether to create the needed groups for the parent path to exist + (not done by default). + obj : python object + The recarray to be saved. Accepted types are NumPy record + arrays. + + The *obj* parameter is optional and it can be provided in + alternative to the *description* parameter. + If both *obj* and *description* are provided they must + be consistent with each other. + + .. versionadded:: 3.0 + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, + object birth time); default True. Semantics of these times + depend on their implementation in the HDF5 library: refer to + documentation of the H5O_info_t data structure. As of HDF5 + 1.8.15, only ctime (metadata change time) is implemented. + + .. 
versionadded:: 3.4.3 + + See Also + -------- + Table : for more information on tables + + """ + if obj is not None: + if not isinstance(obj, np.ndarray): + raise TypeError("invalid obj parameter %r" % obj) + + descr, _ = descr_from_dtype(obj.dtype, ptparams=self.params) + if ( + description is not None + and dtype_from_descr(description, ptparams=self.params) + != obj.dtype + ): + raise TypeError( + "the desctiption parameter is not consistent " + "with the data type of the obj parameter" + ) + elif description is None: + description = descr + + parentnode = self._get_or_create_path(where, createparents) + if description is None: + raise ValueError("invalid table description: None") + _checkfilters(filters) + + ptobj = Table( + parentnode, + name, + description=description, + title=title, + filters=filters, + expectedrows=expectedrows, + chunkshape=chunkshape, + byteorder=byteorder, + track_times=track_times, + ) + + if obj is not None: + ptobj.append(obj) + + return ptobj + + def create_array( + self, + where: Group | str, + name: str, + obj: np.ndarray | None = None, + title: str = "", + byteorder: str | None = None, + createparents: bool = False, + atom: Atom | None = None, + shape: tuple[int, ...] | None = None, + track_times: bool = True, + ) -> Array: + """Create a new array. + + Parameters + ---------- + where : str or Group + The parent group from which the new array will hang. It can be a + path string (for example '/level1/leaf5'), or a Group instance + (see :ref:`GroupClassDescr`). + name : str + The name of the new array + obj : python object + The array or scalar to be saved. Accepted types are NumPy + arrays and scalars, as well as native Python sequences and + scalars, provided that values are regular (i.e. they are + not like ``[[1,2],2]``) and homogeneous (i.e. all the + elements are of the same type). 
+ + Also, objects that have some of their dimensions equal to 0 + are not supported (use an EArray node (see + :ref:`EArrayClassDescr`) if you want to store an array with + one of its dimensions equal to 0). + + .. versionchanged:: 3.0 + The *Object parameter has been renamed into *obj*.* + + title : str + A description for this node (it sets the TITLE HDF5 attribute on + disk). + byteorder : str + The byteorder of the data *on disk*, specified as 'little' or + 'big'. If this is not specified, the byteorder is that of the + given object. + createparents : bool, optional + Whether to create the needed groups for the parent path to exist + (not done by default). + atom : Atom + An Atom (see :ref:`AtomClassDescr`) instance representing + the *type* and *shape* of the atomic objects to be saved. + + .. versionadded:: 3.0 + + shape : tuple of ints + The shape of the stored array. + + .. versionadded:: 3.0 + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, + object birth time); default True. Semantics of these times + depend on their implementation in the HDF5 library: refer to + documentation of the H5O_info_t data structure. As of HDF5 + 1.8.15, only ctime (metadata change time) is implemented. + + .. versionadded:: 3.4.3 + + See Also + -------- + Array : for more information on arrays + create_table : for more information on the rest of parameters + + """ + if obj is None: + if atom is None or shape is None: + raise TypeError( + "if the obj parameter is not specified " + "(or None) then both the atom and shape " + "parametes should be provided." + ) + else: + # Making strides=(0,...) 
below is a trick to create the + # array fast and without memory consumption + dflt = np.zeros((), dtype=atom.dtype) + obj = np.ndarray( + shape, + dtype=atom.dtype, + buffer=dflt, + strides=(0,) * len(shape), + ) + else: + flavor = flavor_of(obj) + # use a temporary object because converting obj at this stage + # breaks some test. This is solution performs a double, + # potentially expensive, conversion of the obj parameter. + _obj = array_as_internal(obj, flavor) + + if shape is not None and shape != _obj.shape: + raise TypeError("the shape parameter do not match obj.shape") + + if atom is not None and atom.dtype != _obj.dtype: + raise TypeError( + "the atom parameter is not consistent with " + "the data type of the obj parameter" + ) + + parentnode = self._get_or_create_path(where, createparents) + return Array( + parentnode, + name, + obj=obj, + title=title, + byteorder=byteorder, + track_times=track_times, + ) + + def create_carray( + self, + where: Group | str, + name: str, + atom: Atom | None = None, + shape: tuple[int, ...] | None = None, + title: str = "", + filters: Filters | None = None, + chunkshape: int | tuple[int, ...] | None = None, + byteorder: str | None = None, + createparents: bool = False, + obj: np.ndarray | None = None, + track_times: bool = True, + ) -> CArray: + """Create a new chunked array. + + Parameters + ---------- + where : str or Group + The parent group from which the new array will hang. It can + be a path string (for example '/level1/leaf5'), or a Group + instance (see :ref:`GroupClassDescr`). + name : str + The name of the new array + atom : Atom + An Atom (see :ref:`AtomClassDescr`) instance representing + the *type* and *shape* of the atomic objects to be saved. + + .. versionchanged:: 3.0 + The *atom* parameter can be None (default) if *obj* is + provided. + + shape : tuple + The shape of the new array. + + .. versionchanged:: 3.0 + The *shape* parameter can be None (default) if *obj* is + provided. 
+ + title : str, optional + A description for this node (it sets the TITLE HDF5 attribute + on disk). + filters : Filters, optional + An instance of the Filters class (see :ref:`FiltersClassDescr`) + that provides information about the desired I/O filters to + be applied during the life of this object. + chunkshape : tuple or number or None, optional + The shape of the data chunk to be read or written in a + single HDF5 I/O operation. Filters are applied to those + chunks of data. The dimensionality of chunkshape must be + the same as that of shape. If None, a sensible value is + calculated (which is recommended). + byteorder : str, optional + The byteorder of the data *on disk*, specified as 'little' + or 'big'. If this is not specified, the byteorder is that + of the given object. + createparents : bool, optional + Whether to create the needed groups for the parent path to + exist (not done by default). + obj : python object + The array or scalar to be saved. Accepted types are NumPy + arrays and scalars, as well as native Python sequences and + scalars, provided that values are regular (i.e. they are + not like ``[[1,2],2]``) and homogeneous (i.e. all the + elements are of the same type). + + Also, objects that have some of their dimensions equal to 0 + are not supported. Please use an EArray node (see + :ref:`EArrayClassDescr`) if you want to store an array with + one of its dimensions equal to 0. + + The *obj* parameter is optional and it can be provided in + alternative to the *atom* and *shape* parameters. + If both *obj* and *atom* and/or *shape* are provided they must + be consistent with each other. + + .. versionadded:: 3.0 + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, + object birth time); default True. Semantics of these times + depend on their implementation in the HDF5 library: refer to + documentation of the H5O_info_t data structure. 
def create_earray(
    self,
    where: Group | str,
    name: str,
    atom: Atom | None = None,
    shape: tuple[int, ...] | None = None,
    title: str = "",
    filters: Filters | None = None,
    expectedrows: int = 1000,
    chunkshape: int | tuple[int, ...] | None = None,
    byteorder: str | None = None,
    createparents: bool = False,
    obj: np.ndarray | None = None,
    track_times: bool = True,
) -> EArray:
    """Create a new enlargeable array node and return it.

    Parameters
    ----------
    where : str or Group
        Parent group for the new array, given either as a path string
        (for example '/level1/leaf5') or as a Group instance
        (see :ref:`GroupClassDescr`).
    name : str
        Name of the new array.
    atom : Atom, optional
        Atom (see :ref:`AtomClassDescr`) describing the *type* and
        *shape* of the atomic objects to store.  May be left as None
        when *obj* is given; it is then derived from ``obj.dtype``.
    shape : tuple, optional
        Shape of the new array.  Exactly one dimension must be 0: that
        is the dimension along which the array can grow.  May be left
        as None when *obj* is given; it is then ``(0,)`` followed by
        ``obj.shape[1:]``.
    title : str, optional
        Description of the node (stored as the TITLE HDF5 attribute).
    filters : Filters, optional
        Filters instance (see :ref:`FiltersClassDescr`) with the I/O
        filters to apply during the life of the node.
    expectedrows : int, optional
        Estimate of the number of rows that will be appended along the
        enlargeable dimension; used to tune chunking.  Defaults to 1000.
    chunkshape : tuple, int or None, optional
        Shape of the chunk read or written in a single HDF5 I/O
        operation.  It must have as many dimensions as *shape* and no
        dimension may be 0.  If None, a value is computed from
        *expectedrows* (recommended).
    byteorder : str, optional
        On-disk byte order, 'little' or 'big'.
    createparents : bool, optional
        Whether missing groups in the parent path are created.
    obj : python object, optional
        Initial data (NumPy arrays/scalars or regular, homogeneous
        Python sequences/scalars).  It can be passed instead of *atom*
        and *shape*; when passed together with them they must agree.
    track_times : bool, optional
        Whether HDF5 object times are recorded for the leaf.

    Raises
    ------
    TypeError
        If *shape* or *atom* is inconsistent with *obj*.

    See Also
    --------
    EArray : for more information on enlargeable arrays

    """
    if obj is not None:
        # Normalise the input to the internal array representation
        # before inspecting its shape and dtype.
        obj = array_as_internal(obj, flavor_of(obj))

        # The first dimension is the enlargeable one.
        expected_shape = (0,) + obj.shape[1:]
        if shape is not None and shape != expected_shape:
            raise TypeError(
                "the shape parameter is not compatible with obj.shape."
            )
        shape = expected_shape

        if atom is None:
            atom = Atom.from_dtype(obj.dtype)
        elif atom.dtype != obj.dtype:
            raise TypeError(
                "the atom parameter is not consistent with "
                "the data type of the obj parameter"
            )

    parentnode = self._get_or_create_path(where, createparents)
    _checkfilters(filters)
    earray = EArray(
        parentnode,
        name,
        atom=atom,
        shape=shape,
        title=title,
        filters=filters,
        expectedrows=expectedrows,
        chunkshape=chunkshape,
        byteorder=byteorder,
        track_times=track_times,
    )

    # Seed the freshly created array with the supplied data, if any.
    if obj is not None:
        earray.append(obj)

    return earray
+ If not provided, the default value is ``EXPECTED_ROWS_VLARRAY`` + (see ``tables/parameters.py``). If you plan to create either + a much smaller or a much bigger `VLArray` try providing a guess; + this will optimize the HDF5 B-Tree creation and management + process time and the amount of memory used. + + .. versionadded:: 3.0 + + chunkshape : int or tuple of int, optional + The shape of the data chunk to be read or written in a + single HDF5 I/O operation. Filters are applied to those + chunks of data. The dimensionality of chunkshape must be 1. + If None, a sensible value is calculated (which is recommended). + byteorder : str, optional + The byteorder of the data *on disk*, specified as 'little' or + 'big'. If this is not specified, the byteorder is that of the + platform. + createparents : bool, optional + Whether to create the needed groups for the parent path to + exist (not done by default). + obj : python object + The array or scalar to be saved. Accepted types are NumPy + arrays and scalars, as well as native Python sequences and + scalars, provided that values are regular (i.e. they are + not like ``[[1,2],2]``) and homogeneous (i.e. all the + elements are of the same type). + + The *obj* parameter is optional and it can be provided in + alternative to the *atom* parameter. + If both *obj* and *atom* are provided they must + be consistent with each other. + + .. versionadded:: 3.0 + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, + object birth time); default True. Semantics of these times + depend on their implementation in the HDF5 library: refer to + documentation of the H5O_info_t data structure. As of HDF5 + 1.8.15, only ctime (metadata change time) is implemented. + + .. versionadded:: 3.4.3 + + See Also + -------- + VLArray : for more information on variable-length arrays + + .. 
def create_hard_link(
    self,
    where: Node | str,
    name: str,
    target: Node | str,
    createparents: bool = False,
) -> Group | Leaf:
    """Create a hard link called `name` under `where` pointing to `target`.

    Parameters
    ----------
    where : str or Node
        Location (path string or node) that will contain the link.
    name : str
        Name of the link inside `where`.
    target : str or Node
        Existing node the link refers to, as a node object or a path
        string.
    createparents : bool, optional
        If true, intermediate groups needed to reach `where` are
        created (not done by default).

    Returns
    -------
    Group or Leaf
        The node obtained by looking up `name` under `where` once the
        link exists; it is a regular `Group` or `Leaf` instance.

    """
    # Resolve the target first so that a missing target fails before
    # any parent group gets created.
    linked_node = self.get_node(target)
    container = self._get_or_create_path(where, createparents)
    linkextension._g_create_hard_link(container, name, linked_node)
    # Make the container aware of its new child name.
    container._g_add_children_names()
    return self.get_node(container, name)
def create_external_link(
    self,
    where: Node | str,
    name: str,
    target: Node | str,
    createparents: bool = False,
) -> ExternalLink:
    """Create an external link.

    Create an external link to a *target* node with the given *name*
    in *where* location.

    Parameters
    ----------
    where : str or Node
        Location (path string or node) that will contain the link.
    name : str
        Name of the link inside *where*.
    target : str or Node
        Either a node object living in another file, or a string of
        the form ``'file:/path/to/node'``.
    createparents : bool, optional
        If true, the intermediate groups required for reaching *where*
        are created (the default is not doing so).

    Returns
    -------
    ExternalLink
        The newly created :class:`ExternalLink` instance.

    Raises
    ------
    ValueError
        If *target* is neither a string nor a node-like object, or if
        it is a string that does not contain the ``':/'`` separator.

    """
    if isinstance(target, str):
        # A textual target must carry both the file and the node path.
        if ":/" not in target:
            raise ValueError(
                "`target` must be expressed as 'file:/path/to/node'"
            )
    elif hasattr(target, "_v_pathname"):  # quacks like a Node
        target = target._v_file.filename + ":" + target._v_pathname
    else:
        raise ValueError("`target` has to be a string or a node object")

    parentnode = self._get_or_create_path(where, createparents)
    elink = ExternalLink(parentnode, name, target)
    # Refresh children names in link's parent node
    parentnode._g_add_children_names()
    return elink
def get_node(
    self,
    where: Node | str,
    name: str | None = None,
    classname: str | None = None,
) -> Node:
    """Return the node found at `where`, optionally its child `name`.

    Parameters
    ----------
    where : str or Node
        A path string (it must start with '/') or a Node instance
        (see :ref:`NodeClassDescr`).  When `where` is a Node, the
        lookup is performed in the file that node belongs to, which
        may differ from this one.
    name : str, optional
        Name of a child of `where`.  When given, the child is returned
        instead of `where` itself.
    classname : str, optional
        Name of a class derived from Node (e.g. Table) that the
        resulting node must be an instance of.

    Raises
    ------
    NameError
        If `where` is a string that does not start with a slash.
    TypeError
        If `where` is neither a string nor a Node.
    NoSuchNodeError
        If the node is found but is not an instance of `classname`.

    """
    self._check_open()

    # Work out which file object performs the lookup and from which
    # base path the final node path is built.
    if isinstance(where, Node):
        where._g_check_open()
        basepath = where._v_pathname
        owner = where._v_file
    elif isinstance(where, (str, np.str_)):
        if not where.startswith("/"):
            raise NameError("``where`` must start with a slash ('/')")
        basepath = where
        owner = self
    else:
        raise TypeError(f"``where`` must be a string or a node: {where!r}")

    nodepath = join_path(basepath, name or "") or "/"
    node = owner._get_node(nodepath)

    if not classname:
        return node

    # A class was requested: make sure the node really is one of those.
    expected_class = get_class_by_name(classname)
    if isinstance(node, expected_class):
        return node

    found_path = node._v_pathname
    found_class = node.__class__.__name__
    raise NoSuchNodeError(
        f"could not find a ``{classname}`` node at ``{found_path}``; "
        f"instead, a ``{found_class}`` node has been found there"
    )
def copy_node(
    self,
    where: Node | str,
    newparent: Group | str | None = None,
    newname: str | None = None,
    name: str | None = None,
    overwrite: bool = False,
    recursive: bool = False,
    createparents: bool = False,
    **kwargs,
) -> Node:
    """Copy the node specified by where and name to newparent/newname.

    Parameters
    ----------
    where, name
        These arguments work as in :meth:`File.get_node`, referencing
        the node to be acted upon.
    newparent : str or Group
        The destination group that the node will be copied into (a
        path name or a Group instance).  If not specified or None, the
        current parent group is chosen as the new parent.
    newname : str
        The name to be assigned to the new copy in its destination.
        If not specified or None, the current name is used.
    overwrite : bool, optional
        If True, an existing destination node is overwritten.
        Defaults to False.
    recursive : bool, optional
        If True, all descendant nodes of the node to be acted upon are
        recursively copied.  Defaults to False.
    createparents : bool, optional
        If True, any necessary parents of newparent will be created.
        Defaults to False.
    kwargs
        Additional keyword arguments forwarded to the copy operation.
        See :meth:`Group._f_copy` for a description.

    Returns
    -------
    node : Node
        The destination node.  When a root group is copied into a
        group of another file, the children of that root group are
        copied and the destination group itself is returned.

    Raises
    ------
    OSError
        If a root group is copied (without a new name) into a group
        that lives in the same file.

    """
    obj = self.get_node(where, name=name)

    if obj._v_depth == 0 and newparent and not newname:
        # Special case for copying file1:/ --> file2:/path
        npobj = self.get_node(newparent)
        if obj._v_file is npobj._v_file:
            raise OSError("You cannot copy a root group over the same file")
        # Copy the children of the *resolved* source root: `where` may be
        # the root node of a different file, in which case `self.root`
        # would be the wrong source.
        obj._f_copy_children(
            npobj, overwrite=overwrite, recursive=recursive, **kwargs
        )
        return npobj

    return obj._f_copy(
        newparent, newname, overwrite, recursive, createparents, **kwargs
    )
def set_node_attr(
    self,
    where: Node | str,
    attrname: str,
    attrvalue: Any,
    name: str | None = None,
) -> None:
    """Store `attrvalue` as attribute `attrname` of the referenced node.

    Parameters
    ----------
    where, name
        Locate the node exactly as in :meth:`File.get_node`.
    attrname
        Name of the attribute to set.
    attrvalue
        Value to store.  See the :class:`AttributeSet` class for
        details on which objects can be stored and how.

    """
    # The node itself knows how to persist attributes; just delegate.
    self.get_node(where, name=name)._f_setattr(attrname, attrvalue)
def copy_file(
    self, dstfilename: str, overwrite: bool = False, **kwargs
) -> None:
    """Copy the whole contents of this file into `dstfilename`.

    Parameters
    ----------
    dstfilename : str
        Path of the destination file.  If it already exists the copy
        fails with an OSError unless `overwrite` is true.
    overwrite : bool, optional
        Whether an existing destination file may be overwritten.
        Defaults to False.
    kwargs
        Extra options.  ``filters`` and ``title`` are consumed here and
        used for the destination file; ``copyuserattrs`` (default True)
        controls whether the root group's user attributes are copied.
        The remaining options are passed both to the call that opens
        the destination file and to the recursive copy of the children.

    Raises
    ------
    OSError
        If source and destination resolve to the same path, or if the
        destination exists and `overwrite` is false.

    """
    self._check_open()

    # Refuse to clobber ourselves.
    source_path = Path(self.filename).resolve()
    if source_path == Path(dstfilename).resolve():
        raise OSError("You cannot copy a file over itself")

    # Options handled here are removed from ``kwargs`` so that they are
    # not forwarded a second time.
    dst_filters = kwargs.pop("filters", None)
    if dst_filters is None:
        # Read the on-disk attribute so that filters are only set on the
        # destination when they were explicitly set on the source.
        dst_filters = getattr(self.root._v_attrs, "FILTERS", None)
    copy_attrs = kwargs.get("copyuserattrs", True)
    dst_title = kwargs.pop("title", self.title)

    if Path(dstfilename).is_file() and not overwrite:
        raise OSError(
            f"file ``{dstfilename}`` already exists; you may want to "
            f"use the ``overwrite`` argument"
        )

    # From here on the destination is (re)created from scratch.
    dst_handle = open_file(
        dstfilename,
        mode="w",
        title=dst_title,
        filters=dst_filters,
        **kwargs,
    )
    try:
        source_root = self.root
        if copy_attrs:
            source_root._v_attrs._f_copy(dst_handle.root)
        source_root._f_copy_children(
            dst_handle.root, recursive=True, **kwargs
        )
    finally:
        # Always release the destination, even if the copy failed.
        dst_handle.close()
def walk_nodes(
    self, where: Group | str = "/", classname: str | None = None
) -> Generator[Node]:
    """Recursively iterate over the nodes hanging from `where`.

    Parameters
    ----------
    where : str or Group, optional
        Group where the traversal starts, as a path string or a Group
        instance (see :ref:`GroupClassDescr`).  Defaults to the root.
    classname : str, optional
        Name of a class derived from Node (see :ref:`NodeClassDescr`).
        When given, only instances of that class (or of its
        subclasses) are produced.

    Notes
    -----
    Children are produced group by group instead of being collected in
    a list first, so that no extra references are kept to them and the
    LRU cache remains free to drop them after use.

    Examples
    --------
    ::

        # Recursively print all the nodes hanging from '/detector'.
        print("Nodes hanging from group '/detector':")
        for node in h5file.walk_nodes('/detector', classname='EArray'):
            print(node)

    """
    wanted = get_class_by_name(classname)

    # Only groups requested: the group walker already yields them all,
    # starting group included.
    if wanted is Group:
        yield from self.walk_groups(where)
        return

    if wanted is Node:
        # Every node requested: start with the `where` node itself and
        # list the children of each group unfiltered.
        yield self.get_node(where)
        child_filter = ()
    else:
        # A specific class requested: filter the children by name.
        child_filter = (classname,)

    for group in self.walk_groups(where):
        yield from self.iter_nodes(group, *child_filter)
def enable_undo(self, filters: Filters = Filters(complevel=1)) -> None:
    """Enable the Undo/Redo mechanism.

    This operation prepares the database for undoing and redoing
    modifications in the node hierarchy. This
    allows :meth:`File.mark`, :meth:`File.undo`, :meth:`File.redo` and
    other methods to be called.

    If the file does not contain a transaction group yet, one is
    created together with an initial transaction, an action log and an
    implicit mark number 0.  If the transaction group already exists,
    the existing action log is scanned to rebuild the in-memory marker
    tables and the current mark/action pointers are read back from the
    log attributes.

    Parameters
    ----------
    filters : Filters, optional
        An instance of class Filters (see :ref:`FiltersClassDescr`)
        used when creating the action log. The default is having
        compression enabled (``complevel=1``), as the gains in terms of
        space can be considerable. You may want to disable compression
        if you want maximum speed for Undo/Redo operations.

    Raises
    ------
    UndoRedoError
        If the Undo/Redo mechanism is already enabled.

    Notes
    -----
    The file must be open.  When the transaction group has to be
    created, the file must also be writable (checked through
    ``_check_writable``).

    """
    # Maximum length of the two string arguments stored per log entry.
    maxundo = self.params["MAX_UNDO_PATH_LENGTH"]

    # Table class used for the action log.
    # NOTE(review): NotLoggedMixin presumably keeps operations on the log
    # itself out of the undo log -- confirm against its definition.
    class ActionLog(NotLoggedMixin, Table):
        pass

    # Row layout of the action log: an operation code plus two
    # fixed-size string arguments.
    class ActionLogDesc(IsDescription):
        opcode = UInt8Col(pos=0)
        arg1 = StringCol(maxundo, pos=1, dflt=b"")
        arg2 = StringCol(maxundo, pos=2, dflt=b"")

    self._check_open()

    # Enabling several times is not allowed to avoid the user having
    # the illusion that a new implicit mark has been created
    # when calling enable_undo for the second time.

    if self.is_undo_enabled():
        raise UndoRedoError("Undo/Redo feature is already enabled!")

    # In-memory bookkeeping, (re)initialised on every enable:
    # mark name -> mark number, and mark number -> action-log row.
    self._markers: dict[str, int] = {}
    self._seqmarkers: list[int] = []
    self._nmarks = 0
    self._curtransaction = 0
    self._curmark = -1  # No marks yet

    # Get the Group for keeping user actions
    try:
        tgroup = self.get_node(_trans_group_path)
    except NodeError:
        # The file is going to be changed.
        self._check_writable()

        # A transaction log group does not exist. Create it
        tgroup = self._create_transaction_group()

        # Create a transaction.
        self._trans = self._create_transaction(
            tgroup, self._curtransaction
        )

        # Create an action log
        self._actionlog = ActionLog(
            tgroup,
            _action_log_name,
            ActionLogDesc,
            "Action log",
            filters=filters,
        )

        # Create an implicit mark
        self._actionlog.append([(_op_to_code["MARK"], str(0), "")])
        self._nmarks += 1
        self._seqmarkers.append(0)  # current action is 0

        # Create a group for mark 0
        self._create_mark(self._trans, 0)
        # Initialize the marker pointer
        self._curmark = int(self._nmarks - 1)
        # Initialize the action pointer
        self._curaction = self._actionlog.nrows - 1
    else:
        # The group seems to exist already
        # Get the default transaction
        self._trans = tgroup._f_get_child(
            _trans_name % self._curtransaction
        )
        # Open the action log and go to the end of it
        self._actionlog = tgroup.actionlog
        # Rebuild the marker tables from the MARK entries of the log;
        # marks are numbered in the order they appear.
        for row in self._actionlog:
            if row["opcode"] == _op_to_code["MARK"]:
                name = row["arg2"].decode("utf-8")
                self._markers[name] = self._nmarks
                self._seqmarkers.append(row.nrow)
                self._nmarks += 1
        # Get the current mark and current action
        self._curmark = int(self._actionlog.attrs.CURMARK)
        self._curaction = self._actionlog.attrs.CURACTION

    # The Undo/Redo mechanism has been enabled.
    self._undoEnabled = True
def _log(self, action: str, *args: str) -> None:
    """Append one entry to the Undo/Redo action log.

    The `action` must be an all-uppercase string identifying it, and
    must be a key of ``_op_to_code``. Up to two string arguments may
    follow; missing ones are stored as empty strings.

    This method should be called once the action has been completed.

    If the current action is not the last row of the log (i.e. some
    actions have been undone), the trailing rows and the marker groups
    that belong to them are discarded before the new entry is
    appended.

    Parameters
    ----------
    action : str
        Name of the operation being logged.
    *args : str
        Zero, one or two string arguments of the operation.

    Raises
    ------
    UndoRedoError
        If the Undo/Redo mechanism is not enabled, if `action` is
        unknown, if more than two arguments are given, or if an
        argument does not fit in ``MAX_UNDO_PATH_LENGTH`` bytes once
        encoded as UTF-8.

    """
    # Raise the documented error explicitly: an ``assert`` would
    # disappear under ``python -O``.
    if not self.is_undo_enabled():
        raise UndoRedoError("Undo/Redo feature is currently disabled!")

    maxundo = self.params["MAX_UNDO_PATH_LENGTH"]

    # Validate the whole request *before* touching the log, so that a
    # bad call cannot throw away the redo history as a side effect.
    if action not in _op_to_code:  # INTERNAL
        raise UndoRedoError(
            "Action ``%s`` not in ``_op_to_code`` "
            "dictionary: %r" % (action, _op_to_code)
        )
    if len(args) > 2:  # INTERNAL
        raise UndoRedoError(
            "Too many parameters for action log: %r" % (args,)
        )

    # Missing arguments default to the empty string.
    arg1, arg2 = (*args, "", "")[:2]

    # The log columns hold bytes, so the limit applies to the encoded
    # length, not to the number of characters.
    enc_arg1 = arg1.encode("utf-8")
    enc_arg2 = arg2.encode("utf-8")
    if len(enc_arg1) > maxundo or len(enc_arg2) > maxundo:  # INTERNAL
        raise UndoRedoError(
            "Parameter arg1 or arg2 is too long: "
            "(%r, %r)" % (arg1, arg2)
        )

    # Check whether we are at the end of the action log or not
    if self._curaction != self._actionlog.nrows - 1:
        # We are not, so delete the trailing actions
        self._actionlog.remove_rows(
            self._curaction + 1, self._actionlog.nrows
        )
        # Reset the current marker group
        mnode = self.get_node(
            _mark_path % (self._curtransaction, self._curmark)
        )
        mnode._g_reset()
        # Delete the marker groups with backup objects
        for mark in range(self._curmark + 1, self._nmarks):
            mnode = self.get_node(
                _mark_path % (self._curtransaction, mark)
            )
            mnode._g_remove(recursive=1)
        # Update the new number of marks
        self._nmarks = self._curmark + 1
        self._seqmarkers = self._seqmarkers[: self._nmarks]

    self._actionlog.append([(_op_to_code[action], enc_arg1, enc_arg2)])
    self._curaction += 1
mark that you have specified has not " + "been found in the internal marker list: " + "%r" % lmarkers + ) + markid = self._markers[mark] + else: + raise TypeError( + "Parameter mark can only be an integer or a " + "string, and you passed a type <%s>" % type(mark) + ) + # print("markid, self._nmarks:", markid, self._nmarks) + return markid + + def _get_final_action(self, markid: int) -> int: + """Get the action to go. + + It does not touch the self private attributes + + """ + if markid > self._nmarks - 1: + # The required mark is beyond the end of the action log + # The final action is the last row + return self._actionlog.nrows + elif markid <= 0: + # The required mark is the first one + # return the first row + return 0 + + return self._seqmarkers[markid] + + def _doundo(self, finalaction: int, direction: int) -> None: + """Undo/Redo actions up to final action in the specified direction.""" + if direction < 0: + actionlog = self._actionlog[finalaction + 1 : self._curaction + 1][ + ::-1 + ] + else: + actionlog = self._actionlog[self._curaction : finalaction] + + # Uncomment this for debugging + # print("curaction, finalaction, direction", \ + # self._curaction, finalaction, direction) + for i in range(len(actionlog)): + if actionlog["opcode"][i] != _op_to_code["MARK"]: + # undo/redo the action + if direction > 0: + # Uncomment this for debugging + # print("redo-->", \ + # _code_to_op[actionlog['opcode'][i]],\ + # actionlog['arg1'][i],\ + # actionlog['arg2'][i]) + undoredo.redo( + self, + # _code_to_op[actionlog['opcode'][i]], + # The next is a workaround for python < 2.5 + _code_to_op[int(actionlog["opcode"][i])], + actionlog["arg1"][i].decode("utf8"), + actionlog["arg2"][i].decode("utf8"), + ) + else: + # Uncomment this for debugging + # print("undo-->", \ + # _code_to_op[actionlog['opcode'][i]],\ + # actionlog['arg1'][i].decode('utf8'),\ + # actionlog['arg2'][i].decode('utf8')) + undoredo.undo( + self, + # _code_to_op[actionlog['opcode'][i]], + # The next is a 
workaround for python < 2.5 + _code_to_op[int(actionlog["opcode"][i])], + actionlog["arg1"][i].decode("utf8"), + actionlog["arg2"][i].decode("utf8"), + ) + else: + if direction > 0: + self._curmark = int(actionlog["arg1"][i]) + else: + self._curmark = int(actionlog["arg1"][i]) - 1 + # Protection against negative marks + if self._curmark < 0: + self._curmark = 0 + self._curaction += direction + + def undo(self, mark: int | str | None = None) -> None: + """Go to a past state of the database. + + Returns the database to the state associated with the specified mark. + Both the identifier of a mark and its name can be used. If the mark is + omitted, the last created mark is used. If there are no past + marks, or the specified mark is not older than the current one, an + UndoRedoError is raised. + + This method can only be called when the Undo/Redo mechanism + has been enabled. Otherwise, an UndoRedoError + is raised. + + """ + self._check_open() + self._check_undo_enabled() + + # print("(pre)UNDO: (curaction, curmark) = (%s,%s)" % \ + # (self._curaction, self._curmark)) + if mark is None: + markid = self._curmark + # Correction if we are settled on top of a mark + opcode = self._actionlog.cols.opcode + if opcode[self._curaction] == _op_to_code["MARK"]: + markid -= 1 + else: + # Get the mark ID number + markid = self._get_mark_id(mark) + # Get the final action ID to go + finalaction = self._get_final_action(markid) + if finalaction > self._curaction: + raise UndoRedoError( + "Mark ``%s`` is newer than the current mark. " + "Use `redo()` or `goto()` instead." % (mark,) + ) + + # The file is going to be changed. 
def redo(self, mark: int | str | None = None) -> None:
    """Go to a future state of the database.

    Returns the database to the state associated with the specified
    mark. Both the identifier of a mark and its name can be used.
    If the `mark` is omitted, the next created mark is used. The
    special value ``-1`` means "redo everything up to the end of the
    action log".

    If the current action is already the last one in the log there is
    nothing to redo and the call returns without doing anything.

    This method can only be called when the Undo/Redo mechanism has
    been enabled. Otherwise, an UndoRedoError is raised.

    Parameters
    ----------
    mark : int or str, optional
        Identifier or name of the mark to go to.

    Raises
    ------
    UndoRedoError
        If the Undo/Redo mechanism is disabled, or if the requested
        mark is not newer than the current position.

    """
    self._check_open()
    self._check_undo_enabled()

    if self._curaction >= self._actionlog.nrows - 1:
        # We are at the end of log, so no action
        return

    if mark is None:
        mark = self._curmark + 1
    elif mark == -1:
        mark = int(self._nmarks)  # Go beyond the mark bounds up to the end
    # Get the mark ID number
    markid = self._get_mark_id(mark)
    finalaction = self._get_final_action(markid)
    if finalaction < self._curaction + 1:
        # The target lies in the past, so the caller needs ``undo()``.
        raise UndoRedoError(
            "Mark ``%s`` is older than the current mark. "
            "Use `undo()` or `goto()` instead." % (mark,)
        )

    # The file is going to be changed.
    self._check_writable()

    # Start replaying from the action after the current one
    self._curaction += 1

    # Try to reach this mark by redoing the actions in the log
    self._doundo(finalaction, 1)
    # Increment the current mark only if we are not at the end of marks
    if self._curmark < self._nmarks - 1:
        self._curmark += 1
    # Never leave the action pointer past the last row of the log
    if self._curaction > self._actionlog.nrows - 1:
        self._curaction = self._actionlog.nrows - 1
def close(self) -> None:
    """Flush all the alive leaves in object tree and close the file.

    Closing an already closed file is a no-op. When the open counter
    is greater than one, the call only decrements the counter and
    leaves the file open.

    When Undo/Redo is enabled and the file is writable, the current
    mark and current action are stored as the ``CURMARK`` and
    ``CURACTION`` attributes of the action log before the nodes are
    closed.
    """
    # If the file is already closed, return immediately
    if not self.isopen:
        return

    # If this file has been opened more than once, decrease the
    # counter and return
    if self._open_count > 1:
        self._open_count -= 1
        return

    # Keep the name in a local: the instance dictionary is wiped below.
    filename = self.filename

    if self._undoEnabled and self._iswritable():
        # Save the current mark and current action
        self._actionlog.attrs._g__setattr("CURMARK", self._curmark)
        self._actionlog.attrs._g__setattr("CURACTION", self._curaction)

    # Close all loaded nodes.
    self.root._f_close()

    self._node_manager.shutdown()

    # Post-conditions
    assert (
        len(self._node_manager.cache) == 0
    ), "cached nodes remain after closing: %s" % list(
        self._node_manager.cache
    )

    # No other nodes should have been revived.
    assert (
        len(self._node_manager.registry) == 0
    ), "alive nodes remain after closing: %s" % list(
        self._node_manager.registry
    )

    # Close the file
    self._close_file()

    # After the objects are disconnected, destroy the
    # object dictionary using the brute force ;-)
    # This should help to the garbage collector
    self.__dict__.clear()

    # Set the flag to indicate that the file is closed
    # (it has to be set again because the dictionary was just cleared)
    self.isopen = 0

    # Restore the filename attribute that is used by _FileRegistry
    self.filename = filename

    # Delete the entry from the registry of opened files
    _open_files.remove(self)
+ Object Tree: + / (RootGroup) 'Table Benchmark' + /tuple0 (Table(100,)lzo(1)) 'This is the table title' + /group0 (Group) '' + /group0/tuple1 (Table(100,)lzo(1)) 'This is the table title' + /group0/group1 (Group) '' + /group0/group1/tuple2 (Table(100,)lzo(1)) 'This is the table title' + /group0/group1/group2 (Group) '' + >>> f.close() + + """ + if not self.isopen: + return "" + + # Print all the nodes (Group and Leaf objects) on object tree + try: + date = datetime.datetime.fromtimestamp( + Path(self.filename).stat().st_mtime, datetime.UTC + ).isoformat(timespec="seconds") + except OSError: + # in-memory file + date = "" + lines = [ + f"{self.filename} (File) {self.title!r}", + f"Last modif.: {date!r}", + "Object Tree: ", + ] + + for group in self.walk_groups("/"): + lines.append(f"{group}") + for kind in self._node_kinds[1:]: + for node in self.list_nodes(group, kind): + lines.append(f"{node}") + return "\n".join(lines) + "\n" + + def __repr__(self) -> str: + """Return a detailed string representation of the object tree.""" + if not self.isopen: + return "" + + # Print all the nodes (Group and Leaf objects) on object tree + lines = [ + f"File(filename={self.filename!s}, title={self.title!r}, " + f"mode={self.mode!r}, root_uep={self.root_uep!r}, " + f"filters={self.filters!r})" + ] + for group in self.walk_groups("/"): + lines.append(f"{group}") + for kind in self._node_kinds[1:]: + for node in self.list_nodes(group, kind): + lines.append(f"{node!r}") + return "\n".join(lines) + "\n" + + def _update_node_locations(self, oldpath: str, newpath: str) -> None: + """Update location information of nodes under `oldpath`. + + This only affects *already loaded* nodes. + + """ + oldprefix = oldpath + "/" # root node can not be renamed, anyway + oldprefix_len = len(oldprefix) + + # Update alive and dead descendents. 
+ for cache in [self._node_manager.cache, self._node_manager.registry]: + for nodepath in list(cache): + if nodepath.startswith(oldprefix) and nodepath != oldprefix: + nodesuffix = nodepath[oldprefix_len:] + newnodepath = join_path(newpath, nodesuffix) + newnodeppath = split_path(newnodepath)[0] + descendent_node = self._get_node(nodepath) + descendent_node._g_update_location(newnodeppath) + + +# If a user hits ^C during a run, it is wise to gracefully close the +# opened files. +atexit.register(_open_files.close_all) diff --git a/venv/Lib/site-packages/tables/filters.py b/venv/Lib/site-packages/tables/filters.py new file mode 100644 index 0000000..77b70f4 --- /dev/null +++ b/venv/Lib/site-packages/tables/filters.py @@ -0,0 +1,474 @@ +"""Functionality related with filters in a PyTables file.""" + +from __future__ import annotations + +import warnings +from typing import Any, Literal, TYPE_CHECKING + +import numpy as np +from packaging.version import Version + +from . import utilsextension +from .exceptions import FiltersWarning + +if TYPE_CHECKING: + from .leaf import Leaf + +blosc_version = Version(utilsextension.which_lib_version("blosc")[1]) +blosc2_version = Version(utilsextension.which_lib_version("blosc2")[1]) +blosc_compcode_to_compname = utilsextension.blosc_compcode_to_compname_ +blosc2_compcode_to_compname = utilsextension.blosc2_compcode_to_compname_ + + +__docformat__ = "reStructuredText" +"""The format of documentation strings in this module.""" + +all_complibs = ["zlib", "lzo", "bzip2", "blosc", "blosc2"] +all_complibs += [ + f"blosc:{cname}" for cname in utilsextension.blosc_compressor_list() +] +all_complibs += [ + f"blosc2:{cname}" for cname in utilsextension.blosc2_compressor_list() +] + + +"""List of all compression libraries.""" + +foreign_complibs = ["szip"] +"""List of known but unsupported compression libraries.""" + +default_complib = "zlib" +"""The default compression library.""" + + +_shuffle_flag = 0x1 +_fletcher32_flag = 0x2 
class Filters:
    """Container for filter properties.

    This class is meant to serve as a container that keeps information about
    the filter properties associated with the chunked leaves, that is Table,
    CArray, EArray and VLArray.

    Instances of this class can be directly compared for equality.

    Parameters
    ----------
    complevel : int
        Specifies a compression level for data. The allowed
        range is 0-9. A value of 0 (the default) disables
        compression.
    complib : str
        Specifies the compression library to be used. Right now, 'zlib' (the
        default), 'lzo', 'bzip2', 'blosc' and 'blosc2' are supported.
        Additional compressors for Blosc like 'blosc:blosclz' ('blosclz' is
        the default in case the additional compressor is not specified),
        'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib' and 'blosc:zstd' are
        supported too.
        Also, additional compressors for Blosc2 like 'blosc2:blosclz'
        ('blosclz' is the default in case the additional compressor is not
        specified), 'blosc2:lz4', 'blosc2:lz4hc', 'blosc2:zlib' and
        'blosc2:zstd' are supported too.
        Specifying a compression library which is not available
        in the system issues a FiltersWarning and sets the library to the
        default one.
    shuffle : bool
        Whether to use the *Shuffle* filter in the HDF5 library.
        This is normally used to improve the compression ratio.
        A false value disables shuffling and a true one enables
        it. The default value depends on whether compression is
        enabled or not; if compression is enabled, shuffling defaults
        to be enabled, else shuffling is disabled. Shuffling can only
        be used when compression is enabled.
    bitshuffle : bool
        Whether to use the *BitShuffle* filter in the Blosc/Blosc2
        libraries. This is normally used to improve the compression
        ratio. A false value disables bitshuffling and a true one
        enables it. The default value is disabled. When both `shuffle`
        and `bitshuffle` are requested, BitShuffle takes priority.
    fletcher32 : bool
        Whether to use the *Fletcher32* filter in the HDF5 library.
        This is used to add a checksum on each data chunk. A false
        value (the default) disables the checksum.
    least_significant_digit : int
        If specified, data will be truncated (quantized). In conjunction
        with enabling compression, this produces 'lossy', but
        significantly more efficient compression. For example, if
        *least_significant_digit=1*, data will be quantized using
        ``around(scale*data)/scale``, where ``scale = 2**bits``, and
        bits is determined so that a precision of 0.1 is retained (in
        this case bits=4). Default is *None*, or no quantization.

        .. note::

            quantization is only applied if some form of compression is
            enabled

    Examples
    --------
    This is a small example on using the Filters class::

        import numpy as np
        import tables as tb

        fileh = tb.open_file('test5.h5', mode='w')
        atom = tb.Float32Atom()
        filters = tb.Filters(complevel=1, complib='blosc', fletcher32=True)
        arr = fileh.create_earray(fileh.root, 'earray', atom, (0,2),
                                  "A growable array", filters=filters)

        # Append several rows in only one call
        arr.append(np.array([[1., 2.],
                             [2., 3.],
                             [3., 4.]], dtype=np.float32))

        # Print information on that enlargeable array
        print("Result Array:")
        print(repr(arr))
        fileh.close()

    This enforces the use of the Blosc library, a compression level of 1 and a
    Fletcher32 checksum filter as well. See the output of this example::

        Result Array:
        /earray (EArray(3, 2), fletcher32, shuffle, blosc(1)) 'A growable ...
          type = float32
          shape = (3, 2)
          itemsize = 4
          nrows = 3
          extdim = 0
          flavor = 'numpy'
          byteorder = 'little'

    .. rubric:: Filters attributes

    .. attribute:: fletcher32

        Whether the *Fletcher32* filter is active or not.

    .. attribute:: complevel

        The compression level (0 disables compression).

    .. attribute:: complib

        The compression filter used (irrelevant when compression is not
        enabled).

    .. attribute:: shuffle

        Whether the *Shuffle* filter is active or not.

    .. attribute:: bitshuffle

        Whether the *BitShuffle* filter is active or not (Blosc/Blosc2 only).

    .. attribute:: least_significant_digit

        The least significant digit to which data shall be truncated
        (*None* when no quantization is applied).

    """

    @property
    def shuffle_bitshuffle(self) -> Literal[0, 1, 2]:
        """Encode NoShuffle (0), Shuffle (1) and BitShuffle (2) filters.

        Raises ``ValueError`` if both filters are active at once.
        """
        if self.shuffle and self.bitshuffle:
            raise ValueError(
                "Shuffle and BitShuffle cannot be active at the same time"
            )
        if self.shuffle:
            return 1
        if self.bitshuffle:
            return 2
        return 0

    @classmethod
    def _from_leaf(cls, leaf: Leaf) -> Filters:
        """Create a `Filters` object from the filters stored for `leaf`."""
        # Get a dictionary with all the filters
        parent = leaf._v_parent
        filters_dict = utilsextension.get_filters(
            parent._v_objectid, leaf._v_name
        )
        if filters_dict is None:
            filters_dict = {}  # not chunked

        # Keyword arguments are all off
        kwargs = {
            "complevel": 0,
            "shuffle": False,
            "bitshuffle": False,
            "fletcher32": False,
            "least_significant_digit": None,
            "_new": False,
        }
        for name, values in filters_dict.items():
            if name == "deflate":
                name = "zlib"
            if name in all_complibs:
                kwargs["complib"] = name
                if name in ("blosc", "blosc2"):
                    kwargs["complevel"] = values[4]
                    if values[5] == 1:
                        # Shuffle filter is internal to blosc/blosc2
                        kwargs["shuffle"] = True
                    elif values[5] == 2:
                        # BitShuffle filter is internal to blosc/blosc2
                        kwargs["bitshuffle"] = True
                    # From Blosc 1.3 on, parameter 6 is used for the compressor
                    if len(values) > 6:
                        if name == "blosc":
                            cname = blosc_compcode_to_compname(values[6])
                            kwargs["complib"] = "blosc:%s" % cname
                        else:
                            cname = blosc2_compcode_to_compname(values[6])
                            kwargs["complib"] = "blosc2:%s" % cname
                else:
                    kwargs["complevel"] = values[0]
            elif name in foreign_complibs:
                kwargs["complib"] = name
                kwargs["complevel"] = 1  # any nonzero value will do
            elif name in ["shuffle", "fletcher32"]:
                kwargs[name] = True
        return cls(**kwargs)

    @classmethod
    def _unpack(cls, packed: int) -> Filters:
        """Create a new `Filters` object from a packed version.

        The layout is the one written by `_pack()`: byte 0 holds the
        compression level, byte 1 the compression library id, byte 2
        the flag bits and byte 3 the least significant digit, stored
        as a signed 8-bit value.

        >>> Filters._unpack(0)
        Filters(complevel=0, shuffle=False, bitshuffle=False, \
fletcher32=False, least_significant_digit=None)
        >>> Filters._unpack(0x101)
        Filters(complevel=1, complib='zlib', shuffle=False, \
bitshuffle=False, fletcher32=False, least_significant_digit=None)
        >>> Filters._unpack(0x30109)
        Filters(complevel=9, complib='zlib', shuffle=True, \
bitshuffle=False, fletcher32=True, least_significant_digit=None)
        >>> Filters._unpack(0x3010A)
        Traceback (most recent call last):
          ...
        ValueError: compression level must be between 0 and 9
        >>> Filters._unpack(0x1)
        Traceback (most recent call last):
          ...
        ValueError: invalid compression library id: 0

        """
        kwargs = {"_new": False}

        # Work on a plain Python integer, whatever integer type came in.
        packed = int(packed)

        # Byte 0: compression level.
        kwargs["complevel"] = complevel = packed & 0xFF
        packed >>= 8

        # Byte 1: compression library id (0 for none).
        if complevel > 0:
            complib_id = packed & 0xFF
            if not (0 < complib_id <= len(all_complibs)):
                raise ValueError(
                    f"invalid compression library id: {complib_id}"
                )
            kwargs["complib"] = all_complibs[complib_id - 1]
        packed >>= 8

        # Byte 2: parameterless filters.
        kwargs["shuffle"] = packed & _shuffle_flag
        kwargs["bitshuffle"] = packed & _bitshuffle_flag
        kwargs["fletcher32"] = packed & _fletcher32_flag
        has_rounding = packed & _rounding_flag
        packed >>= 8

        # Byte 3: least significant digit.
        if has_rounding:
            digit = packed & 0xFF
            # Reinterpret the byte as signed *before* building the
            # np.int8: values above 127 do not fit in it otherwise.
            if digit >= 0x80:
                digit -= 0x100
            kwargs["least_significant_digit"] = np.int8(digit)
        else:
            kwargs["least_significant_digit"] = None

        return cls(**kwargs)

    def _pack(self) -> np.int64:
        """Pack the `Filters` object into a 64-bit NumPy integer.

        A least significant digit of 0 is a valid setting and survives
        the round trip through `_unpack()`:

        >>> packed = Filters(complevel=1, least_significant_digit=0)._pack()
        >>> Filters._unpack(packed)
        Filters(complevel=1, complib='zlib', shuffle=True, \
bitshuffle=False, fletcher32=False, least_significant_digit=0)

        """
        packed = np.int64(0)
        digit = self.least_significant_digit

        # Byte 3: least significant digit.
        if digit is not None:
            # Keep only the low byte: OR-ing a negative np.int8 would
            # sign-extend and set every higher bit of the result.
            packed |= int(digit) & 0xFF
        packed <<= 8

        # Byte 2: parameterless filters.
        if self.shuffle:
            packed |= _shuffle_flag
        if self.bitshuffle:
            packed |= _bitshuffle_flag
        if self.fletcher32:
            packed |= _fletcher32_flag
        if digit is not None:
            # Test against None, not truthiness: 0 is a legal digit.
            packed |= _rounding_flag
        packed <<= 8

        # Byte 1: compression library id (0 for none).
        if self.complevel > 0:
            packed |= all_complibs.index(self.complib) + 1
        packed <<= 8

        # Byte 0: compression level.
        packed |= self.complevel

        return packed

    def __init__(
        self,
        complevel: int = 0,
        complib: Literal[
            "zlib", "lzo", "bzip2", "blosc", "blosc2"
        ] = default_complib,
        shuffle: bool = True,
        bitshuffle: bool = False,
        fletcher32: bool = False,
        least_significant_digit: int | None = None,
        _new: bool = True,
    ) -> None:

        if not (0 <= complevel <= 9):
            raise ValueError("compression level must be between 0 and 9")

        if _new and complevel > 0:
            # These checks are not performed when loading filters from disk.
            if complib not in all_complibs:
                raise ValueError(
                    "compression library ``%s`` is not supported; "
                    "it must be one of: %s"
                    % (complib, ", ".join(all_complibs))
                )
            if utilsextension.which_lib_version(complib) is None:
                warnings.warn(
                    "compression library ``%s`` is not available; "
                    "using ``%s`` instead" % (complib, default_complib),
                    FiltersWarning,
                )
                complib = default_complib  # always available

        complevel = int(complevel)
        complib = str(complib)
        shuffle = bool(shuffle)
        bitshuffle = bool(bitshuffle)
        fletcher32 = bool(fletcher32)
        if least_significant_digit is not None:
            least_significant_digit = np.int8(least_significant_digit)

        if complevel == 0:
            # Override some inputs when compression is not enabled.
            complib = None  # make it clear there is no compression
            shuffle = False  # shuffling and not compressing makes no sense
            least_significant_digit = None
        elif complib not in all_complibs:
            # Do not try to use a meaningful level for unsupported libs.
            complevel = -1

        self.complevel = complevel
        """The compression level (0 disables compression)."""

        self.complib = complib
        """The compression filter used (irrelevant when compression is
        not enabled).
        """

        self.shuffle = shuffle
        """Whether the *Shuffle* filter is active or not."""

        self.bitshuffle = bitshuffle
        """Whether the *BitShuffle* filter is active or not."""

        if (
            self.complib
            and self.bitshuffle
            and not self.complib.startswith("blosc")
        ):
            raise ValueError("BitShuffle can only be used inside Blosc/Blosc2")

        if self.shuffle and self.bitshuffle:
            # BitShuffle has priority in case both are specified
            self.shuffle = False

        self.fletcher32 = fletcher32
        """Whether the *Fletcher32* filter is active or not."""

        self.least_significant_digit = least_significant_digit
        """The least significant digit to which data shall be truncated."""

    def __repr__(self) -> str:
        args = []
        if self.complevel >= 0:  # meaningful compression level
            args.append(f"complevel={self.complevel}")
        if self.complevel != 0:  # compression enabled (-1 or > 0)
            args.append(f"complib={self.complib!r}")
        args.append(f"shuffle={self.shuffle}")
        args.append(f"bitshuffle={self.bitshuffle}")
        args.append(f"fletcher32={self.fletcher32}")
        args.append(f"least_significant_digit={self.least_significant_digit}")
        return f'{self.__class__.__name__}({", ".join(args)})'

    def __str__(self) -> str:
        return repr(self)

    def __eq__(self, other: Any) -> bool:
        """Compare every instance attribute of two `Filters` objects."""
        if not isinstance(other, self.__class__):
            return False
        for attr in self.__dict__:
            if getattr(self, attr) != getattr(other, attr):
                return False
        return True

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #    return hash((self.__class__, self.complevel, self.complib,
    #                 self.shuffle, self.bitshuffle, self.fletcher32))

    def copy(self, **override) -> Filters:
        """Get a copy of the filters, possibly overriding some arguments.

        Constructor arguments to be overridden must be passed as keyword
        arguments.

        Using this method is recommended over replacing the attributes of an
        instance, since instances of this class may become immutable in the
        future::

            >>> filters1 = Filters()
            >>> filters2 = filters1.copy()
            >>> filters1 == filters2
            True
            >>> filters1 is filters2
            False
            >>> filters3 = filters1.copy(complevel=1)  #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            ValueError: compression library ``None`` is not supported...
            >>> filters3 = filters1.copy(complevel=1, complib='zlib')
            >>> print(filters1)
            Filters(complevel=0, shuffle=False, bitshuffle=False, \
fletcher32=False, least_significant_digit=None)
            >>> print(filters3)
            Filters(complevel=1, complib='zlib', shuffle=False, \
bitshuffle=False, fletcher32=False, least_significant_digit=None)
            >>> filters1.copy(foobar=42)  #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            TypeError: ...__init__() got an unexpected keyword argument ...

        """
        # The instance attributes carry the same names as the
        # constructor arguments, so they can be fed straight back.
        newargs = self.__dict__.copy()
        newargs.update(override)
        return self.__class__(**newargs)
+`internal_flavor` + The flavor used internally by PyTables. +`all_flavors` + List of all flavors available to PyTables. +`alias_map` + Maps old flavor names to the most similar current flavor. +`description_map` + Maps flavors to short descriptions of their supported objects. +`identifier_map` + Maps flavors to functions that can identify their objects. + + The function associated with a given flavor will return a true + value if the object passed to it can be identified as being of + that flavor. + + See the `flavor_of()` function for a friendlier interface to + flavor identification. + +`converter_map` + Maps (source, destination) flavor pairs to converter functions. + + Converter functions get an array of the source flavor and return + an array of the destination flavor. + + See the `array_of_flavor()` and `flavor_to_flavor()` functions for + friendlier interfaces to flavor conversion. + +""" + +from __future__ import annotations + +import warnings +from typing import Any, Literal +from collections.abc import Callable, Sequence + +import numpy as np +import numpy.typing as npt + +from .exceptions import FlavorError, FlavorWarning + +# @compatibility with numpy < 1.25 +try: + from numpy.exceptions import VisibleDeprecationWarning +except ImportError: + from numpy import VisibleDeprecationWarning + +__docformat__ = "reStructuredText" +"""The format of documentation strings in this module.""" + +FlavorType = Literal["numpy", "python"] + +internal_flavor: FlavorType = "numpy" +"""The flavor used internally by PyTables.""" + +# This is very slightly slower than a set for a small number of values +# in terms of (infrequent) lookup time, but allows `flavor_of()` +# (which may be called much more frequently) to check for flavors in +# order, beginning with the most common one. 
all_flavors: list[FlavorType] = []  # filled as flavors are registered
"""List of all flavors available to PyTables."""

alias_map: dict[str, FlavorType] = {}  # filled as flavors are registered
"""Maps old flavor names to the most similar current flavor."""

# filled as flavors are registered
description_map: dict[FlavorType, str] = {}
"""Maps flavors to short descriptions of their supported objects."""

# filled as flavors are registered
# (identifier functions are predicates: they return a truth value,
# not a flavor name)
identifier_map: dict[FlavorType, Callable[[Any], bool]] = {}
"""Maps flavors to functions that can identify their objects.

The function associated with a given flavor will return a true value
if the object passed to it can be identified as being of that flavor.

See the `flavor_of()` function for a friendlier interface to flavor
identification.
"""

# filled as flavors are registered
converter_map: dict[tuple[FlavorType, FlavorType], Callable[[Any], Any]] = {}
"""Maps (source, destination) flavor pairs to converter functions.

Converter functions get an array of the source flavor and return an
array of the destination flavor.

See the `array_of_flavor()` and `flavor_to_flavor()` functions for
friendlier interfaces to flavor conversion.
"""


def check_flavor(flavor: FlavorType) -> None:
    """Raise a ``FlavorError`` if the `flavor` is not valid.

    A flavor is valid when it is currently listed in `all_flavors`.
    Nothing is returned for a valid flavor.
    """
    if flavor in all_flavors:
        return
    available_flavs = ", ".join(all_flavors)
    raise FlavorError(
        f"flavor ``{flavor}`` is unsupported or unavailable; "
        f"available flavors in this system are: {available_flavs}"
    )


def array_of_flavor2(
    array: npt.ArrayLike, src_flavor: FlavorType, dst_flavor: FlavorType
) -> Any | list[Any] | np.ndarray:
    """Get a version of the given `array` in a different flavor.

    The input `array` must be of the given `src_flavor`, and the
    returned array will be of the indicated `dst_flavor`.  Both
    flavors may be the same, but it is not guaranteed that the
    returned array will be the same object as the input one in this
    case.

    If the conversion is not supported, a ``FlavorError`` is raised.

    """
    try:
        convfunc = converter_map[(src_flavor, dst_flavor)]
    except KeyError:
        # No converter registered for this (source, destination) pair.
        raise FlavorError(
            f"conversion from flavor ``{src_flavor}`` "
            f"to flavor ``{dst_flavor}`` "
            "is unsupported or unavailable in this system"
        ) from None
    return convfunc(array)


def flavor_to_flavor(
    array: npt.ArrayLike, src_flavor: FlavorType, dst_flavor: FlavorType
) -> Any | list[Any] | np.ndarray | npt.ArrayLike:
    """Get a version of the given `array` in a different flavor.

    The input `array` must be of the given `src_flavor`, and the
    returned array will be of the indicated `dst_flavor` (see below
    for an exception to this).  Both flavors may be the same, but it
    is not guaranteed that the returned array will be the same object
    as the input one in this case.

    If the conversion is not supported, a `FlavorWarning` is issued
    and the input `array` is returned as is.

    """
    try:
        return array_of_flavor2(array, src_flavor, dst_flavor)
    except FlavorError as fe:
        # Best effort: downgrade the error to a warning and hand the
        # caller back the unconverted input.
        warnings.warn(
            f"{fe.args[0]}; returning an object of the "
            f"``{src_flavor}`` flavor instead",
            FlavorWarning,
        )
        return array


def internal_to_flavor(
    array: npt.ArrayLike, dst_flavor: FlavorType
) -> Any | list[Any] | np.ndarray:
    """Get a version of the given `array` in a different `dst_flavor`.

    The input `array` must be of the internal flavor, and the returned
    array will be of the given `dst_flavor`.  See `flavor_to_flavor()`
    for more information.

    """
    return flavor_to_flavor(array, internal_flavor, dst_flavor)


def array_as_internal(
    array: npt.ArrayLike, src_flavor: FlavorType
) -> np.ndarray:
    """Get a version of the given `array` in the internal flavor.

    The input `array` must be of the given `src_flavor`, and the
    returned array will be of the internal flavor.

    If the conversion is not supported, a ``FlavorError`` is raised.

    """
    return array_of_flavor2(array, src_flavor, internal_flavor)


def flavor_of(array: npt.ArrayLike) -> FlavorType:
    """Identify the flavor of a given `array`.

    Flavors are tried in the order in which they appear in
    `all_flavors`; the first one whose identifier function accepts
    the object wins.

    If the `array` can not be matched with any flavor, a ``TypeError``
    is raised.

    """
    for flavor in all_flavors:
        if identifier_map[flavor](array):
            return flavor
    type_name = type(array).__name__
    supported_descs = "; ".join(description_map[fl] for fl in all_flavors)
    raise TypeError(
        f"objects of type ``{type_name}`` are not supported "
        "in this context, sorry; "
        f"supported objects are: {supported_descs}"
    )


def array_of_flavor(
    array: npt.ArrayLike, dst_flavor: FlavorType
) -> Any | list[Any] | np.ndarray:
    """Get a version of the given `array` in a different `dst_flavor`.

    The flavor of the input `array` is guessed, and the returned array
    will be of the given `dst_flavor`.

    If the conversion is not supported, a ``FlavorError`` is raised.

    """
    return array_of_flavor2(array, flavor_of(array), dst_flavor)


def restrict_flavors(keep: Sequence[FlavorType] = ("python",)) -> None:
    """Disable all flavors except those in keep.

    Providing an empty keep sequence implies disabling all flavors (but the
    internal one).  If the sequence is not specified, only optional flavors
    are disabled.

    .. important:: Once you disable a flavor, it can not be enabled again.

    """
    # The internal flavor is never removed, whatever `keep` says.
    remove = set(all_flavors) - set(keep) - {internal_flavor}
    for flavor in remove:
        _disable_flavor(flavor)


# Flavor registration
#
# The order in which flavors appear in `all_flavors` determines the
# order in which they will be tested for by `flavor_of()`, so place
# most frequent flavors first.
+all_flavors.append("numpy") # this is the internal flavor + +all_flavors.append("python") # this is always supported + + +def _register_aliases() -> None: + """Register aliases of *available* flavors.""" + for flavor in all_flavors: + aliases = eval("_%s_aliases" % flavor) + for alias in aliases: + alias_map[alias] = flavor + + +def _register_descriptions() -> None: + """Register descriptions of *available* flavors.""" + for flavor in all_flavors: + description_map[flavor] = eval("_%s_desc" % flavor) + + +def _register_identifiers() -> None: + """Register identifier functions of *available* flavors.""" + for flavor in all_flavors: + identifier_map[flavor] = eval("_is_%s" % flavor) + + +def _register_converters() -> None: + """Register converter functions between *available* flavors.""" + + def identity(array: Any) -> Any: + return array + + for src_flavor in all_flavors: + for dst_flavor in all_flavors: + # Converters with the same source and destination flavor + # are used when available, since they may perform some + # optimizations on the resulting array (e.g. making it + # contiguous). Otherwise, an identity function is used. 
+ convfunc = None + try: + convfunc = eval(f"_conv_{src_flavor}_to_{dst_flavor}") + except NameError: + if src_flavor == dst_flavor: + convfunc = identity + if convfunc: + converter_map[(src_flavor, dst_flavor)] = convfunc + + +def _register_all() -> None: + """Register all *available* flavors.""" + _register_aliases() + _register_descriptions() + _register_identifiers() + _register_converters() + + +def _deregister_aliases(flavor: FlavorType) -> None: + """Deregister aliases of a given `flavor` (no checks).""" + rm_aliases = [] + for an_alias, a_flavor in alias_map.items(): + if a_flavor == flavor: + rm_aliases.append(an_alias) + for an_alias in rm_aliases: + del alias_map[an_alias] + + +def _deregister_description(flavor: FlavorType) -> None: + """Deregister description of a given `flavor` (no checks).""" + del description_map[flavor] + + +def _deregister_identifier(flavor: FlavorType) -> None: + """Deregister identifier function of a given `flavor` (no checks).""" + del identifier_map[flavor] + + +def _deregister_converters(flavor: FlavorType) -> None: + """Deregister converter functions of a given `flavor` (no checks).""" + rm_flavor_pairs = [] + for flavor_pair in converter_map: + if flavor in flavor_pair: + rm_flavor_pairs.append(flavor_pair) + for flavor_pair in rm_flavor_pairs: + del converter_map[flavor_pair] + + +def _disable_flavor(flavor: FlavorType) -> None: + """Completely disable the given `flavor` (no checks).""" + _deregister_aliases(flavor) + _deregister_description(flavor) + _deregister_identifier(flavor) + _deregister_converters(flavor) + all_flavors.remove(flavor) + + +# Implementation of flavors +_python_aliases: list[str] = [ + "List", + "Tuple", + "Int", + "Float", + "String", + "VLString", + "Object", +] +_python_desc = "homogeneous list or tuple, " "integer, float, complex or bytes" + + +def _is_python(array: Any) -> bool: + return isinstance(array, (tuple, list, int, float, complex, bytes)) + + +_numpy_aliases: list[str] = [] +_numpy_desc 
= "NumPy array, record or scalar" + + +if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("1.19.0"): + + def toarray(array: npt.ArrayLike, *args, **kwargs) -> np.ndarray: + """Convert the input to a numpy array if needed.""" + with warnings.catch_warnings(): + warnings.simplefilter("error") + try: + array = np.array(array, *args, **kwargs) + except VisibleDeprecationWarning: + raise ValueError( + "cannot guess the desired dtype from the input" + ) + + return array + +else: + toarray = np.array + + +def _is_numpy(array: Any) -> bool: + return isinstance(array, (np.ndarray, np.generic)) + + +def _numpy_contiguous( + convfunc: Callable[[npt.ArrayLike], np.ndarray], +) -> Callable[[npt.ArrayLike], np.ndarray]: + """Decorate `convfunc` to return a *contiguous* NumPy array. + + Note: When arrays are 0-strided, the copy is avoided. This allows + to use `array` to still carry info about the dtype and shape. + """ + + def conv_to_numpy(array: npt.ArrayLike) -> np.ndarray: + nparr = convfunc(array) + if ( + hasattr(nparr, "flags") + and not nparr.flags.contiguous + and sum(nparr.strides) != 0 + ): + nparr = nparr.copy() # copying the array makes it contiguous + return nparr + + conv_to_numpy.__name__ = convfunc.__name__ + conv_to_numpy.__doc__ = convfunc.__doc__ + return conv_to_numpy + + +@_numpy_contiguous +def _conv_numpy_to_numpy(array: npt.ArrayLike) -> np.ndarray: + # Passes contiguous arrays through and converts scalars into + # scalar arrays. 
+ nparr = np.asarray(array) + if nparr.dtype.kind == "U": + # from Python 3 loads of common strings are disguised as Unicode + try: + # try to convert to basic 'S' type + return nparr.astype("S") + except UnicodeEncodeError: + pass + # pass on true Unicode arrays downstream in case it can be + # handled in the future + return nparr + + +@_numpy_contiguous +def _conv_python_to_numpy(array: npt.ArrayLike) -> np.ndarray: + nparr = toarray(array) + if nparr.dtype.kind == "U": + # from Python 3 loads of common strings are disguised as Unicode + try: + # try to convert to basic 'S' type + return nparr.astype("S") + except UnicodeEncodeError: + pass + # pass on true Unicode arrays downstream in case it can be + # handled in the future + return nparr + + +def _conv_numpy_to_python(array: np.ndarray) -> Any | list[Any]: + if array.shape != (): + # Lists are the default for returning multidimensional objects + array = array.tolist() + else: + # 0-dim or scalar case + array = array.item() + return array + + +# Now register everything related with *available* flavors. +_register_all() + + +def _test() -> None: + """Run ``doctest`` on this module.""" + import doctest + + doctest.testmod() + + +if __name__ == "__main__": + _test() diff --git a/venv/Lib/site-packages/tables/group.py b/venv/Lib/site-packages/tables/group.py new file mode 100644 index 0000000..56992e2 --- /dev/null +++ b/venv/Lib/site-packages/tables/group.py @@ -0,0 +1,1314 @@ +"""Here is defined the Group class.""" + +from __future__ import annotations + +import os +import weakref +import warnings +from typing import Any, Literal, NoReturn, TYPE_CHECKING +from collections.abc import Iterator + +from . 
import hdf5extension, utilsextension +from .leaf import Leaf +from .link import Link, SoftLink, ExternalLink +from .node import Node, NotLoggedMixin +from .path import check_name_validity, join_path, isvisiblename +from .filters import Filters +from .registry import class_id_dict, get_class_by_name +from .exceptions import ( + NodeError, + NoSuchNodeError, + NaturalNameWarning, + PerformanceWarning, +) +from .unimplemented import UnImplemented, Unknown +from .misc.proxydict import ProxyDict + +if TYPE_CHECKING: + from .file import File + + +obversion = "1.0" + + +class _ChildrenDict(ProxyDict): + def _get_value_from_container(self, container: Group, key: str) -> Node: + return container._f_get_child(key) + + +class Group(hdf5extension.Group, Node): + """Basic PyTables grouping structure. + + Instances of this class are grouping structures containing *child* + instances of zero or more groups or leaves, together with + supporting metadata. Each group has exactly one *parent* group. + + Working with groups and leaves is similar in many ways to working + with directories and files, respectively, in a Unix filesystem. + As with Unix directories and files, objects in the object tree are + often described by giving their full (or absolute) path names. + This full path can be specified either as a string (like in + '/group1/group2') or as a complete object path written in *natural + naming* schema (like in file.root.group1.group2). + + A collateral effect of the *natural naming* schema is that the + names of members in the Group class and its instances must be + carefully chosen to avoid colliding with existing children node + names. For this reason and to avoid polluting the children + namespace all members in a Group start with some reserved prefix, + like _f_ (for public methods), _g_ (for private ones), _v_ (for + instance variables) or _c_ (for class variables). 
Any attempt to + create a new child node whose name starts with one of these + prefixes will raise a ValueError exception. + + Another effect of natural naming is that children named after + Python keywords or having names not valid as Python identifiers + (e.g. class, $a or 44) can not be accessed using the node.child + syntax. You will be forced to use node._f_get_child(child) to + access them (which is recommended for programmatic accesses). + + You will also need to use _f_get_child() to access an existing + child node if you set a Python attribute in the Group with the + same name as that node (you will get a NaturalNameWarning when + doing this). + + Parameters + ---------- + parentnode + The parent :class:`Group` object. + name : str + The name of this node in its parent group. + title + The title for this group + new + If this group is new or has to be read from disk + filters : Filters + A Filters instance + + + .. versionchanged:: 3.0 + *parentNode* renamed into *parentnode* + + Notes + ----- + The following documentation includes methods that are automatically + called when a Group instance is accessed in a special way. + + For instance, this class defines the __setattr__, __getattr__, + __delattr__ and __dir__ methods, and they set, get and delete + *ordinary Python attributes* as normally intended. In addition to that, + __getattr__ allows getting *child nodes* by their name for the sake of + easy interaction on the command line, as long as there is no Python + attribute with the same name. Groups also allow the interactive + completion (when using readline) of the names of child nodes. + For instance:: + + # get a Python attribute + nchild = group._v_nchildren + + # Add a Table child called 'table' under 'group'. + h5file.create_table(group, 'table', myDescription) + table = group.table # get the table child instance + group.table = 'foo' # set a Python attribute + + # (PyTables warns you here about using the name of a child node.) 
+ foo = group.table # get a Python attribute + del group.table # delete a Python attribute + table = group.table # get the table child instance again + + Additionally, on interactive python sessions you may get autocompletions + of children named as *valid python identifiers* by pressing the `[Tab]` + key, or to use the dir() global function. + + .. rubric:: Group attributes + + The following instance variables are provided in addition to those + in Node (see :ref:`NodeClassDescr`): + + .. attribute:: _v_children + + Dictionary with all nodes hanging from this group. + + .. attribute:: _v_groups + + Dictionary with all groups hanging from this group. + + .. attribute:: _v_hidden + + Dictionary with all hidden nodes hanging from this group. + + .. attribute:: _v_leaves + + Dictionary with all leaves hanging from this group. + + .. attribute:: _v_links + + Dictionary with all links hanging from this group. + + .. attribute:: _v_unknown + + Dictionary with all unknown nodes hanging from this group. + + """ + + # Class identifier. + _c_classid = "GROUP" + + # Children containers that should be loaded only in a lazy way. + # These are documented in the ``Group._g_add_children_names`` method. + _c_lazy_children_attrs = ( + "__members__", + "_v_children", + "_v_groups", + "_v_leaves", + "_v_links", + "_v_unknown", + "_v_hidden", + ) + + # `_v_nchildren` is a direct read-only shorthand + # for the number of *visible* children in a group. + def _g_getnchildren(self) -> int: + """Return the number of children hanging from this group.""" + return len(self._v_children) + + _v_nchildren = property(_g_getnchildren) + + # `_v_filters` is a direct read-write shorthand for the ``FILTERS`` + # attribute with the default `Filters` instance as a default value. 
+ def _g_getfilters(self) -> Filters: + filters = getattr(self._v_attrs, "FILTERS", None) + if filters is None: + filters = Filters() + return filters + + def _g_setfilters(self, value: Filters) -> None: + if not isinstance(value, Filters): + raise TypeError( + f"value is not an instance of `Filters`: {value!r}" + ) + self._v_attrs.FILTERS = value + + def _g_delfilters(self) -> None: + del self._v_attrs.FILTERS + + _v_filters = property( + _g_getfilters, + _g_setfilters, + _g_delfilters, + """Default filter properties for child nodes. + + You can (and are encouraged to) use this property to get, set and + delete the FILTERS HDF5 attribute of the group, which stores a Filters + instance (see :ref:`FiltersClassDescr`). When the group has no such + attribute, a default Filters instance is used. + """, + ) + + def __init__( + self, + parentnode: Group, + name: str, + title: str = "", + new: bool = False, + filters: Filters | None = None, + _log: bool = True, + ) -> None: + + # Remember to assign these values in the root group constructor + # if it does not use this one! + + # First, set attributes belonging to group objects. + + self._v_version = obversion + """The object version of this group.""" + + self._v_new = new + """Is this the first time the node has been created?""" + + self._v_new_title = title + """New title for this node.""" + + self._v_new_filters = filters + """New default filter properties for child nodes.""" + + self._v_max_group_width = parentnode._v_file.params["MAX_GROUP_WIDTH"] + """Maximum number of children on each group before warning the user. + + .. versionchanged:: 3.0 + The *_v_maxGroupWidth* attribute has been renamed into + *_v_max_group_width*. + + """ + # Finally, set up this object as a node. + super().__init__(parentnode, name, _log) + + def _g_post_init_hook(self) -> None: + if self._v_new: + if self._v_file.params["PYTABLES_SYS_ATTRS"]: + # Save some attributes for the new group on disk. 
+ set_attr = self._v_attrs._g__setattr + # Set the title, class and version attributes. + set_attr("TITLE", self._v_new_title) + set_attr("CLASS", self._c_classid) + set_attr("VERSION", self._v_version) + + # Set the default filter properties. + newfilters = self._v_new_filters + if newfilters is None: + # If no filters have been passed in the constructor, + # inherit them from the parent group, but only if they + # have been inherited or explicitly set. + newfilters = getattr( + self._v_parent._v_attrs, "FILTERS", None + ) + if newfilters is not None: + set_attr("FILTERS", newfilters) + else: + # If the file has PyTables format, get the VERSION attr + if "VERSION" in self._v_attrs._v_attrnamessys: + self._v_version = self._v_attrs.VERSION + else: + self._v_version = "0.0 (unknown)" + # We don't need to get more attributes from disk, + # since the most important ones are defined as properties. + + def __del__(self) -> None: + if ( + self._v_isopen + and self._v_pathname in self._v_file._node_manager.registry + and "_v_children" in self.__dict__ + ): + # The group is going to be killed. Rebuild weak references + # (that Python cancelled just before calling this method) so + # that they are still usable if the object is revived later. + selfref = weakref.ref(self) + self._v_children.containerref = selfref + self._v_groups.containerref = selfref + self._v_leaves.containerref = selfref + self._v_links.containerref = selfref + self._v_unknown.containerref = selfref + self._v_hidden.containerref = selfref + + super().__del__() + + def _g_get_child_group_class(self, childname: str) -> type: + """Get the class of a not-yet-loaded group child. + + `childname` must be the name of a *group* child. 
+ + """ + child_cid = self._g_get_gchild_attr(childname, "CLASS") + if child_cid is not None and not isinstance(child_cid, str): + child_cid = child_cid.decode("utf-8") + + if child_cid in class_id_dict: + return class_id_dict[child_cid] # look up group class + else: + return Group # default group class + + def _g_get_child_leaf_class( + self, childname: str, warn: bool = True + ) -> type: + """Get the class of a not-yet-loaded leaf child. + + `childname` must be the name of a *leaf* child. If the child + belongs to an unknown kind of leaf, or if its kind can not be + guessed, `UnImplemented` will be returned and a warning will be + issued if `warn` is true. + + """ + if self._v_file.params["PYTABLES_SYS_ATTRS"]: + child_cid = self._g_get_lchild_attr(childname, "CLASS") + if child_cid is not None and not isinstance(child_cid, str): + child_cid = child_cid.decode("utf-8") + else: + child_cid = None + + if child_cid in class_id_dict: + return class_id_dict[child_cid] # look up leaf class + else: + # Unknown or no ``CLASS`` attribute, try a guess. + child_cid2 = utilsextension.which_class( + self._v_objectid, childname + ) + if child_cid2 == "UNSUPPORTED": + if warn: + if child_cid is None: + warnings.warn( + "leaf ``%s`` is of an unsupported type; " + "it will become an ``UnImplemented`` node" + % self._g_join(childname) + ) + else: + warnings.warn( + ( + "leaf ``%s`` has an unknown class ID ``%s``; " + "it will become an ``UnImplemented`` node" + ) + % (self._g_join(childname), child_cid) + ) + return UnImplemented + assert child_cid2 in class_id_dict + return class_id_dict[child_cid2] # look up leaf class + + def _g_add_children_names(self) -> None: + """Add children names to the group. + + The method properly takes into account the visibility and kind. + """ + mydict = self.__dict__ + + # The names of the lazy attributes + mydict["__members__"] = members = [] + """The names of visible children nodes for readline-style completion. 
+ """ + mydict["_v_children"] = children = _ChildrenDict(self) + """The number of children hanging from this group.""" + mydict["_v_groups"] = groups = _ChildrenDict(self) + """Dictionary with all groups hanging from this group.""" + mydict["_v_leaves"] = leaves = _ChildrenDict(self) + """Dictionary with all leaves hanging from this group.""" + mydict["_v_links"] = links = _ChildrenDict(self) + """Dictionary with all links hanging from this group.""" + mydict["_v_unknown"] = unknown = _ChildrenDict(self) + """Dictionary with all unknown nodes hanging from this group.""" + mydict["_v_hidden"] = hidden = _ChildrenDict(self) + """Dictionary with all hidden nodes hanging from this group.""" + + # Get the names of *all* child groups and leaves. + group_names, leaf_names, link_names, unknown_names = ( + self._g_list_group(self._v_parent) + ) + + # Separate groups into visible groups and hidden nodes, + # and leaves into visible leaves and hidden nodes. + for childnames, childdict in ( + (group_names, groups), + (leaf_names, leaves), + (link_names, links), + (unknown_names, unknown), + ): + + for childname in childnames: + # See whether the name implies that the node is hidden. + # (Assigned values are entirely irrelevant.) + if isvisiblename(childname): + # Visible node. + members.insert(0, childname) + children[childname] = None + childdict[childname] = None + else: + # Hidden node. + hidden[childname] = None + + def _g_check_has_child( + self, + name: str, + ) -> Literal[ + "ExternalLink", + "Group", + "Leaf", + "NamedType", + "NoSuchNode", + "SoftLink", + "Unknown", + ]: + """Check whether 'name' is a children of 'self' and return its type.""" + # Get the HDF5 name matching the PyTables name. 
+ node_type = self._g_get_objinfo(name) + if node_type == "NoSuchNode": + raise NoSuchNodeError( + "group ``%s`` does not have a child named ``%s``" + % (self._v_pathname, name) + ) + return node_type + + def __iter__(self) -> Iterator[Node]: + """Iterate over the child nodes hanging directly from the group. + + This iterator is *not* recursive. + + Examples + -------- + :: + + # Non-recursively list all the nodes hanging from '/detector' + print("Nodes in '/detector' group:") + for node in h5file.root.detector: + print(node) + + """ + return self._f_iter_nodes() + + def __contains__(self, name: str) -> bool: + """Return True if there is a child with the specified `name`. + + Returns a true value if the group has a child node (visible or + hidden) with the given `name` (a string), false otherwise. + + """ + self._g_check_open() + try: + self._g_check_has_child(name) + except NoSuchNodeError: + return False + return True + + def __getitem__(self, childname: str) -> Node: + """Return the (visible or hidden) child with that `name` (a string). + + Raise IndexError if child not exist. + """ + try: + return self._f_get_child(childname) + except NoSuchNodeError: + raise IndexError(childname) + + def _f_walknodes(self, classname: str | None = None) -> Iterator[Node]: + """Iterate over descendant nodes. + + This method recursively walks *self* top to bottom (preorder), + iterating over child groups in alphanumerical order, and yielding + nodes. If classname is supplied, only instances of the named class are + yielded. + + If *classname* is Group, it behaves like :meth:`Group._f_walk_groups`, + yielding only groups. If you don't want a recursive behavior, + use :meth:`Group._f_iter_nodes` instead. 
+ + Examples + -------- + :: + + # Recursively print all the arrays hanging from '/' + print("Arrays in the object tree '/':") + for array in h5file.root._f_walknodes('Array', recursive=True): + print(array) + + """ + self._g_check_open() + + # For compatibility with old default arguments. + if classname == "": + classname = None + + if classname == "Group": + # Recursive algorithm + yield from self._f_walk_groups() + else: + for group in self._f_walk_groups(): + yield from group._f_iter_nodes(classname) + + def _g_join(self, name: str) -> str: + """Concatenate a name child object with the pathname of this group.""" + if name == "/": + # This case can happen when doing copies + return self._v_pathname + return join_path(self._v_pathname, name) + + def _g_width_warning(self) -> None: + """Issue a :exc:`PerformanceWarning` on too many children.""" + warnings.warn( + """\ +group ``%s`` is exceeding the recommended maximum number of children (%d); \ +be ready to see PyTables asking for *lots* of memory and possibly slow I/O.""" + % (self._v_pathname, self._v_max_group_width), + PerformanceWarning, + ) + + def _g_refnode( + self, childnode: Node, childname: str, validate: bool = True + ) -> None: + """Insert references to a `childnode` via a `childname`. + + Checks that the `childname` is valid and does not exist, then + creates references to the given `childnode` by that `childname`. + The validation of the name can be omitted by setting `validate` + to a false value (this may be useful for adding already existing + nodes to the tree). + + """ + # Check for name validity. + if validate: + check_name_validity(childname) + childnode._g_check_name(childname) + + # Check if there is already a child with the same name. + # This can be triggered because of the user + # (via node construction or renaming/movement). + # Links are not checked here because they are copied and referenced + # using ``File.get_node`` so they already exist in `self`. 
+ if (not isinstance(childnode, Link)) and childname in self: + raise NodeError( + "group ``%s`` already has a child node named ``%s``" + % (self._v_pathname, childname) + ) + + # Show a warning if there is an object attribute with that name. + if childname in self.__dict__: + warnings.warn( + f"group ``{self._v_pathname}`` already has an attribute " + f"named ``{childname}``; you will not be able to use " + f"natural naming to access the child node", + NaturalNameWarning, + ) + + # Check group width limits. + if ( + len(self._v_children) + len(self._v_hidden) + >= self._v_max_group_width + ): + self._g_width_warning() + + # Update members information. + # Insert references to the new child. + # (Assigned values are entirely irrelevant.) + if isvisiblename(childname): + # Visible node. + self.__members__.insert(0, childname) # enable completion + self._v_children[childname] = None # insert node + if isinstance(childnode, Unknown): + self._v_unknown[childname] = None + elif isinstance(childnode, Link): + self._v_links[childname] = None + elif isinstance(childnode, Leaf): + self._v_leaves[childname] = None + elif isinstance(childnode, Group): + self._v_groups[childname] = None + else: + # Hidden node. + self._v_hidden[childname] = None # insert node + + def _g_unrefnode(self, childname: str) -> None: + """Remove references to a node. + + Removes all references to the named node. + + """ + # This can *not* be triggered because of the user. + assert childname in self, ( + f"group ``{self._v_pathname}`` does not have a child node " + f"named ``{childname}``" + ) + + # Update members information, if needed + if "_v_children" in self.__dict__: + if childname in self._v_children: + # Visible node. 
+ members = self.__members__ + member_index = members.index(childname) + del members[member_index] # disables completion + + del self._v_children[childname] # remove node + self._v_unknown.pop(childname, None) + self._v_links.pop(childname, None) + self._v_leaves.pop(childname, None) + self._v_groups.pop(childname, None) + else: + # Hidden node. + del self._v_hidden[childname] # remove node + + def _g_move(self, newparent: Group, newname: str) -> None: + # Move the node to the new location. + oldpath = self._v_pathname + super()._g_move(newparent, newname) + newpath = self._v_pathname + + # Update location information in children. This node shouldn't + # be affected since it has already been relocated. + self._v_file._update_node_locations(oldpath, newpath) + + def _g_copy( + self, + newparent: Group, + newname: str, + recursive: bool, + _log: bool = True, + **kwargs, + ) -> Group: + # Compute default arguments. + title = kwargs.get("title", self._v_title) + filters = kwargs.get("filters", None) + stats = kwargs.get("stats", None) + + # Fix arguments with explicit None values for backwards compatibility. + if title is None: + title = self._v_title + # If no filters have been passed to the call, copy them from the + # source group, but only if inherited or explicitly set. + if filters is None: + filters = getattr(self._v_attrs, "FILTERS", None) + + # Create a copy of the object. + new_node = Group( + newparent, newname, title, new=True, filters=filters, _log=_log + ) + + # Copy user attributes if needed. + if kwargs.get("copyuserattrs", True): + self._v_attrs._g_copy(new_node._v_attrs, copyclass=True) + + # Update statistics if needed. + if stats is not None: + stats["groups"] += 1 + + if recursive: + # Copy child nodes if a recursive copy was requested. + # Some arguments should *not* be passed to children copy ops. 
+ kwargs = kwargs.copy() + kwargs.pop("title", None) + self._g_copy_children(new_node, **kwargs) + + return new_node + + def _g_copy_children(self, newparent: Group, **kwargs) -> None: + """Copy child nodes. + + Copies all nodes descending from this one into the specified + `newparent`. If the new parent has a child node with the same + name as one of the nodes in this group, the copy fails with a + `NodeError`, maybe resulting in a partial copy. Nothing is + logged. + + """ + # Recursive version of children copy. + # for srcchild in self._v_children.itervalues(): + # srcchild._g_copy_as_child(newparent, **kwargs) + + # Non-recursive version of children copy. + use_hardlinks = kwargs.get("use_hardlinks", False) + if use_hardlinks: + address_map = kwargs.setdefault("address_map", {}) + + parentstack = [(self, newparent)] # [(source, destination), ...] + while parentstack: + srcparent, dstparent = parentstack.pop() + + if use_hardlinks: + for srcchild in srcparent._v_children.values(): + addr, rc = srcchild._get_obj_info() + if rc > 1 and addr in address_map: + where, name = address_map[addr][0] + localsrc = os.path.join(where, name) + dstparent._v_file.create_hard_link( + dstparent, srcchild.name, localsrc + ) + address_map[addr].append( + (dstparent._v_pathname, srcchild.name) + ) + + # Update statistics if needed. + stats = kwargs.pop("stats", None) + if stats is not None: + stats["hardlinks"] += 1 + else: + dstchild = srcchild._g_copy_as_child( + dstparent, **kwargs + ) + if isinstance(srcchild, Group): + parentstack.append((srcchild, dstchild)) + + if rc > 1: + address_map[addr] = [ + (dstparent._v_pathname, srcchild.name) + ] + else: + for srcchild in srcparent._v_children.values(): + dstchild = srcchild._g_copy_as_child(dstparent, **kwargs) + if isinstance(srcchild, Group): + parentstack.append((srcchild, dstchild)) + + def _f_get_child(self, childname: str) -> Node: + """Get the child called childname of this group. 
+ + If the child exists (be it visible or not), it is returned. Else, a + NoSuchNodeError is raised. + + Using this method is recommended over getattr() when doing programmatic + accesses to children if childname is unknown beforehand or when its + name is not a valid Python identifier. + + """ + self._g_check_open() + + self._g_check_has_child(childname) + + childpath = join_path(self._v_pathname, childname) + return self._v_file._get_node(childpath) + + def _f_list_nodes(self, classname: str | None = None) -> list[Node]: + """Return a *list* with children nodes. + + This is a list-returning version of :meth:`Group._f_iter_nodes()`. + + """ + return list(self._f_iter_nodes(classname)) + + def _f_iter_nodes(self, classname: str | None = None) -> Iterator[Node]: + """Iterate over children nodes. + + Child nodes are yielded alphanumerically sorted by node name. If the + name of a class derived from Node (see :ref:`NodeClassDescr`) is + supplied in the classname parameter, only instances of that class (or + subclasses of it) will be returned. + + This is an iterator version of :meth:`Group._f_list_nodes`. 
+ + """ + self._g_check_open() + + if not classname: + # Returns all the children alphanumerically sorted + for name in sorted(self._v_children): + yield self._v_children[name] + elif classname == "Group": + # Returns all the groups alphanumerically sorted + for name in sorted(self._v_groups): + yield self._v_groups[name] + elif classname == "Leaf": + # Returns all the leaves alphanumerically sorted + for name in sorted(self._v_leaves): + yield self._v_leaves[name] + elif classname == "Link": + # Returns all the links alphanumerically sorted + for name in sorted(self._v_links): + yield self._v_links[name] + elif classname == "IndexArray": + raise TypeError("listing ``IndexArray`` nodes is not allowed") + else: + class_ = get_class_by_name(classname) + for childname, childnode in sorted(self._v_children.items()): + if isinstance(childnode, class_): + yield childnode + + def _f_walk_groups(self) -> Iterator[Group]: + """Recursively iterate over descendent groups (not leaves). + + This method starts by yielding *self*, and then it goes on to + recursively iterate over all child groups in alphanumerical order, top + to bottom (preorder), following the same procedure. + + """ + self._g_check_open() + + stack = [self] + yield self + # Iterate over the descendants + while stack: + objgroup = stack.pop() + groupnames = sorted(objgroup._v_groups) + # Sort the groups before delivering. This uses the groups names + # for groups in tree (in order to sort() can classify them). + for groupname in groupnames: + # TODO: check recursion + stack.append(objgroup._v_groups[groupname]) + yield objgroup._v_groups[groupname] + + def __delattr__(self, name: str) -> None: + """Delete a Python attribute called name. + + This method only provides an extra warning in case the user + tries to delete a children node using __delattr__. + + To remove a children node from this group use + :meth:`File.remove_node` or :meth:`Node._f_remove`. 
To delete + a PyTables node attribute use :meth:`File.del_node_attr`, + :meth:`Node._f_delattr` or :attr:`Node._v_attrs``. + + If there is an attribute and a child node with the same name, + the child node will be made accessible again via natural naming. + + """ + try: + super().__delattr__(name) # nothing particular + except AttributeError as ae: + hint = " (use ``node._f_remove()`` if you want to remove a node)" + raise ae.__class__(str(ae) + hint) + + def __dir__(self) -> list[str]: + """Autocomplete only children named as valid python identifiers. + + Only PY3 supports this special method. + """ + subnods = [c for c in self._v_children if c.isidentifier()] + return super().__dir__() + subnods + + def __getattr__(self, name: str) -> Any: + """Get a Python attribute or child node called name. + + If the node has a child node called name it is returned, + else an AttributeError is raised. + """ + if name in self._c_lazy_children_attrs: + self._g_add_children_names() + return self.__dict__[name] + return self._f_get_child(name) + + def __setattr__(self, name: str, value: Any) -> None: + """Set a Python attribute called name with the given value. + + This method stores an *ordinary Python attribute* in the object. It + does *not* store new children nodes under this group; for that, use the + File.create*() methods (see the File class + in :ref:`FileClassDescr`). It does *neither* store a PyTables node + attribute; for that, + use :meth:`File.set_node_attr`, :meth`:Node._f_setattr` + or :attr:`Node._v_attrs`. + + If there is already a child node with the same name, a + NaturalNameWarning will be issued and the child node will not be + accessible via natural naming nor getattr(). It will still be available + via :meth:`File.get_node`, :meth:`Group._f_get_child` and children + dictionaries in the group (if visible). + + """ + # Show a warning if there is a child node with that name. 
+ # + # ..note:: + # + # Using ``if name in self:`` is not right since that would + # require ``_v_children`` and ``_v_hidden`` to be already set + # when the very first attribute assignments are made. + # Moreover, this warning is only concerned about clashes with + # names used in natural naming, i.e. those in ``__members__``. + # + # ..note:: + # + # The check ``'__members__' in myDict`` allows attribute + # assignment to happen before calling `Group.__init__()`, by + # avoiding to look into the still not assigned ``__members__`` + # attribute. This allows subclasses to set up some attributes + # and then call the constructor of the superclass. If the + # check above is disabled, that results in Python entering an + # endless loop on exit! + + mydict = self.__dict__ + if "__members__" in mydict and name in self.__members__: + warnings.warn( + "group ``%s`` already has a child node named ``%s``; " + "you will not be able to use natural naming " + "to access the child node" % (self._v_pathname, name), + NaturalNameWarning, + ) + + super().__setattr__(name, value) + + def _f_flush(self) -> None: + """Flush this Group.""" + self._g_check_open() + self._g_flush_group() + + def _g_close_descendents(self) -> None: + """Close all the *loaded* descendent nodes of this group.""" + node_manager = self._v_file._node_manager + node_manager.close_subtree(self._v_pathname) + + def _g_close(self) -> None: + """Close this (open) group.""" + if self._v_isopen: + # hdf5extension operations: + # Close HDF5 group. + self._g_close_group() + + # Close myself as a node. + super()._f_close() + + def _f_close(self) -> None: + """Close this group and all its descendents. + + This method has the behavior described in :meth:`Node._f_close`. + It should be noted that this operation closes all the nodes + descending from this group. + + You should not need to close nodes manually because they are + automatically opened/closed when they are loaded/evicted from + the integrated LRU cache. 
+ + """ + # If the group is already closed, return immediately + if not self._v_isopen: + return + + # First, close all the descendents of this group, unless a) the + # group is being deleted (evicted from LRU cache) or b) the node + # is being closed during an aborted creation, in which cases + # this is not an explicit close issued by the user. + if not (self._v__deleting or self._v_objectid is None): + self._g_close_descendents() + + # When all the descendents have been closed, close this group. + # This is done at the end because some nodes may still need to + # be loaded during the closing process; thus this node must be + # open until the very end. + self._g_close() + + def _g_remove(self, recursive: bool = False, force: bool = False) -> None: + """Remove (recursively if needed) the Group. + + This version correctly handles both visible and hidden nodes. + + """ + if self._v_nchildren > 0: + if not (recursive or force): + raise NodeError( + "group ``%s`` has child nodes; " + "please set `recursive` or `force` to true " + "to remove it" % (self._v_pathname,) + ) + + # First close all the descendents hanging from this group, + # so that it is not possible to use a node that no longer exists. + self._g_close_descendents() + + # Remove the node itself from the hierarchy. + super()._g_remove(recursive, force) + + def _f_copy( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + recursive: bool = False, + createparents: bool = False, + **kwargs, + ) -> Group: + """Copy this node and return the new one. + + This method has the behavior described in :meth:`Node._f_copy`. + In addition, it recognizes the following keyword arguments: + + Parameters + ---------- + title + The new title for the destination. If omitted or None, the + original title is used. This only applies to the topmost + node in recursive copies. 
+ filters : Filters + Specifying this parameter overrides the original filter + properties in the source node. If specified, it must be an + instance of the Filters class (see :ref:`FiltersClassDescr`). + The default is to copy the filter properties from the source + node. + copyuserattrs + You can prevent the user attributes from being copied by setting + thisparameter to False. The default is to copy them. + stats + This argument may be used to collect statistics on the copy + process. When used, it should be a dictionary with keys 'groups', + 'leaves', 'links' and 'bytes' having a numeric value. Their values + will be incremented to reflect the number of groups, leaves and + bytes, respectively, that have been copied during the operation. + + """ + return super()._f_copy( + newparent, newname, overwrite, recursive, createparents, **kwargs + ) + + def _f_copy_children( + self, + dstgroup: Group, + overwrite: bool = False, + recursive: bool = False, + createparents: bool = False, + **kwargs, + ) -> None: + """Copy the children of this group into another group. + + Children hanging directly from this group are copied into dstgroup, + which can be a Group (see :ref:`GroupClassDescr`) object or its + pathname in string form. If createparents is true, the needed groups + for the given destination group path to exist will be created. + + The operation will fail with a NodeError if there is a child node + in the destination group with the same name as one of the copied + children from this one, unless overwrite is true; in this case, + the former child node is recursively removed before copying the + latter. + + By default, nodes descending from children groups of this node + are not copied. If the recursive argument is true, all descendant + nodes of this node are recursively copied. + + Additional keyword arguments may be passed to customize the + copying process. 
For instance, title and filters may be changed, + user attributes may be or may not be copied, data may be sub-sampled, + stats may be collected, etc. Arguments unknown to nodes are simply + ignored. Check the documentation for copying operations of nodes to + see which options they support. + + """ + self._g_check_open() + + # `dstgroup` is used instead of its path to avoid accepting + # `Node` objects when `createparents` is true. Also, note that + # there is no risk of creating parent nodes and failing later + # because of destination nodes already existing. + dstparent = self._v_file._get_or_create_path(dstgroup, createparents) + self._g_check_group(dstparent) # Is it a group? + + if not overwrite: + # Abort as early as possible when destination nodes exist + # and overwriting is not enabled. + for childname in self._v_children: + if childname in dstparent: + raise NodeError( + "destination group ``%s`` already has " + "a node named ``%s``; " + "you may want to use the ``overwrite`` argument" + % (dstparent._v_pathname, childname) + ) + + use_hardlinks = kwargs.get("use_hardlinks", False) + if use_hardlinks: + address_map = kwargs.setdefault("address_map", {}) + + for child in self._v_children.values(): + addr, rc = child._get_obj_info() + if rc > 1 and addr in address_map: + where, name = address_map[addr][0] + localsrc = os.path.join(where, name) + dstparent._v_file.create_hard_link( + dstparent, child.name, localsrc + ) + address_map[addr].append( + (dstparent._v_pathname, child.name) + ) + + # Update statistics if needed. 
+ stats = kwargs.pop("stats", None) + if stats is not None: + stats["hardlinks"] += 1 + else: + child._f_copy( + dstparent, None, overwrite, recursive, **kwargs + ) + if rc > 1: + address_map[addr] = [ + (dstparent._v_pathname, child.name) + ] + else: + for child in self._v_children.values(): + child._f_copy(dstparent, None, overwrite, recursive, **kwargs) + + def __str__(self) -> str: + """Return a short string representation of the group. + + Examples + -------- + :: + + >>> import tables + >>> f = tables.open_file('tables/tests/Tables_lzo2.h5') + >>> print(f.root.group0) + /group0 (Group) '' + >>> f.close() + + """ + return ( + f"{self._v_pathname} ({self.__class__.__name__}) " + f"{self._v_title!r}" + ) + + def __repr__(self) -> str: + """Return a detailed string representation of the group. + + Examples + -------- + :: + + >>> import tables + >>> f = tables.open_file('tables/tests/Tables_lzo2.h5') + >>> f.root.group0 + /group0 (Group) '' + children := ['group1' (Group), 'tuple1' (Table)] + >>> f.close() + + """ + rep = [ + f"{childname!r} ({child.__class__.__name__})" + for (childname, child) in self._v_children.items() + ] + return f'{self!s}\n children := [{", ".join(rep)}]' + + +# Special definition for group root +class RootGroup(Group): + """Root Group.""" + + def __init__( + self, ptfile: File, name: str, title: str, new: bool, filters: Filters + ) -> None: + mydict = self.__dict__ + + # Set group attributes. + self._v_version = obversion + self._v_new = new + if new: + self._v_new_title = title + self._v_new_filters = filters + else: + self._v_new_title = None + self._v_new_filters = None + + # Set node attributes. + self._v_file = ptfile + self._v_isopen = True # root is always open + self._v_pathname = "/" + self._v_name = "/" + self._v_depth = 0 + self._v_max_group_width = ptfile.params["MAX_GROUP_WIDTH"] + self._v__deleting = False + self._v_objectid: int | None = None # later + + # Only the root node has the file as a parent. 
+ # Bypass __setattr__ to avoid the ``Node._v_parent`` property. + mydict["_v_parent"] = ptfile + ptfile._node_manager.register_node(self, "/") + + # hdf5extension operations (do before setting an AttributeSet): + # Update node attributes. + self._g_new(ptfile, name, init=True) + # Open the node and get its object ID. + self._v_objectid = self._g_open() + + # Set disk attributes and read children names. + # + # This *must* be postponed because this method needs the root node + # to be created and bound to ``File.root``. + # This is an exception to the rule, handled by ``File.__init()__``. + # + # self._g_post_init_hook() + + def _g_load_child( + self, + childname: str, + ) -> ExternalLink | Group | Node | SoftLink | UnImplemented | Unknown: + """Load a child node from disk. + + The child node `childname` is loaded from disk and an adequate + `Node` object is created and returned. If there is no such + child, a `NoSuchNodeError` is raised. + + """ + if self._v_file.root_uep != "/": + childname = join_path(self._v_file.root_uep, childname) + # Is the node a group or a leaf? + node_type = self._g_check_has_child(childname) + + # Nodes that HDF5 report as H5G_UNKNOWN + if node_type == "Unknown": + return Unknown(self, childname) + + # Guess the PyTables class suited to the node, + # build a PyTables node and return it. + if node_type == "Group": + if self._v_file.params["PYTABLES_SYS_ATTRS"]: + child_class = self._g_get_child_group_class(childname) + else: + # Default is a Group class + child_class = Group + return child_class(self, childname, new=False) + elif node_type == "Leaf": + child_class = self._g_get_child_leaf_class(childname, warn=True) + # Building a leaf may still fail because of unsupported types + # and other causes. 
+ # return ChildClass(self, childname) # uncomment for debugging + try: + return child_class(self, childname) + except Exception as exc: # XXX + warnings.warn( + "problems loading leaf ``%s``::\n\n" + " %s\n\n" + "The leaf will become an ``UnImplemented`` node." + % (self._g_join(childname), exc) + ) + # If not, associate an UnImplemented object to it + return UnImplemented(self, childname) + elif node_type == "SoftLink": + return SoftLink(self, childname) + elif node_type == "ExternalLink": + return ExternalLink(self, childname) + else: + return UnImplemented(self, childname) + + def _f_rename(self, newname: str) -> NoReturn: + raise NodeError("the root node can not be renamed") + + def _f_move( + self, + newparent: Group | None = None, + newname: str | None = None, + createparents: bool = False, + ) -> NoReturn: + raise NodeError("the root node can not be moved") + + def _f_remove(self, recursive: bool = False) -> NoReturn: + raise NodeError("the root node can not be removed") + + +class TransactionGroupG(NotLoggedMixin, Group): + """Transaction Group.""" + + _c_classid = "TRANSGROUP" + + def _g_width_warning(self) -> None: + warnings.warn( + f"the number of transactions is exceeding the recommended " + f"maximum ({self._v_max_group_width}); be ready to see PyTables " + f"asking for *lots* of memory and possibly slow I/O", + PerformanceWarning, + ) + + +class TransactionG(NotLoggedMixin, Group): + """Transaction group.""" + + _c_classid = "TRANSG" + + def _g_width_warning(self) -> None: + warnings.warn( + f"transaction ``{self._v_pathname}`` is exceeding the " + f"recommended maximum number of marks " + f"({self._v_max_group_width}); be ready to see PyTables " + f"asking for *lots* of memory and possibly slow I/O", + PerformanceWarning, + ) + + +class MarkG(NotLoggedMixin, Group): + """Mark group.""" + + # Class identifier. 
+ _c_classid = "MARKG" + + import re + + _c_shadow_name_re = re.compile(r"^a[0-9]+$") + + def _g_width_warning(self) -> None: + warnings.warn( + f"mark ``{self._v_pathname}`` is exceeding the recommended " + f"maximum action storage ({self._v_max_group_width} nodes); " + f"be ready to see PyTables asking for *lots* of memory and " + f"possibly slow I/O", + PerformanceWarning, + ) + + def _g_reset(self) -> None: + """Empty action storage (nodes and attributes). + + This method empties all action storage kept in this node: nodes + and attributes. + + """ + # Remove action storage nodes. + for child in list(self._v_children.values()): + child._g_remove(True, True) + + # Remove action storage attributes. + attrs = self._v_attrs + shname = self._c_shadow_name_re + for attrname in attrs._v_attrnamesuser[:]: + if shname.match(attrname): + attrs._g__delattr(attrname) diff --git a/venv/Lib/site-packages/tables/hdf5extension.pxd b/venv/Lib/site-packages/tables/hdf5extension.pxd new file mode 100644 index 0000000..003e197 --- /dev/null +++ b/venv/Lib/site-packages/tables/hdf5extension.pxd @@ -0,0 +1,43 @@ +######################################################################## +# +# License: BSD +# Created: +# Author: Francesc Alted - faltet@pytables.com +# +# $Id$ +# +######################################################################## + +from numpy cimport ndarray + +from .definitions cimport hid_t, hsize_t, hbool_t + + +# Declaration of instance variables for shared classes +cdef class Node: + cdef object name + cdef hid_t parent_id + +cdef class Leaf(Node): + cdef hid_t dataset_id + cdef hid_t type_id + cdef hid_t base_type_id + cdef hid_t disk_type_id + cdef hsize_t *dims # Necessary to be here because of Leaf._g_truncate() + cdef _get_type_ids(self) + cdef _convert_time64(self, ndarray nparr, int sense) + +cdef class Array(Leaf): + cdef int rank + cdef hsize_t *maxdims + cdef hsize_t *dims_chunk + cdef hbool_t blosc2_support_read + cdef hbool_t blosc2_support_write 
+ + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## fill-column: 78 +## End: diff --git a/venv/Lib/site-packages/tables/hdf5extension.pyd b/venv/Lib/site-packages/tables/hdf5extension.pyd new file mode 100644 index 0000000..4a3ad38 Binary files /dev/null and b/venv/Lib/site-packages/tables/hdf5extension.pyd differ diff --git a/venv/Lib/site-packages/tables/hdf5extension.pyx b/venv/Lib/site-packages/tables/hdf5extension.pyx new file mode 100644 index 0000000..2981b08 --- /dev/null +++ b/venv/Lib/site-packages/tables/hdf5extension.pyx @@ -0,0 +1,2483 @@ +######################################################################## +# +# License: BSD +# Created: September 21, 2002 +# Author: Francesc Alted - faltet@pytables.com +# +# $Id$ +# +######################################################################## + +"""Cython interface between several PyTables classes and HDF5 library. + +Classes (type extensions): + + File + AttributeSet + Node + Leaf + Group + Array + VLArray + UnImplemented + +Functions: + +Misc variables: + +""" + +import os +import sys +import platform +import warnings +from collections import namedtuple + +ObjInfo = namedtuple('ObjInfo', ['addr', 'rc']) +ObjTimestamps = namedtuple('ObjTimestamps', ['atime', 'mtime', + 'ctime', 'btime']) + +import pickle + +import numpy as np + +from .atom import Atom +from .utils import check_file_access, byteorders, correct_byteorder, SizeType +from .exceptions import HDF5ExtError, DataTypeWarning +from .description import descr_from_dtype +from .utilsextension import ( + encode_filename, + set_blosc_max_threads, + set_blosc2_max_threads, + atom_to_hdf5_type, + atom_from_hdf5_type, + hdf5_to_np_ext_type, + create_nested_type, + pttype_to_hdf5, + pt_special_kinds, + npext_prefixes_to_ptkinds, + hdf5_class_to_string, + platform_byteorder, + get_filters, +) + +# Types, constants, functions, classes & other objects from everywhere + +from numpy cimport ( + import_array, + ndarray, + 
npy_intp, + PyArray_BYTES, + PyArray_DATA, + PyArray_DIMS, + PyArray_NDIM, + PyArray_STRIDE, +) +from libc.stdlib cimport malloc, free +from libc.string cimport strdup, strlen +from cpython.bytes cimport ( + PyBytes_AsString, + PyBytes_FromStringAndSize, + PyBytes_Check, +) +from cpython.unicode cimport PyUnicode_DecodeUTF8 + +from .definitions cimport ( + uintptr_t, + hid_t, + herr_t, + hsize_t, + hvl_t, + uint32_t, + H5S_seloper_t, + H5D_FILL_VALUE_UNDEFINED, + H5O_TYPE_UNKNOWN, + H5O_TYPE_GROUP, + H5O_TYPE_DATASET, + H5O_TYPE_NAMED_DATATYPE, + H5L_TYPE_ERROR, + H5L_TYPE_HARD, + H5L_TYPE_SOFT, + H5L_TYPE_EXTERNAL, + H5T_class_t, + H5T_sign_t, + H5T_NATIVE_INT, + H5T_cset_t, + H5T_CSET_ASCII, + H5T_CSET_UTF8, + H5F_SCOPE_GLOBAL, + H5F_ACC_TRUNC, + H5F_ACC_RDONLY, + H5F_ACC_RDWR, + H5P_DEFAULT, + H5P_FILE_ACCESS, + H5P_FILE_CREATE, + H5T_DIR_DEFAULT, + H5S_SELECT_SET, + H5S_SELECT_AND, + H5S_SELECT_NOTB, + H5Fcreate, + H5Fopen, + H5Fclose, + H5Fflush, + H5Fget_vfd_handle, + H5Fget_filesize, + H5Fget_create_plist, + H5Gcreate, + H5Gopen, + H5Gclose, + H5Ldelete, + H5Lmove, + H5Dopen, + H5Dclose, + H5Dread, + H5Dwrite, + H5Dget_type, + H5Dget_create_plist, + H5Dget_space, + H5Dvlen_reclaim, + H5Dget_storage_size, + H5Dvlen_get_buf_size, + H5Dget_chunk_info_by_coord, + haddr_t, + HADDR_UNDEF, + H5Dread_chunk, + H5Dwrite_chunk, + H5Tget_native_type, + H5Tclose, + H5Tis_variable_str, + H5Tget_sign, + H5Adelete, + H5T_BITFIELD, + H5T_INTEGER, + H5T_FLOAT, + H5T_STRING, + H5Tget_order, + H5Pcreate, + H5Pset_cache, + H5Pclose, + H5Pget_userblock, + H5Pset_userblock, + H5Pset_fapl_sec2, + H5Pset_fapl_log, + H5Pset_fapl_stdio, + H5Pset_fapl_core, + H5Pset_fapl_split, + H5Pget_obj_track_times, + H5Sselect_all, + H5Sselect_elements, + H5Sselect_hyperslab, + H5Screate_simple, + H5Sclose, + H5Oget_info, + H5O_info_t, + H5ATTRset_attribute, + H5ATTRset_attribute_string, + H5ATTRget_attribute, + H5ATTRget_attribute_string, + H5ATTRget_attribute_vlen_string_array, + 
H5ATTRfind_attribute, + H5ATTRget_type_ndims, + H5ATTRget_dims, + H5ARRAYget_ndims, + H5ARRAYget_info, + set_cache_size, + get_objinfo, + get_linkinfo, + Giterate, + Aiterate, + H5UIget_info, + get_len_of_range, + conv_float64_timeval32, + truncate_dset, + H5_HAVE_DIRECT_DRIVER, + pt_H5Pset_fapl_direct, + H5_HAVE_WINDOWS_DRIVER, + pt_H5Pset_fapl_windows, + H5_HAVE_IMAGE_FILE, + H5Pset_file_image, + H5Fget_file_image, + H5Tget_size, + hobj_ref_t, +) + + +cdef int H5T_CSET_DEFAULT = 16 + +from .utilsextension cimport ( + malloc_dims, + get_native_type, + cstr_to_pystr, + load_reference, +) + +#------------------------------------------------------------------- + +cdef extern from "Python.h": + + object PyByteArray_FromStringAndSize(char *s, Py_ssize_t len) + +cdef extern from "H5ARRAY-opt.h" nogil: + hid_t H5ARRAYOmake( hid_t loc_id, + const char *dset_name, + const char *obversion, + const int rank, + const hsize_t *dims, + int extdim, + hid_t type_id, + hsize_t *dims_chunk, + hsize_t block_size, + void *fill_data, + int compress, + char *complib, + int shuffle, + int fletcher32, + hbool_t track_times, + const void *data); + + + herr_t H5ARRAYOreadSlice(char* filename, + hbool_t blosc2_support, + hid_t dataset_id, + hid_t type_id, + hsize_t *slice_start, + hsize_t *slice_stop, + hsize_t *slice_step, + void *slice_data); + + +# Functions from HDF5 ARRAY (this is not part of HDF5 HL; it's private) +cdef extern from "H5ARRAY.h" nogil: + + herr_t H5ARRAYmake(hid_t loc_id, char *dset_name, char *obversion, + int rank, hsize_t *dims, int extdim, + hid_t type_id, hsize_t *dims_chunk, void *fill_data, + int complevel, char *complib, int shuffle, + int fletcher32, hbool_t track_times, void *data) + + herr_t H5ARRAYappend_records(hid_t dataset_id, hid_t type_id, + int rank, hsize_t *dims_orig, + hsize_t *dims_new, int extdim, void *data ) + + herr_t H5ARRAYwrite_records(hid_t dataset_id, hid_t type_id, + int rank, hsize_t *start, hsize_t *step, + hsize_t *count, void *data) + 
+ herr_t H5ARRAYread(hid_t dataset_id, hid_t type_id, + hsize_t start, hsize_t nrows, hsize_t step, + int extdim, void *data) + + herr_t H5ARRAYreadSlice(hid_t dataset_id, hid_t type_id, + hsize_t *start, hsize_t *stop, + hsize_t *step, void *data) + + herr_t H5ARRAYreadIndex(hid_t dataset_id, hid_t type_id, int notequal, + hsize_t *start, hsize_t *stop, hsize_t *step, + void *data) + + herr_t H5ARRAYget_chunkshape(hid_t dataset_id, int rank, hsize_t *dims_chunk) + + herr_t H5ARRAYget_fill_value( hid_t dataset_id, hid_t type_id, + int *status, void *value) + + +# Functions for dealing with VLArray objects +cdef extern from "H5VLARRAY.h" nogil: + + herr_t H5VLARRAYmake( hid_t loc_id, char *dset_name, char *obversion, + int rank, hsize_t *dims, hid_t type_id, + hsize_t chunk_size, void *fill_data, int complevel, + char *complib, int shuffle, int fletcher32, + hbool_t track_times, void *data) + + herr_t H5VLARRAYappend_records( hid_t dataset_id, hid_t type_id, + int nobjects, hsize_t nrecords, + void *data ) + + herr_t H5VLARRAYmodify_records( hid_t dataset_id, hid_t type_id, + hsize_t nrow, int nobjects, + void *data ) + + herr_t H5VLARRAYget_info( hid_t dataset_id, hid_t type_id, + hsize_t *nrecords, char *base_byteorder) + + +#---------------------------------------------------------------------------- + +# Initialization code + +# The numpy API requires this function to be called before +# using any numpy facilities in an extension module. 
# Helper function for quickly fetch an attribute string
cdef object get_attribute_string_or_none(hid_t node_id, char* attr_name):
    """Return the string attribute `attr_name` of `node_id`, or ``None``.

    ``None`` is returned when ``H5ATTRfind_attribute`` does not find the
    attribute.  Otherwise the value is returned as ``np.str_`` when HDF5
    reports the UTF-8 character set, and as ``np.bytes_`` (with the trailing
    NUL padding stripped) for any other character set.

    The buffer filled in by ``H5ATTRget_attribute_string`` has been malloc'ed,
    so it is released in a ``finally`` clause: an exception raised while
    decoding or converting the value must not leak it.

    """

    cdef char *attr_value = NULL
    cdef int cset = H5T_CSET_DEFAULT
    cdef object retvalue = None  # Default value
    cdef hsize_t size

    if not H5ATTRfind_attribute(node_id, attr_name):
        return None

    try:
        size = H5ATTRget_attribute_string(node_id, attr_name, &attr_value,
                                          &cset)
        if size == 0:
            # Empty attribute: keep the text/bytes distinction of the cset
            if cset == H5T_CSET_UTF8:
                retvalue = np.str_('')
            else:
                retvalue = np.bytes_(b'')
        elif cset == H5T_CSET_UTF8:
            retvalue = PyUnicode_DecodeUTF8(attr_value, size, NULL)
            retvalue = np.str_(retvalue)
        else:
            retvalue = PyBytes_FromStringAndSize(attr_value, size)
            # AV: oct 2012
            # since now we use the string size got from HDF5 we have to strip
            # trailing zeros used for padding.
            # The entire process is quite odd but due to a bug (??) in the way
            # numpy arrays are pickled in python 3 we can't assume that
            # strlen(attr_value) is the actual length of the attribute
            # and np.bytes_(attr_value) can give a truncated pickle string
            retvalue = retvalue.rstrip(b'\x00')
            retvalue = np.bytes_(retvalue)
    finally:
        # Important to release attr_value, because it has been malloc'ed!
        if attr_value:
            free(attr_value)

    return retvalue
+ try: + ntype = np.dtype(stype) + except TypeError: + ntype = None + return ntype + + +_supported_drivers = ( + "H5FD_SEC2", + "H5FD_DIRECT", + #"H5FD_LOG", + "H5FD_WINDOWS", + "H5FD_STDIO", + "H5FD_CORE", + #"H5FD_FAMILY", + #"H5FD_MULTI", + "H5FD_SPLIT", + #"H5FD_MPIO", + #"H5FD_MPIPOSIX", + #"H5FD_STREAM", +) + +HAVE_DIRECT_DRIVER = bool(H5_HAVE_DIRECT_DRIVER) +HAVE_WINDOWS_DRIVER = bool(H5_HAVE_WINDOWS_DRIVER) + +# Type extensions declarations (these are subclassed by PyTables +# Python classes) + +cdef class File: + cdef hid_t file_id + cdef hid_t access_plist + cdef object name + + def _g_new(self, name, pymode, **params): + cdef herr_t err = 0 + cdef hid_t access_plist, create_plist = H5P_DEFAULT + cdef hid_t meta_plist_id = H5P_DEFAULT, raw_plist_id = H5P_DEFAULT + cdef size_t img_buf_len = 0, user_block_size = 0 + cdef void *img_buf_p = NULL + cdef bytes encname + #cdef bytes logfile_name + + # Check if we can handle the driver + driver = params["DRIVER"] + if driver is not None and driver not in _supported_drivers: + raise ValueError("Invalid or not supported driver: '%s'" % driver) + if driver == "H5FD_SPLIT": + meta_ext = params.get("DRIVER_SPLIT_META_EXT", "-m.h5") + raw_ext = params.get("DRIVER_SPLIT_RAW_EXT", "-r.h5") + meta_name = meta_ext % name if "%s" in meta_ext else name + meta_ext + raw_name = raw_ext % name if "%s" in raw_ext else name + raw_ext + enc_meta_ext = encode_filename(meta_ext) + enc_raw_ext = encode_filename(raw_ext) + + # Create a new file using default properties + self.name = name + + # Encode the filename in case it is unicode + encname = encode_filename(name) + + # These fields can be seen from Python. 
+ self._v_new = None # this will be computed later + # """Is this file going to be created from scratch?""" + + self._isPTFile = True # assume a PyTables file by default + # """Does this HDF5 file have a PyTables format?""" + + assert pymode in ('r', 'r+', 'a', 'w'), ("an invalid mode string ``%s`` " + "passed the ``check_file_access()`` test; " + "please report this to the authors" % pymode) + + image = params.get('DRIVER_CORE_IMAGE') + if image: + if driver != "H5FD_CORE": + warnings.warn("The DRIVER_CORE_IMAGE parameter will be ignored by " + "the '%s' driver" % driver) + elif not PyBytes_Check(image): + raise TypeError("The DRIVER_CORE_IMAGE must be a string of bytes") + + # After the following check we can be quite sure + # that the file or directory exists and permissions are right. + if driver == "H5FD_SPLIT": + for n in meta_name, raw_name: + check_file_access(n, pymode) + else: + backing_store = params.get("DRIVER_CORE_BACKING_STORE", 1) + if driver != "H5FD_CORE" or backing_store: + check_file_access(name, pymode) + + # Should a new file be created? 
+ if image: + exists = True + elif driver == "H5FD_SPLIT": + exists = os.path.exists(meta_name) and os.path.exists(raw_name) + else: + exists = os.path.exists(name) + self._v_new = not (pymode in ('r', 'r+') or (pymode == 'a' and exists)) + + user_block_size = params.get("USER_BLOCK_SIZE", 0) + if user_block_size and not self._v_new: + warnings.warn("The HDF5 file already esists: the USER_BLOCK_SIZE " + "will be ignored") + elif user_block_size: + user_block_size = int(user_block_size) + is_pow_of_2 = ((user_block_size & (user_block_size - 1)) == 0) + if user_block_size < 512 or not is_pow_of_2: + raise ValueError("The USER_BLOCK_SIZE must be a power od 2 greather " + "than 512 or zero") + + # File creation property list + create_plist = H5Pcreate(H5P_FILE_CREATE) + err = H5Pset_userblock(create_plist, user_block_size) + if err < 0: + H5Pclose(create_plist) + raise HDF5ExtError("Unable to set the user block size") + + # File access property list + access_plist = H5Pcreate(H5P_FILE_ACCESS) + + # Set parameters for chunk cache + H5Pset_cache(access_plist, 0, + params["CHUNK_CACHE_NELMTS"], + params["CHUNK_CACHE_SIZE"], + params["CHUNK_CACHE_PREEMPT"]) + + # Set the I/O driver + if driver == "H5FD_SEC2": + err = H5Pset_fapl_sec2(access_plist) + elif driver == "H5FD_DIRECT": + if not H5_HAVE_DIRECT_DRIVER: + H5Pclose(create_plist) + H5Pclose(access_plist) + raise RuntimeError("The H5FD_DIRECT driver is not available") + err = pt_H5Pset_fapl_direct(access_plist, + params["DRIVER_DIRECT_ALIGNMENT"], + params["DRIVER_DIRECT_BLOCK_SIZE"], + params["DRIVER_DIRECT_CBUF_SIZE"]) + #elif driver == "H5FD_LOG": + # if "DRIVER_LOG_FILE" not in params: + # H5Pclose(access_plist) + # raise ValueError("The DRIVER_LOG_FILE parameter is required for " + # "the H5FD_LOG driver") + # logfile_name = encode_filename(params["DRIVER_LOG_FILE"]) + # err = H5Pset_fapl_log(access_plist, + # logfile_name, + # params["DRIVER_LOG_FLAGS"], + # params["DRIVER_LOG_BUF_SIZE"]) + elif driver == 
"H5FD_WINDOWS": + if not H5_HAVE_WINDOWS_DRIVER: + H5Pclose(access_plist) + H5Pclose(create_plist) + raise RuntimeError("The H5FD_WINDOWS driver is not available") + err = pt_H5Pset_fapl_windows(access_plist) + elif driver == "H5FD_STDIO": + err = H5Pset_fapl_stdio(access_plist) + elif driver == "H5FD_CORE": + err = H5Pset_fapl_core(access_plist, + params["DRIVER_CORE_INCREMENT"], + backing_store) + if image: + img_buf_len = len(image) + img_buf_p = PyBytes_AsString(image) + err = H5Pset_file_image(access_plist, img_buf_p, img_buf_len) + if err < 0: + H5Pclose(create_plist) + H5Pclose(access_plist) + raise HDF5ExtError("Unable to set the file image") + + #elif driver == "H5FD_FAMILY": + # H5Pset_fapl_family(access_plist, + # params["DRIVER_FAMILY_MEMB_SIZE"], + # fapl_id) + #elif driver == "H5FD_MULTI": + # err = H5Pset_fapl_multi(access_plist, memb_map, memb_fapl, memb_name, + # memb_addr, relax) + elif driver == "H5FD_SPLIT": + err = H5Pset_fapl_split(access_plist, enc_meta_ext, meta_plist_id, + enc_raw_ext, raw_plist_id) + if err < 0: + e = HDF5ExtError("Unable to set the file access property list") + H5Pclose(create_plist) + H5Pclose(access_plist) + raise e + + if pymode == 'r': + self.file_id = H5Fopen(encname, H5F_ACC_RDONLY, access_plist) + elif pymode == 'r+': + self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist) + elif pymode == 'a': + if exists: + # A test for logging. 
# XXX: add the possibility to pass a pre-allocated buffer
def get_file_image(self):
    """Retrieves an in-memory image of an existing, open HDF5 file.

    The file is flushed first.  ``H5Fget_file_image`` is then called twice:
    once with a NULL buffer to learn the size of the image, and once more to
    copy the image into a newly allocated bytes object.

    :returns: the file image as a ``bytes`` object
    :raises HDF5ExtError: if HDF5 fails to report the image size or to copy
        the image
    :raises RuntimeError: if the buffer for the image is not available

    .. versionadded:: 3.0

    """

    cdef ssize_t size = 0
    cdef size_t buf_len = 0
    cdef bytes image
    cdef char* cimage

    self.flush()

    # retrieve the size of the buffer for the file image
    size = H5Fget_file_image(self.file_id, NULL, buf_len)
    if size < 0:
        raise HDF5ExtError("Unable to retrieve the size of the buffer for the "
                           "file image. Please note that not all drivers "
                           "provide support for image files.")

    # allocate the memory buffer
    image = PyBytes_FromStringAndSize(NULL, size)
    if not image:
        raise RuntimeError("Unable to allocate memory for the file image")

    # second call: let HDF5 write the image straight into the bytes buffer
    cimage = image
    buf_len = size
    size = H5Fget_file_image(self.file_id, cimage, buf_len)
    if size < 0:
        raise HDF5ExtError("Unable to retrieve the file image. "
                           "Please note that not all drivers provide support "
                           "for image files.")

    return image
def get_userblock_size(self):
    """Retrieves the size of a user block.

    The value is read from the creation property list of the open file;
    that property list is closed again before returning or raising.

    .. versionadded:: 3.0

    """

    cdef hsize_t block_size = 0
    cdef herr_t status = 0
    cdef hid_t fcpl_id

    fcpl_id = H5Fget_create_plist(self.file_id)
    if fcpl_id < 0:
        raise HDF5ExtError("Unable to get the creation property list")

    status = H5Pget_userblock(fcpl_id, &block_size)
    # The property list is not needed any more, whatever the outcome
    H5Pclose(fcpl_id)
    if status < 0:
        raise HDF5ExtError("unable to retrieve the user block size")

    return block_size
def _g_setattr(self, node, name, object value):
    """Save Python or NumPy objects as HDF5 attributes.

    Scalar Python objects, scalar NumPy & 0-dim NumPy objects will all be
    saved as H5T_SCALAR type.  N-dim NumPy objects will be saved as H5T_ARRAY
    type.

    NumPy arrays whose dtype kind is one of ``V S b i u f c`` are written
    with ``H5ATTRset_attribute``.  A 0-dim unicode array is written as a
    UTF-8 string attribute.  Anything else is pickled (protocol 0) and
    written as a string attribute.

    :param node: object providing ``_v_objectid`` (and ``_v_file.params``
        for compound values)
    :param name: attribute name; it is encoded as UTF-8
    :param value: the object to store
    :raises HDF5ExtError: if the HDF5 call that writes the attribute fails

    """

    cdef int ret
    cdef hid_t dset_id, type_id
    cdef hsize_t *dims = NULL
    cdef ndarray ndv
    cdef object byteorder, rabyteorder, baseatom
    cdef char* cname = NULL
    cdef bytes encoded_name
    cdef int cset = H5T_CSET_DEFAULT

    encoded_name = name.encode('utf-8')
    # get the C pointer
    cname = encoded_name

    # The dataset id of the node
    dset_id = node._v_objectid

    # Convert a NumPy scalar into a NumPy 0-dim ndarray
    if isinstance(value, np.generic):
        value = np.array(value)

    # Check if value is a NumPy ndarray and of a supported type
    if (isinstance(value, np.ndarray) and
            value.dtype.kind in ('V', 'S', 'b', 'i', 'u', 'f', 'c')):
        # get a contiguous array: fixes #270 and gh-176
        #value = np.ascontiguousarray(value)
        value = value.copy()
        if value.dtype.kind == 'V':
            description, rabyteorder = descr_from_dtype(
                value.dtype, ptparams=node._v_file.params)
            byteorder = byteorders[rabyteorder]
            type_id = create_nested_type(description, byteorder)
            # Make sure the value is consistent with offsets of the
            # description
            value = value.astype(description._v_dtype)
        else:
            # Get the associated native HDF5 type of the scalar type
            baseatom = Atom.from_dtype(value.dtype.base)
            byteorder = byteorders[value.dtype.byteorder]
            type_id = atom_to_hdf5_type(baseatom, byteorder)
        # Get dimensionality info
        ndv = value
        dims = npy_malloc_dims(PyArray_NDIM(ndv), PyArray_DIMS(ndv))
        # Actually write the attribute
        ret = H5ATTRset_attribute(dset_id, cname, type_id,
                                  PyArray_NDIM(ndv), dims, PyArray_BYTES(ndv))
        # Release resources *before* looking at the result, so that a failed
        # write does not leak the dims buffer nor the HDF5 type
        free(dims)
        H5Tclose(type_id)
        if ret < 0:
            raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." %
                               (name, self._v_node))
    else:
        # Object cannot be natively represented in HDF5.
        if (isinstance(value, np.ndarray) and
                value.dtype.kind == 'U' and
                value.shape == ()):
            value = value[()].encode('utf-8')
            cset = H5T_CSET_UTF8
        else:
            # Convert this object to a null-terminated string
            # (binary pickles are not supported at this moment)
            value = pickle.dumps(value, 0)

        ret = H5ATTRset_attribute_string(dset_id, cname, value, len(value),
                                         cset)
        if ret < 0:
            raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." %
                               (name, self._v_node))
% + (attrname, self.name)) + + # Call a fast function for scalar values and typical class types + if (rank == 0 and class_id == H5T_STRING): + type_size = H5ATTRget_attribute_string(dset_id, cattrname, &str_value, + &cset) + if type_size == 0: + if cset == H5T_CSET_UTF8: + retvalue = np.str_('') + else: + retvalue = np.bytes_(b'') + + elif cset == H5T_CSET_UTF8: + retvalue = PyUnicode_DecodeUTF8(str_value, type_size, NULL) + retvalue = np.str_(retvalue) + else: + retvalue = PyBytes_FromStringAndSize(str_value, type_size) + # AV: oct 2012 + # since now we use the string size got form HDF5 we have to strip + # trailing zeros used for padding. + # The entire process is quite odd but due to a bug (??) in the way + # numpy arrays are pickled in python 3 we can't assume that + # strlen(attr_value) is the actual length of the attibute + # and np.bytes_(attr_value) can give a truncated pickle sting + retvalue = retvalue.rstrip(b'\x00') + retvalue = np.bytes_(retvalue) # bytes + # Important to release attr_value, because it has been malloc'ed! + if str_value: + free(str_value) + H5Tclose(type_id) + return retvalue + elif (rank == 0 and class_id in (H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT)): + dtype_ = get_dtype_scalar(type_id, class_id, type_size) + if dtype_ is None: + warnings.warn("Unsupported type for attribute '%s' in node '%s'. " + "Offending HDF5 class: %d" % (attrname, self.name, + class_id), DataTypeWarning) + self._v_unimplemented.append(attrname) + return None + shape = () + else: + # General case + + # Get the dimensional info + dims = malloc(rank * sizeof(hsize_t)) + ret = H5ATTRget_dims(dset_id, cattrname, dims) + if ret < 0: + raise HDF5ExtError("Can't get dims info on attribute %s in node %s." 
% + (attrname, self.name)) + shape = getshape(rank, dims) + # dims is not needed anymore + free( dims) + + # Get the NumPy dtype from the type_id + try: + stype_, shape_ = hdf5_to_np_ext_type(type_id, pure_numpy_types=True, ptparams=node._v_file.params) + dtype_ = np.dtype((stype_, shape_)) + except TypeError: + if class_id == H5T_STRING and H5Tis_variable_str(type_id): + nelements = H5ATTRget_attribute_vlen_string_array(dset_id, cattrname, + &str_values, &cset) + if nelements < 0: + raise HDF5ExtError("Can't read attribute %s in node %s." % + (attrname, self.name)) + + # The following generator expressions do not work with Cython 0.15.1 + if cset == H5T_CSET_UTF8: + #retvalue = np.fromiter( + # PyUnicode_DecodeUTF8(str_values[i], + # strlen(str_values[i]), + # NULL) + # for i in range(nelements), "O8") + retvalue = np.array([ + PyUnicode_DecodeUTF8(str_values[i], + strlen(str_values[i]), + NULL) + for i in range(nelements)], "O8") + + else: + #retvalue = np.fromiter( + # str_values[i] for i in range(nelements), "O8") + retvalue = np.array( + [str_values[i] for i in range(nelements)], "O8") + retvalue.shape = shape + + # Important to release attr_value, because it has been malloc'ed! + for i in range(nelements): + free(str_values[i]) + free(str_values) + + return retvalue + + # This class is not supported. Instead of raising a TypeError, issue a + # warning explaining the problem. This will allow to continue browsing + # native HDF5 files, while informing the user about the problem. + warnings.warn("Unsupported type for attribute '%s' in node '%s'. 
" + "Offending HDF5 class: %d" % (attrname, self.name, + class_id), DataTypeWarning) + self._v_unimplemented.append(attrname) + return None + + # Get the container for data + ndvalue = np.empty(dtype=dtype_, shape=shape) + # Get the pointer to the buffer data area + rbuf = PyArray_DATA(ndvalue) + # Actually read the attribute from disk + ret = H5ATTRget_attribute(dset_id, cattrname, type_id, rbuf) + if ret < 0: + raise HDF5ExtError("Attribute %s exists in node %s, but can't get it." % + (attrname, self.name)) + H5Tclose(type_id) + + if rank > 0: # multidimensional case + retvalue = ndvalue + else: + retvalue = ndvalue[()] # 0-dim ndarray becomes a NumPy scalar + + return retvalue + + + def _g_remove(self, node, attrname): + cdef int ret + cdef hid_t dset_id + cdef char *cattrname = NULL + cdef bytes encoded_attrname + + encoded_attrname = attrname.encode('utf-8') + # Get the C pointer + cattrname = encoded_attrname + + # The dataset id of the node + dset_id = node._v_objectid + + ret = H5Adelete(dset_id, cattrname) + if ret < 0: + raise HDF5ExtError("Attribute '%s' exists in node '%s', but cannot be " + "deleted." % (attrname, self.name)) + + +cdef class Node: + # Instance variables declared in .pxd + + def _g_new(self, where, name, init): + self.name = name + # """The name of this node in its parent group.""" + self.parent_id = where._v_objectid + # """The identifier of the parent group.""" + + def _g_delete(self, parent): + cdef int ret + cdef bytes encoded_name + + encoded_name = self.name.encode('utf-8') + + # Delete this node + ret = H5Ldelete(parent._v_objectid, encoded_name, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("problems deleting the node ``%s``" % self.name) + return ret + + def __dealloc__(self): + self.parent_id = 0 + + def _get_obj_info(self): + cdef herr_t ret = 0 + cdef H5O_info_t oinfo + + ret = H5Oget_info(self._v_objectid, &oinfo) + if ret < 0: + raise HDF5ExtError("Unable to get object info for '%s'" % + self. 
def _g_create(self):
    """Create this group under its parent and return the new group id.

    The group name is encoded as UTF-8 and the group is created with default
    property lists.  ``HDF5ExtError`` is raised when HDF5 reports a failure.

    """

    cdef bytes utf8_name = self.name.encode('utf-8')
    cdef hid_t new_group_id

    # @TODO: set property list --> utf-8

    # Create a new group
    new_group_id = H5Gcreate(self.parent_id, utf8_name,
                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)
    if new_group_id < 0:
        raise HDF5ExtError("Can't create the group %s." % self.name)

    self.group_id = new_group_id
    return new_group_id
def _g_list_group(self, parent):
    """Return a tuple with the groups and the leaves hanging from self.

    The listing itself is delegated to ``Giterate``, which receives the
    object ids of `parent` and of this group plus the UTF-8 encoded name of
    this group.

    """

    cdef bytes utf8_name = self.name.encode('utf-8')

    parent_oid = parent._v_objectid
    own_oid = self._v_objectid
    return Giterate(parent_oid, own_oid, utf8_name)
def _g_get_lchild_attr(self, leaf_name, attr_name):
    """Return an attribute of a child `Leaf`.

    If the attribute does not exist, ``None`` is returned.

    The child dataset is opened only for the duration of the call; it is
    closed in a ``finally`` clause so that an exception raised while reading
    the attribute does not leave the dataset handle open.

    :param leaf_name: name of the child dataset (encoded as UTF-8)
    :param attr_name: name of the attribute (encoded as UTF-8)
    :raises HDF5ExtError: if the child dataset cannot be opened

    """

    cdef hid_t leaf_id
    cdef bytes encoded_leaf_name
    cdef bytes encoded_attr_name

    encoded_leaf_name = leaf_name.encode('utf-8')
    encoded_attr_name = attr_name.encode('utf-8')

    # Open the dataset
    leaf_id = H5Dopen(self.group_id, encoded_leaf_name, H5P_DEFAULT)
    if leaf_id < 0:
        raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
                           (leaf_name, self._v_pathname))
    try:
        return get_attribute_string_or_none(leaf_id, encoded_attr_name)
    finally:
        # Close the dataset, also when reading the attribute failed
        H5Dclose(leaf_id)
def _get_obj_track_times(self):
    """Get track_times boolean for dataset

    The flag is read with H5Pget_obj_track_times from the creation property
    list of the dataset; the property list is closed before returning or
    raising.  A negative dataset id raises ValueError and a failing HDF5
    call raises HDF5ExtError.

    Note kept from the original documentation: the track times dataset
    creation property does not seem to survive closing and reopening as of
    HDF5 1.8.17.  Currently, it may be more accurate to test whether the
    ctime for the dataset is 0:
    track_times = (leaf._get_obj_timestamps().ctime == 0)
    """
    cdef hbool_t tracked = True
    cdef hid_t dcpl_id
    cdef herr_t status

    if self.dataset_id < 0:
        raise ValueError('Invalid dataset id %s' % self.dataset_id)

    dcpl_id = H5Dget_create_plist(self.dataset_id)
    if dcpl_id < 0:
        raise HDF5ExtError("Could not get dataset creation property list "
                           "from dataset id %s" % self.dataset_id)

    # Get track_times boolean for dataset, then drop the property list
    # whatever the outcome of the query
    status = H5Pget_obj_track_times(dcpl_id, &tracked)
    H5Pclose(dcpl_id)
    if status < 0:
        raise HDF5ExtError("Could not get dataset track_times property "
                           "from dataset id %s" % self.dataset_id)

    return bool(tracked)
def _g_read_chunk(self, ndarray coords, ndarray out):
    """Read the raw chunk at `coords` (into `out`).

    Return a new array of bytes if `out` is ``None``, `out` itself otherwise.
    Return ``None`` if the chunk is missing.

    :param coords: array whose data buffer is handed to HDF5 as the chunk
        offset
    :param out: optional destination array, or ``None`` to allocate a new
        ``u1`` array holding exactly the stored chunk size
    :raises ValueError: if `out` is given and ``len(out)`` is smaller than
        the stored chunk size
    :raises HDF5ExtError: if ``H5Dread_chunk`` reports a failure

    """
    cdef ndarray rarr
    cdef herr_t ret
    cdef hsize_t *offset
    cdef uint32_t filters = 0
    cdef void *rbuf

    # _g_chunk_info() yields (None, None, None) for a missing chunk
    _, addr, size = self._g_chunk_info(coords)
    if addr is None:
        return None  # missing chunk
    # NOTE(review): len(out) counts items along the first axis, so this check
    # presumably assumes a 1-D array of bytes -- confirm against callers
    if out is not None and len(out) < size:
        raise ValueError(f"Output buffer is too short: {len(out)} < {size}")

    # Read straight into the caller's buffer when one was provided
    rarr = np.empty((size,), dtype='u1') if out is None else out
    with nogil:
        rbuf = PyArray_DATA(rarr)
        offset = PyArray_DATA(coords)
        # `filters` is passed by address; its value is not used afterwards
        ret = H5Dread_chunk(self.dataset_id, H5P_DEFAULT, offset,
                            &filters, rbuf)
    if ret < 0:
        raise HDF5ExtError("Problems reading chunk from ``%s``"
                           % self._v_pathname)
    return rarr
cdef _get_type_ids(self):
    """Get the disk and native HDF5 types associated with this leaf.

    The disk type is obtained from the dataset and the native type is then
    derived from it with ``get_native_type``.  According to the original
    documentation both ids are distinct descriptors, so that it is safe to
    close them separately.

    """

    cdef hid_t on_disk_id = H5Dget_type(self.dataset_id)
    cdef hid_t in_memory_id = get_native_type(on_disk_id)

    return on_disk_id, in_memory_id
def _g_truncate(self, hsize_t size):
    """Truncate a Leaf to `size` nrows.

    After the dataset has been truncated, the Python-side bookkeeping is
    updated: ``dims`` and ``shape`` for 'EArray' and 'CArray', ``nrows`` for
    'Table' and 'VLArray'.

    :param size: the new number of rows along the main dimension
    :raises HDF5ExtError: if ``truncate_dset`` reports a failure
    :raises ValueError: if the leaf class is none of the four listed above

    """

    # The status must be held in a *signed* type: with an unsigned one
    # (such as hsize_t) the ``ret < 0`` test below could never be true and
    # a failed truncation would go unnoticed.
    cdef herr_t ret

    ret = truncate_dset(self.dataset_id, self.maindim, size)
    if ret < 0:
        raise HDF5ExtError("Problems truncating the leaf: %s" % self)

    classname = self.__class__.__name__
    if classname in ('EArray', 'CArray'):
        # Update the new dimensionality
        self.dims[self.maindim] = size
        # Update the shape
        shape = list(self.shape)
        shape[self.maindim] = SizeType(size)
        self.shape = tuple(shape)
    elif classname in ('Table', 'VLArray'):
        self.nrows = size
    else:
        raise ValueError("Unexpected classname: %s" % classname)
def _supports_opt_blosc2_read_write(byteorder, filter_list, file_mode):
    """Tell whether the optimized Blosc2 read and write paths can be used.

    Returns the tuple ``(opt_read, opt_write)``.

    Optimized writing requires `byteorder` to be the byte order of this
    machine and `filter_list` to contain exactly one filter whose name
    starts with "blosc2".  Optimized reading additionally requires either a
    platform other than Windows or a file opened in 'r' mode.

    """
    can_write = False
    if len(filter_list) == 1:  # Blosc2 must be the only filter
        can_write = ((byteorder == sys.byteorder)
                     and (filter_list[0] or "").startswith("blosc2"))

    if not can_write:
        # No optimized writing implies no optimized reading either
        return (can_write, can_write)

    # For reading, Windows does not support re-opening a file twice
    # in not read-only mode (for good reason), so we cannot use the
    # blosc2 opt
    on_windows = platform.system().lower() == 'windows'
    can_read = (not on_windows) or (file_mode == 'r')
    return (can_read, can_write)
atom_to_hdf5_type(atom_, self.byteorder) + if self.disk_type_id < 0: + raise HDF5ExtError( + "Problems creating the %s: invalid disk type ID for atom %s" % ( + self.__class__.__name__, atom_)) + + # Allocate space for the dimension axis info and fill it + dims = np.array(shape, dtype=np.intp) + self.rank = len(shape) + self.dims = npy_malloc_dims(self.rank, PyArray_DATA(dims)) + rbuf = _array_data(nparr) + + # Blosc2 optimized operations cannot be used (no chunking nor filters). + self.blosc2_support_read = False + self.blosc2_support_wirte = False + + # Save the array + complib = (self.filters.complib or '').encode('utf-8') + version = self._v_version.encode('utf-8') + class_ = self._c_classid.encode('utf-8') + self.dataset_id = H5ARRAYmake(self.parent_id, encoded_name, version, + self.rank, self.dims, + self.extdim, self.disk_type_id, NULL, NULL, + self.filters.complevel, complib, + self.filters.shuffle_bitshuffle, + self.filters.fletcher32, + self._want_track_times, + rbuf) + if self.dataset_id < 0: + raise HDF5ExtError("Problems creating the %s." 
% self.__class__.__name__) + + if self._v_file.params['PYTABLES_SYS_ATTRS']: + cset = H5T_CSET_UTF8 + # Set the conforming array attributes + H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_, + len(class_), cset) + H5ATTRset_attribute_string(self.dataset_id, "VERSION", version, + len(version), cset) + H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title, + len(encoded_title), cset) + + # Get the native type (so that it is HDF5 who is the responsible to deal + # with non-native byteorders on-disk) + self.type_id = get_native_type(self.disk_type_id) + + return self.dataset_id, shape, atom_ + + + def _create_carray(self, object title): + cdef int i + cdef herr_t ret + cdef void *rbuf + cdef bytes complib, version, class_ + cdef ndarray dflts + cdef void *fill_data + cdef ndarray extdim + cdef object atom + cdef bytes encoded_title, encoded_name + + encoded_title = title.encode('utf-8') + encoded_name = self.name.encode('utf-8') + + atom = self.atom + self.disk_type_id = atom_to_hdf5_type(atom, self.byteorder) + + self.rank = len(self.shape) + self.dims = malloc_dims(self.shape) + if self.chunkshape: + self.dims_chunk = malloc_dims(self.chunkshape) + + # Decide whether Blosc2 optimized operations can be used. + (self.blosc2_support_read, self.blosc2_support_write) = ( + _supports_opt_blosc2_read_write(self.byteorder, [self.filters.complib], + self._v_file.mode)) + + rbuf = NULL # The data pointer. 
We don't have data to save initially + # Encode strings + complib = (self.filters.complib or '').encode('utf-8') + version = self._v_version.encode('utf-8') + class_ = self._c_classid.encode('utf-8') + + # Get the fill values + if isinstance(atom.dflt, np.ndarray) or atom.dflt: + dflts = np.array(atom.dflt, dtype=atom.dtype) + fill_data = PyArray_DATA(dflts) + else: + dflts = np.zeros((), dtype=atom.dtype) + fill_data = NULL + if atom.shape == (): + # The default is preferred as a scalar value instead of 0-dim array + atom.dflt = dflts[()] + else: + atom.dflt = dflts + + cdef hsize_t blocksize = int(os.environ.get("PT_DEFAULT_B2_BLOCKSIZE", "0")) + # Create the CArray/EArray + self.dataset_id = H5ARRAYOmake(self.parent_id, encoded_name, version, + self.rank, self.dims, self.extdim, + self.disk_type_id, self.dims_chunk, + blocksize, fill_data, + self.filters.complevel, complib, + self.filters.shuffle_bitshuffle, + self.filters.fletcher32, + self._want_track_times, + rbuf) + if self.dataset_id < 0: + raise HDF5ExtError("Problems creating the %s." 
% self.__class__.__name__) + + if self._v_file.params['PYTABLES_SYS_ATTRS']: + # Set the conforming array attributes + H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_, + len(class_), H5T_CSET_ASCII) + H5ATTRset_attribute_string(self.dataset_id, "VERSION", version, + len(version), H5T_CSET_ASCII) + H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title, + len(encoded_title), H5T_CSET_ASCII) + if self.extdim >= 0: + extdim = np.array([self.extdim], dtype="int32") + # Attach the EXTDIM attribute in case of enlargeable arrays + H5ATTRset_attribute(self.dataset_id, "EXTDIM", H5T_NATIVE_INT, + 0, NULL, PyArray_BYTES(extdim)) + + # Get the native type (so that it is HDF5 who is the responsible to deal + # with non-native byteorders on-disk) + self.type_id = get_native_type(self.disk_type_id) + + return self.dataset_id + + + def _open_array(self): + cdef size_t type_size, type_precision + cdef H5T_class_t class_id + cdef char cbyteorder[11] # "irrelevant" fits easily here + cdef int i + cdef int extdim + cdef herr_t ret + cdef object shape, chunkshapes, atom + cdef int fill_status + cdef ndarray dflts + cdef void *fill_data + cdef bytes encoded_name + cdef str byteorder + + encoded_name = self.name.encode('utf-8') + + # Open the dataset + self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT) + if self.dataset_id < 0: + raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" % + (self.name, self._v_parent._v_pathname)) + # Get the datatype handles + self.disk_type_id, self.type_id = self._get_type_ids() + # Get the atom for this type + atom = atom_from_hdf5_type(self.type_id) + + # Get the rank for this array object + if H5ARRAYget_ndims(self.dataset_id, &self.rank) < 0: + raise HDF5ExtError("Problems getting ndims!") + # Allocate space for the dimension axis info + self.dims = malloc(self.rank * sizeof(hsize_t)) + self.maxdims = malloc(self.rank * sizeof(hsize_t)) + # Get info on dimensions, class and type (of base class) + ret = 
H5ARRAYget_info(self.dataset_id, self.disk_type_id, + self.dims, self.maxdims, + &class_id, cbyteorder) + if ret < 0: + raise HDF5ExtError("Unable to get array info.") + + byteorder = cstr_to_pystr(cbyteorder) + + # Get the extendable dimension (if any) + self.extdim = -1 # default is non-extensible Array + for i from 0 <= i < self.rank: + if self.maxdims[i] == -1: + self.extdim = i + break + + # Get the shape as a python tuple + shape = getshape(self.rank, self.dims) + + # Allocate space for the dimension chunking info + self.dims_chunk = malloc(self.rank * sizeof(hsize_t)) + if H5ARRAYget_chunkshape(self.dataset_id, self.rank, self.dims_chunk) < 0: + # The Array class is not chunked! + chunkshapes = None + # Blosc2 optimized operations cannot be used (no chunking nor filters). + self.blosc2_support_read = False + self.blosc2_support_write = False + else: + # Get the chunkshape as a python tuple + chunkshapes = getshape(self.rank, self.dims_chunk) + # Decide whether Blosc2 optimized operations can be used. + filters = get_filters(self.parent_id, self.name) or {} + (self.blosc2_support_read, self.blosc2_support_write) = ( + _supports_opt_blosc2_read_write(byteorder, list(filters), + self._v_file.mode)) + + # object arrays should not be read directly into memory + if atom.dtype != object: + # Get the fill value + dflts = np.zeros((), dtype=atom.dtype) + fill_data = PyArray_DATA(dflts) + H5ARRAYget_fill_value(self.dataset_id, self.type_id, + &fill_status, fill_data); + if fill_status == H5D_FILL_VALUE_UNDEFINED: + # This can only happen with datasets created with other libraries + # than PyTables. 
+ dflts = None + if dflts is not None and atom.shape == (): + # The default is preferred as a scalar value instead of 0-dim array + atom.dflt = dflts[()] + else: + atom.dflt = dflts + + # Get the byteorder + self.byteorder = correct_byteorder(atom.type, byteorder) + + return self.dataset_id, atom, shape, chunkshapes + + + def _append(self, ndarray nparr): + cdef int ret, extdim + cdef hsize_t *dims_arr + cdef void *rbuf + cdef object shape + + if self.atom.kind == "reference": + raise ValueError("Cannot append to the reference types") + + # Allocate space for the dimension axis info + dims_arr = npy_malloc_dims(self.rank, PyArray_DIMS(nparr)) + # Get the pointer to the buffer data area + rbuf = PyArray_DATA(nparr) + # Convert some NumPy types to HDF5 before storing. + if self.atom.type == 'time64': + self._convert_time64(nparr, 0) + + # Append the records + extdim = self.extdim + with nogil: + ret = H5ARRAYappend_records(self.dataset_id, self.type_id, self.rank, + self.dims, dims_arr, extdim, rbuf) + + if ret < 0: + raise HDF5ExtError("Problems appending the elements") + + free(dims_arr) + # Update the new dimensionality + shape = list(self.shape) + shape[self.extdim] = SizeType(self.dims[self.extdim]) + self.shape = tuple(shape) + + def _read_array(self, hsize_t start, hsize_t stop, hsize_t step, + ndarray nparr): + cdef herr_t ret + cdef void *rbuf + cdef hsize_t nrows + cdef int extdim + cdef size_t item_size = H5Tget_size(self.type_id) + cdef void * refbuf = NULL + + # Number of rows to read + nrows = get_len_of_range(start, stop, step) + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + refbuf = malloc(nrows * item_size) + rbuf = refbuf + else: + rbuf = PyArray_DATA(nparr) + + if hasattr(self, "extdim"): + extdim = self.extdim + else: + extdim = -1 + + # Do the physical read + with nogil: + ret = H5ARRAYread(self.dataset_id, self.type_id, start, nrows, step, + extdim, rbuf) + + try: + if ret < 0: + raise HDF5ExtError("Problems 
reading the array data.") + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + load_reference(self.dataset_id, rbuf, item_size, nparr) + finally: + if refbuf: + free(refbuf) + refbuf = NULL + + if self.atom.kind == 'time': + # Swap the byteorder by hand (this is not currently supported by HDF5) + if H5Tget_order(self.type_id) != platform_byteorder: + nparr.byteswap(True) + + # Convert some HDF5 types to NumPy after reading. + if self.atom.type == 'time64': + self._convert_time64(nparr, 1) + + return + + + def _g_read_slice(self, ndarray startl, ndarray stopl, ndarray stepl, + ndarray nparr): + cdef herr_t ret + cdef hsize_t *start + cdef hsize_t *stop + cdef hsize_t *step + cdef void *rbuf + cdef size_t item_size = H5Tget_size(self.type_id) + cdef void * refbuf = NULL + + # Get the pointer to the buffer data area of startl, stopl and stepl arrays + start = PyArray_DATA(startl) + stop = PyArray_DATA(stopl) + step = PyArray_DATA(stepl) + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + refbuf = malloc(nparr.size * item_size) + rbuf = refbuf + else: + rbuf = PyArray_DATA(nparr) + + cdef bytes fname = self._v_file.filename.encode('utf8') + cdef char *filename = fname + # Do the physical read + with nogil: + ret = H5ARRAYOreadSlice(filename, self.blosc2_support_read, self.dataset_id, self.type_id, + start, stop, step, rbuf) + try: + if ret < 0: + raise HDF5ExtError("Internal error reading the elements " + "(H5ARRAYOreadSlice returned errorcode %i)" % ret) + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + load_reference(self.dataset_id, rbuf, item_size, nparr) + finally: + if refbuf: + free(refbuf) + refbuf = NULL + + if self.atom.kind == 'time': + # Swap the byteorder by hand (this is not currently supported by HDF5) + if H5Tget_order(self.type_id) != platform_byteorder: + nparr.byteswap(True) + + # Convert some HDF5 types to NumPy after reading + if self.atom.type == 
'time64': + self._convert_time64(nparr, 1) + + return + + + def _g_read_coords(self, ndarray coords, ndarray nparr): + """Read coordinates in an already created NumPy array.""" + + cdef herr_t ret + cdef hid_t space_id + cdef hid_t mem_space_id + cdef hsize_t size + cdef void *rbuf + cdef object mode + cdef size_t item_size = H5Tget_size(self.type_id) + cdef void * refbuf = NULL + + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + # Create a memory dataspace handle + size = nparr.size + mem_space_id = H5Screate_simple(1, &size, NULL) + + # Select the dataspace to be read + H5Sselect_elements(space_id, H5S_SELECT_SET, + size, PyArray_DATA(coords)) + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + refbuf = malloc(nparr.size * item_size) + rbuf = refbuf + else: + rbuf = PyArray_DATA(nparr) + + # Do the actual read + with nogil: + ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id, + H5P_DEFAULT, rbuf) + + try: + if ret < 0: + raise HDF5ExtError("Problems reading the array data.") + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + load_reference(self.dataset_id, rbuf, item_size, nparr) + finally: + if refbuf: + free(refbuf) + refbuf = NULL + + # Terminate access to the memory dataspace + H5Sclose(mem_space_id) + # Terminate access to the dataspace + H5Sclose(space_id) + + if self.atom.kind == 'time': + # Swap the byteorder by hand (this is not currently supported by HDF5) + if H5Tget_order(self.type_id) != platform_byteorder: + nparr.byteswap(True) + + # Convert some HDF5 types to NumPy after reading + if self.atom.type == 'time64': + self._convert_time64(nparr, 1) + + return + + + def perform_selection(self, space_id, start, count, step, idx, mode): + """Performs a selection using start/count/step in the given axis. + + All other axes have their full range selected. The selection is + added to the current `space_id` selection using the given mode. 
+ + Note: This is a backport from the h5py project. + + """ + + cdef int select_mode + cdef ndarray start_, count_, step_ + cdef hsize_t *startp + cdef hsize_t *countp + cdef hsize_t *stepp + + # Build arrays for the selection parameters + startl, countl, stepl = [], [], [] + for i, x in enumerate(self.shape): + if i != idx: + startl.append(0) + countl.append(x) + stepl.append(1) + else: + startl.append(start) + countl.append(count) + stepl.append(step) + start_ = np.array(startl, dtype="i8") + count_ = np.array(countl, dtype="i8") + step_ = np.array(stepl, dtype="i8") + + # Get the pointers to array data + startp = PyArray_DATA(start_) + countp = PyArray_DATA(count_) + stepp = PyArray_DATA(step_) + + # Do the actual selection + select_modes = {"AND": H5S_SELECT_AND, "NOTB": H5S_SELECT_NOTB} + assert mode in select_modes + select_mode = select_modes[mode] + H5Sselect_hyperslab(space_id, select_mode, + startp, stepp, countp, NULL) + + def _g_read_selection(self, object selection, ndarray nparr): + """Read a selection in an already created NumPy array.""" + + cdef herr_t ret + cdef hid_t space_id + cdef hid_t mem_space_id + cdef hsize_t size + cdef void *rbuf + cdef object mode + cdef size_t item_size = H5Tget_size(self.type_id) + cdef void * refbuf = NULL + + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + # Create a memory dataspace handle + size = nparr.size + mem_space_id = H5Screate_simple(1, &size, NULL) + + # Select the dataspace to be read + # Start by selecting everything + H5Sselect_all(space_id) + # Now refine with outstanding selections + for args in selection: + self.perform_selection(space_id, *args) + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + refbuf = malloc(nparr.size * item_size) + rbuf = refbuf + else: + rbuf = PyArray_DATA(nparr) + + # Do the actual read + with nogil: + ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id, + H5P_DEFAULT, rbuf) + + try: + if ret < 0: + 
raise HDF5ExtError("Problems reading the array data.") + + # Get the pointer to the buffer data area + if self.atom.kind == "reference": + load_reference(self.dataset_id, rbuf, item_size, nparr) + finally: + if refbuf: + free(refbuf) + refbuf = NULL + + # Terminate access to the memory dataspace + H5Sclose(mem_space_id) + # Terminate access to the dataspace + H5Sclose(space_id) + + if self.atom.kind == 'time': + # Swap the byteorder by hand (this is not currently supported by HDF5) + if H5Tget_order(self.type_id) != platform_byteorder: + nparr.byteswap(True) + + # Convert some HDF5 types to NumPy after reading + if self.atom.type == 'time64': + self._convert_time64(nparr, 1) + + return + + + def _g_write_slice(self, ndarray startl, ndarray stepl, ndarray countl, + ndarray nparr): + """Write a slice in an already created NumPy array.""" + + cdef int ret + cdef void *rbuf + cdef void *temp + cdef hsize_t *start + cdef hsize_t *step + cdef hsize_t *count + + if self.atom.kind == "reference": + raise ValueError("Cannot write reference types yet") + # Get the pointer to the buffer data area + rbuf = PyArray_DATA(nparr) + # Get the start, step and count values + start = PyArray_DATA(startl) + step = PyArray_DATA(stepl) + count = PyArray_DATA(countl) + + # Convert some NumPy types to HDF5 before storing. 
+ if self.atom.type == 'time64': + self._convert_time64(nparr, 0) + + # Modify the elements: + with nogil: + ret = H5ARRAYwrite_records(self.dataset_id, self.type_id, self.rank, + start, step, count, rbuf) + + if ret < 0: + raise HDF5ExtError("Internal error modifying the elements " + "(H5ARRAYwrite_records returned errorcode %i)" % ret) + + return + + + def _g_write_coords(self, ndarray coords, ndarray nparr): + """Write a selection in an already created NumPy array.""" + + cdef herr_t ret + cdef hid_t space_id + cdef hid_t mem_space_id + cdef hsize_t size + cdef void *rbuf + cdef object mode + + if self.atom.kind == "reference": + raise ValueError("Cannot write reference types yet") + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + # Create a memory dataspace handle + size = nparr.size + mem_space_id = H5Screate_simple(1, &size, NULL) + + # Select the dataspace to be written + H5Sselect_elements(space_id, H5S_SELECT_SET, + size, PyArray_DATA(coords)) + + # Get the pointer to the buffer data area + rbuf = PyArray_DATA(nparr) + + # Convert some NumPy types to HDF5 before storing. 
+ if self.atom.type == 'time64': + self._convert_time64(nparr, 0) + + # Do the actual write + with nogil: + ret = H5Dwrite(self.dataset_id, self.type_id, mem_space_id, space_id, + H5P_DEFAULT, rbuf) + + if ret < 0: + raise HDF5ExtError("Problems writing the array data.") + + # Terminate access to the memory dataspace + H5Sclose(mem_space_id) + # Terminate access to the dataspace + H5Sclose(space_id) + + return + + + def _g_write_selection(self, object selection, ndarray nparr): + """Write a selection in an already created NumPy array.""" + + cdef herr_t ret + cdef hid_t space_id + cdef hid_t mem_space_id + cdef hsize_t size + cdef void *rbuf + cdef object mode + + if self.atom.kind == "reference": + raise ValueError("Cannot write reference types yet") + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + # Create a memory dataspace handle + size = nparr.size + mem_space_id = H5Screate_simple(1, &size, NULL) + + # Select the dataspace to be written + # Start by selecting everything + H5Sselect_all(space_id) + # Now refine with outstanding selections + for args in selection: + self.perform_selection(space_id, *args) + + # Get the pointer to the buffer data area + rbuf = PyArray_DATA(nparr) + + # Convert some NumPy types to HDF5 before storing. 
+ if self.atom.type == 'time64': + self._convert_time64(nparr, 0) + + # Do the actual write + with nogil: + ret = H5Dwrite(self.dataset_id, self.type_id, mem_space_id, space_id, + H5P_DEFAULT, rbuf) + + if ret < 0: + raise HDF5ExtError("Problems writing the array data.") + + # Terminate access to the memory dataspace + H5Sclose(mem_space_id) + # Terminate access to the dataspace + H5Sclose(space_id) + + return + + + def __dealloc__(self): + if self.dims: + free(self.dims) + if self.maxdims: + free(self.maxdims) + if self.dims_chunk: + free(self.dims_chunk) + + +cdef class VLArray(Leaf): + # Instance variables + cdef hsize_t nrecords + + def _create_array(self, object title): + cdef int rank + cdef hsize_t *dims + cdef herr_t ret + cdef void *rbuf + cdef bytes complib, version, class_ + cdef object type_, itemsize, atom, scatom + cdef bytes encoded_title, encoded_name + cdef H5T_cset_t cset = H5T_CSET_ASCII + + encoded_title = title.encode('utf-8') + encoded_name = self.name.encode('utf-8') + + atom = self.atom + if not hasattr(atom, 'size'): # it is a pseudo-atom + atom = atom.base + + # Get the HDF5 type of the *scalar* atom + scatom = atom.copy(shape=()) + self.base_type_id = atom_to_hdf5_type(scatom, self.byteorder) + if self.base_type_id < 0: + raise HDF5ExtError( + "Problems creating the %s: invalid base type ID for atom %s" % ( + self.__class__.__name__, scatom)) + + # Allocate space for the dimension axis info + rank = len(atom.shape) + dims = malloc_dims(atom.shape) + + rbuf = NULL # We don't have data to save initially + + # Encode strings + complib = (self.filters.complib or '').encode('utf-8') + version = self._v_version.encode('utf-8') + class_ = self._c_classid.encode('utf-8') + + # Create the vlarray + self.dataset_id = H5VLARRAYmake(self.parent_id, encoded_name, version, + rank, dims, self.base_type_id, + self.chunkshape[0], rbuf, + self.filters.complevel, complib, + self.filters.shuffle_bitshuffle, + self.filters.fletcher32, + 
self._want_track_times, rbuf) + if dims: + free(dims) + if self.dataset_id < 0: + raise HDF5ExtError("Problems creating the VLArray.") + self.nrecords = 0 # Initialize the number of records saved + + if self._v_file.params['PYTABLES_SYS_ATTRS']: + cset = H5T_CSET_UTF8 + # Set the conforming array attributes + H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_, + len(class_), cset) + H5ATTRset_attribute_string(self.dataset_id, "VERSION", version, + len(version), cset) + H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title, + len(encoded_title), cset) + + # Get the datatype handles + self.disk_type_id, self.type_id = self._get_type_ids() + + return self.dataset_id + + + def _open_array(self): + cdef char cbyteorder[11] # "irrelevant" fits easily here + cdef int i, enumtype + cdef int rank + cdef herr_t ret + cdef hsize_t nrecords, chunksize + cdef object shape, type_ + cdef bytes encoded_name + cdef str byteorder + + encoded_name = self.name.encode('utf-8') + + # Open the dataset + self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT) + if self.dataset_id < 0: + raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" % + (self.name, self._v_parent._v_pathname)) + # Get the datatype handles + self.disk_type_id, self.type_id = self._get_type_ids() + # Get the atom for this type + atom = atom_from_hdf5_type(self.type_id) + + # Get info on dimensions & types (of base class) + H5VLARRAYget_info(self.dataset_id, self.disk_type_id, &nrecords, + cbyteorder) + + byteorder = cstr_to_pystr(cbyteorder) + + # Get some properties of the atomic type + self._atomicdtype = atom.dtype + self._atomictype = atom.type + self._atomicshape = atom.shape + self._atomicsize = atom.size + + # Get the byteorder + self.byteorder = correct_byteorder(atom.type, byteorder) + + # Get the chunkshape (VLArrays are unidimensional entities) + H5ARRAYget_chunkshape(self.dataset_id, 1, &chunksize) + + self.nrecords = nrecords # Initialize the number of records 
saved + return self.dataset_id, SizeType(nrecords), (SizeType(chunksize),), atom + + + def _append(self, ndarray nparr, int nobjects): + cdef int ret + cdef void *rbuf + + # Get the pointer to the buffer data area + if nobjects: + rbuf = PyArray_DATA(nparr) + # Convert some NumPy types to HDF5 before storing. + if self.atom.type == 'time64': + self._convert_time64(nparr, 0) + else: + rbuf = NULL + + # Append the records: + with nogil: + ret = H5VLARRAYappend_records(self.dataset_id, self.type_id, + nobjects, self.nrecords, rbuf) + + if ret < 0: + raise HDF5ExtError("Problems appending the records.") + + self.nrecords = self.nrecords + 1 + + def _modify(self, hsize_t nrow, ndarray nparr, int nobjects): + cdef int ret + cdef void *rbuf + + # Get the pointer to the buffer data area + rbuf = PyArray_DATA(nparr) + if nobjects: + # Convert some NumPy types to HDF5 before storing. + if self.atom.type == 'time64': + self._convert_time64(nparr, 0) + + # Append the records: + with nogil: + ret = H5VLARRAYmodify_records(self.dataset_id, self.type_id, + nrow, nobjects, rbuf) + + if ret < 0: + raise HDF5ExtError("Problems modifying the record.") + + return nobjects + + # Because the size of each "row" is unknown, there is no easy way to + # calculate this value + def _get_memory_size(self): + cdef hid_t space_id + cdef hsize_t size + cdef herr_t ret + + if self.nrows == 0: + size = 0 + else: + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + # Return the size of the entire dataset + ret = H5Dvlen_get_buf_size(self.dataset_id, self.type_id, space_id, + &size) + if ret < 0: + size = -1 + + # Terminate access to the dataspace + H5Sclose(space_id) + + return size + + def _read_array(self, hsize_t start, hsize_t stop, hsize_t step): + cdef int i + cdef size_t vllen + cdef herr_t ret + cdef hvl_t *rdata + cdef hsize_t nrows + cdef hid_t space_id + cdef hid_t mem_space_id + cdef object buf, nparr, shape, datalist + + # Compute the number of rows to read + nrows 
= get_len_of_range(start, stop, step) + if start + nrows > self.nrows: + raise HDF5ExtError( + "Asking for a range of rows exceeding the available ones!.", + h5bt=False) + + # Now, read the chunk of rows + with nogil: + # Allocate the necessary memory for keeping the row handlers + rdata = malloc(nrows*sizeof(hvl_t)) + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + # Create a memory dataspace handle + mem_space_id = H5Screate_simple(1, &nrows, NULL) + # Select the data to be read + H5Sselect_hyperslab(space_id, H5S_SELECT_SET, &start, &step, &nrows, + NULL) + # Do the actual read + ret = H5Dread(self.dataset_id, self.type_id, mem_space_id, space_id, + H5P_DEFAULT, rdata) + + if ret < 0: + raise HDF5ExtError( + "VLArray._read_array: Problems reading the array data.") + + datalist = [] + for i in range(nrows): + # Number of atoms in row + vllen = rdata[i].len + # Get the pointer to the buffer data area + if vllen > 0: + # Create a buffer to keep this info. It is important to do a + # copy, because we will dispose the buffer memory later on by + # calling the H5Dvlen_reclaim. PyByteArray_FromStringAndSize does this. + buf = PyByteArray_FromStringAndSize(rdata[i].p, + vllen*self._atomicsize) + else: + # Case where there is info with zero lentgh + buf = None + # Compute the shape for the read array + shape = list(self._atomicshape) + shape.insert(0, vllen) # put the length at the beginning of the shape + nparr = np.ndarray( + buffer=buf, dtype=self._atomicdtype.base, shape=shape) + # Set the writeable flag for this ndarray object + nparr.flags.writeable = True + if self.atom.kind == 'time': + # Swap the byteorder by hand (this is not currently supported by HDF5) + if H5Tget_order(self.type_id) != platform_byteorder: + nparr.byteswap(True) + # Convert some HDF5 types to NumPy after reading. 
+ if self.atom.type == 'time64': + self._convert_time64(nparr, 1) + # Append this array to the output list + datalist.append(nparr) + + # Release resources + # Reclaim all the (nested) VL data + ret = H5Dvlen_reclaim(self.type_id, mem_space_id, H5P_DEFAULT, rdata) + if ret < 0: + raise HDF5ExtError("VLArray._read_array: error freeing the data buffer.") + # Terminate access to the memory dataspace + H5Sclose(mem_space_id) + # Terminate access to the dataspace + H5Sclose(space_id) + # Free the amount of row pointers to VL row data + free(rdata) + + return datalist + + + def get_row_size(self, row): + """Return the total size in bytes of all the elements contained in a given row.""" + + cdef hid_t space_id + cdef hsize_t size + cdef herr_t ret + + cdef hsize_t offset[1] + cdef hsize_t count[1] + + if row >= self.nrows: + raise HDF5ExtError( + "Asking for a range of rows exceeding the available ones!.", + h5bt=False) + + # Get the dataspace handle + space_id = H5Dget_space(self.dataset_id) + + offset[0] = row + count[0] = 1 + + ret = H5Sselect_hyperslab(space_id, H5S_SELECT_SET, offset, NULL, count, NULL); + if ret < 0: + size = -1 + + ret = H5Dvlen_get_buf_size(self.dataset_id, self.type_id, space_id, &size) + if ret < 0: + size = -1 + + # Terminate access to the dataspace + H5Sclose(space_id) + + return size + + +cdef class UnImplemented(Leaf): + + def _open_unimplemented(self): + cdef object shape + cdef char cbyteorder[11] # "irrelevant" fits easily here + cdef bytes encoded_name + cdef str byteorder + + encoded_name = self.name.encode('utf-8') + + # Get info on dimensions + shape = H5UIget_info(self.parent_id, encoded_name, cbyteorder) + shape = tuple(map(SizeType, shape)) + self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT) + byteorder = cstr_to_pystr(cbyteorder) + + return (shape, byteorder, self.dataset_id) + + def _g_close(self): + H5Dclose(self.dataset_id) + + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## 
fill-column: 78 +## End: diff --git a/venv/Lib/site-packages/tables/idxutils.py b/venv/Lib/site-packages/tables/idxutils.py new file mode 100644 index 0000000..c63d6d7 --- /dev/null +++ b/venv/Lib/site-packages/tables/idxutils.py @@ -0,0 +1,505 @@ +"""Utilities to be used mainly by the Index class.""" + +from __future__ import annotations + +import math +from typing import Literal, TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from .index import Index + + +# Hints for chunk/slice/block/superblock computations: +# - The slicesize should not exceed 2**32 elements (because of +# implementation reasons). Such an extreme case would make the +# sorting algorithms to consume up to 64 GB of memory. +# - In general, one should favor a small chunksize ( < 128 KB) if one +# wants to reduce the latency for indexed queries. However, keep in +# mind that a very low value of chunksize for big datasets may hurt +# the performance by requiring the HDF5 to use a lot of memory and CPU +# for its internal B-Tree. + + +def csformula(nrows: int) -> float: + """Return the fitted chunksize (a float value) for nrows.""" + # This formula has been computed using two points: + # 2**12 = m * 2**(n + log10(10**6)) + # 2**15 = m * 2**(n + log10(10**9)) + # where 2**12 and 2**15 are reasonable values for chunksizes for indexes + # with 10**6 and 10**9 elements respectively. + # Yes, return a floating point number! 
+ return 64 * 2 ** math.log10(nrows) + + +def limit_er(expectedrows: int) -> int: + """Protection against creating too small or too large chunks or slices.""" + if expectedrows < 10**5: + expectedrows = 10**5 + elif expectedrows > 10**12: + expectedrows = 10**12 + return expectedrows + + +def computechunksize(expectedrows: int) -> int: + """Get the optimum chunksize based on expectedrows.""" + expectedrows = limit_er(expectedrows) + zone = int(math.log10(expectedrows)) + nrows = 10**zone + return int(csformula(nrows)) + + +def computeslicesize(expectedrows: int, memlevel: int) -> int: + """Get the optimum slicesize based on expectedrows and memorylevel.""" + expectedrows = limit_er(expectedrows) + # First, the optimum chunksize + cs = csformula(expectedrows) + # Now, the actual chunksize + chunksize = computechunksize(expectedrows) + # The optimal slicesize + ss = int(cs * memlevel**2) + # We *need* slicesize to be an exact multiple of the actual chunksize + ss = (ss // chunksize) * chunksize + ss *= 4 # slicesize should be at least divisible by 4 + # ss cannot be bigger than 2**31 - 1 elements because of fundamental + # reasons (this limitation comes mainly from the way of compute + # indices for indexes, but also because C keysort is not implemented + # yet for the string type). Besides, it cannot be larger than + # 2**30, because limitations of the optimized binary search code + # (in idx-opt.c, the line ``mid = lo + (hi-lo)/2;`` will overflow + # for values of ``lo`` and ``hi`` >= 2**30). Finally, ss must be a + # multiple of 4, so 2**30 must definitely be an upper limit. + if ss > 2**30: + ss = 2**30 + return ss + + +def computeblocksize( + expectedrows: int, compoundsize: int, lowercompoundsize: int +) -> int: + """Calculate the optimum number of superblocks made from compounds blocks. + + This is useful for computing the sizes of both blocks and + superblocks (using the PyTables terminology for blocks in indexes). 
+ + """ + nlowerblocks = (expectedrows // lowercompoundsize) + 1 + if nlowerblocks > 2**20: + # Protection against too large number of compound blocks + nlowerblocks = 2**20 + size = int(lowercompoundsize * nlowerblocks) + # We *need* superblocksize to be an exact multiple of the actual + # compoundblock size (a ceil must be performed here!) + size = ((size // compoundsize) + 1) * compoundsize + return size + + +def calc_chunksize( + expectedrows: int, + optlevel: int = 6, + indsize: int = 4, + memlevel: int = 4, + node: Index | None = None, +) -> tuple[int, int, int, int]: + """Calculate the HDF5 chunk size for index and sorted arrays. + + The logic to do that is based purely in experiments playing with + different chunksizes and compression flag. It is obvious that using + big chunks optimizes the I/O speed, but if they are too large, the + uncompressor takes too much time. This might (should) be further + optimized by doing more experiments. + + """ + chunksize = computechunksize(expectedrows) + slicesize = computeslicesize(expectedrows, memlevel) + + # Avoid excessive slicesize in Indexes, + # see https://github.com/PyTables/PyTables/issues/879 + if node is not None: + maxsize = ( + node._v_file.params["BUFFER_TIMES"] + * node._v_file.params["IO_BUFFER_SIZE"] + ) + while (slicesize * node.dtype.itemsize) > maxsize: + slicesize = slicesize // 2 + + # Correct the slicesize and the chunksize based on optlevel + if indsize == 1: # ultralight + chunksize, slicesize = ccs_ultralight(optlevel, chunksize, slicesize) + elif indsize == 2: # light + chunksize, slicesize = ccs_light(optlevel, chunksize, slicesize) + elif indsize == 4: # medium + chunksize, slicesize = ccs_medium(optlevel, chunksize, slicesize) + elif indsize == 8: # full + chunksize, slicesize = ccs_full(optlevel, chunksize, slicesize) + + # Finally, compute blocksize and superblocksize + blocksize = computeblocksize(expectedrows, slicesize, chunksize) + superblocksize = computeblocksize(expectedrows, 
blocksize, slicesize) + # The size for different blocks information + sizes = (superblocksize, blocksize, slicesize, chunksize) + return sizes + + +def ccs_ultralight( + optlevel: int, chunksize: int, slicesize: int +) -> tuple[int, int]: + """Correct the slicesize and the chunksize based on optlevel.""" + if optlevel in (0, 1, 2): + slicesize //= 2 + slicesize += optlevel * slicesize + elif optlevel in (3, 4, 5): + slicesize *= optlevel - 1 + elif optlevel in (6, 7, 8): + slicesize *= optlevel - 1 + elif optlevel == 9: + slicesize *= optlevel - 1 + return chunksize, slicesize + + +def ccs_light( + optlevel: int, chunksize: int, slicesize: int +) -> tuple[int, int]: + """Correct the slicesize and the chunksize based on optlevel.""" + if optlevel in (0, 1, 2): + slicesize //= 2 + elif optlevel in (3, 4, 5): + pass + elif optlevel in (6, 7, 8): + chunksize //= 2 + elif optlevel == 9: + # Reducing the chunksize and enlarging the slicesize is the + # best way to reduce the entropy with the current algorithm. + chunksize //= 2 + slicesize *= 2 + return chunksize, slicesize + + +def ccs_medium( + optlevel: int, chunksize: int, slicesize: int +) -> tuple[int, int]: + """Correct the slicesize and the chunksize based on optlevel.""" + if optlevel in (0, 1, 2): + slicesize //= 2 + elif optlevel in (3, 4, 5): + pass + elif optlevel in (6, 7, 8): + chunksize //= 2 + elif optlevel == 9: + # Reducing the chunksize and enlarging the slicesize is the + # best way to reduce the entropy with the current algorithm. 
+ chunksize //= 2 + slicesize *= 2 + return chunksize, slicesize + + +def ccs_full(optlevel: int, chunksize: int, slicesize: int) -> tuple[int, int]: + """Correct the slicesize and the chunksize based on optlevel.""" + if optlevel in (0, 1, 2): + slicesize //= 2 + elif optlevel in (3, 4, 5): + pass + elif optlevel in (6, 7, 8): + chunksize //= 2 + elif optlevel == 9: + # Reducing the chunksize and enlarging the slicesize is the + # best way to reduce the entropy with the current algorithm. + chunksize //= 2 + slicesize *= 2 + return chunksize, slicesize + + +def calcoptlevels( + nblocks: int, optlevel: int, indsize: int +) -> tuple[bool, bool, bool, bool]: + """Compute the optimizations to be done. + + The calculation is based on the number of blocks, optlevel and + indexing mode. + + """ + if indsize == 2: # light + return col_light(nblocks, optlevel) + elif indsize == 4: # medium + return col_medium(nblocks, optlevel) + elif indsize == 8: # full + return col_full(nblocks, optlevel) + + +def col_light(nblocks: int, optlevel: int) -> tuple[bool, bool, bool, bool]: + """Compute the optimizations to be done for light indexes.""" + optmedian, optstarts, optstops, optfull = (False,) * 4 + + if 0 < optlevel <= 3: + optmedian = True + elif 3 < optlevel <= 6: + optmedian, optstarts = (True, True) + elif 6 < optlevel <= 9: + optmedian, optstarts, optstops = (True, True, True) + + return optmedian, optstarts, optstops, optfull + + +def col_medium(nblocks: int, optlevel: int) -> tuple[bool, bool, bool, bool]: + """Compute the optimizations to be done for medium indexes.""" + optmedian, optstarts, optstops, optfull = (False,) * 4 + + # Medium case + if nblocks <= 1: + if 0 < optlevel <= 3: + optmedian = True + elif 3 < optlevel <= 6: + optmedian, optstarts = (True, True) + elif 6 < optlevel <= 9: + optfull = 1 + else: # More than a block + if 0 < optlevel <= 3: + optfull = 1 + elif 3 < optlevel <= 6: + optfull = 2 + elif 6 < optlevel <= 9: + optfull = 3 + + return optmedian, 
optstarts, optstops, optfull + + +def col_full(nblocks: int, optlevel: int) -> tuple[bool, bool, bool, bool]: + """Compute the optimizations to be done for full indexes.""" + optmedian, optstarts, optstops, optfull = (False,) * 4 + + # Full case + if nblocks <= 1: + if 0 < optlevel <= 3: + optmedian = True + elif 3 < optlevel <= 6: + optmedian, optstarts = (True, True) + elif 6 < optlevel <= 9: + optfull = 1 + else: # More than a block + if 0 < optlevel <= 3: + optfull = 1 + elif 3 < optlevel <= 6: + optfull = 2 + elif 6 < optlevel <= 9: + optfull = 3 + + return optmedian, optstarts, optstops, optfull + + +def get_reduction_level( + indsize: int, optlevel: int, slicesize: int, chunksize: int +) -> int: + """Compute the reduction level based on indsize and optlevel.""" + rlevels = [ + [8, 8, 8, 8, 4, 4, 4, 2, 2, 1], # 8-bit indices (ultralight) + [4, 4, 4, 4, 2, 2, 2, 1, 1, 1], # 16-bit indices (light) + [2, 2, 2, 2, 1, 1, 1, 1, 1, 1], # 32-bit indices (medium) + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], # 64-bit indices (full) + ] + isizes = {1: 0, 2: 1, 4: 2, 8: 3} + rlevel = rlevels[isizes[indsize]][optlevel] + # The next cases should only happen in tests + if rlevel >= slicesize: + rlevel = 1 + if slicesize <= chunksize * rlevel: + rlevel = 1 + if indsize == 8: + # Ensure that, for full indexes we will never perform a reduction. + # This is required because of implementation assumptions. + assert rlevel == 1 + return rlevel + + +# Python implementations of NextAfter and NextAfterF +# +# These implementations exist because the standard function +# nextafterf is not available on Microsoft platforms. +# +# These implementations are based on the IEEE representation of +# floats and doubles. +# Author: Shack Toms - shack@livedata.com +# +# Thanks to Shack Toms shack@livedata.com for NextAfter and NextAfterF +# implementations in Python. 
2004-10-01 +# epsilon = math.ldexp(1.0, -53) # smallest double such that +# # 0.5 + epsilon != 0.5 +# epsilonF = math.ldexp(1.0, -24) # smallest float such that 0.5 + epsilonF +# != 0.5 +# maxFloat = float(2**1024 - 2**971) # From the IEEE 754 standard +# maxFloatF = float(2**128 - 2**104) # From the IEEE 754 standard +# minFloat = math.ldexp(1.0, -1022) # min positive normalized double +# minFloatF = math.ldexp(1.0, -126) # min positive normalized float +# smallEpsilon = math.ldexp(1.0, -1074) # smallest increment for +# # doubles < minFloat +# smallEpsilonF = math.ldexp(1.0, -149) # smallest increment for +# # floats < minFloatF +infinity = math.ldexp(1.0, 1023) * 2 +infinityf = math.ldexp(1.0, 128) +# Finf = float("inf") # Infinite in the IEEE 754 standard (not avail in Win) + +# A portable representation of NaN +# if sys.byteorder == "little": +# testNaN = struct.unpack("d", '\x01\x00\x00\x00\x00\x00\xf0\x7f')[0] +# elif sys.byteorder == "big": +# testNaN = struct.unpack("d", '\x7f\xf0\x00\x00\x00\x00\x00\x01')[0] +# else: +# raise ValueError("Byteorder '%s' not supported!" 
% sys.byteorder) +# This one seems better +# testNaN = infinity - infinity + +# "infinity" for several types +infinitymap = { + "bool": [0, 1], + "int8": [-(2**7), 2**7 - 1], + "uint8": [0, 2**8 - 1], + "int16": [-(2**15), 2**15 - 1], + "uint16": [0, 2**16 - 1], + "int32": [-(2**31), 2**31 - 1], + "uint32": [0, 2**32 - 1], + "int64": [-(2**63), 2**63 - 1], + "uint64": [0, 2**64 - 1], + "float32": [-infinityf, infinityf], + "float64": [-infinity, infinity], +} + +if hasattr(np, "float16"): + infinitymap["float16"] = [-np.float16(np.inf), np.float16(np.inf)] +if hasattr(np, "float96"): + infinitymap["float96"] = [-np.float96(np.inf), np.float96(np.inf)] +if hasattr(np, "float128"): + infinitymap["float128"] = [-np.float128(np.inf), np.float128(np.inf)] + +# Utility functions + + +def inftype( + dtype: np.dtype, itemsize: int, sign: Literal[-1, 1] = 1 +) -> bytes | float | int: + """Return a superior limit for maximum representable data type.""" + assert sign in [-1, +1] + + if dtype.kind == "S": + if sign < 0: + return b"\x00" * itemsize + else: + return b"\xff" * itemsize + try: + return infinitymap[dtype.name][sign >= 0] + except KeyError: + raise TypeError("Type %s is not supported" % dtype.name) + + +def string_next_after( + x: bytes, direction: Literal[-1, 1], itemsize: int +) -> bytes: + """Return the next neighbor of x in the specified direction.""" + assert direction in [-1, +1] + + # Pad the string with \x00 chars until itemsize completion + padsize = itemsize - len(x) + if padsize > 0: + x += b"\x00" * padsize + # int.to_bytes is not available in Python < 3.2 + # xlist = [i.to_bytes(1, sys.byteorder) for i in x] + xlist = [bytes([i]) for i in x] + xlist.reverse() + i = 0 + if direction > 0: + if xlist == b"\xff" * itemsize: + # Maximum value, return this + return b"".join(xlist) + for xchar in xlist: + if ord(xchar) < 0xFF: + xlist[i] = chr(ord(xchar) + 1).encode("ascii") + break + else: + xlist[i] = b"\x00" + i += 1 + else: + if xlist == b"\x00" * 
itemsize: + # Minimum value, return this + return b"".join(xlist) + for xchar in xlist: + if ord(xchar) > 0x00: + xlist[i] = chr(ord(xchar) - 1).encode("ascii") + break + else: + xlist[i] = b"\xff" + i += 1 + xlist.reverse() + return b"".join(xlist) + + +def int_type_next_after( + x: float | int, direction: Literal[-1, 1], itemsize: int +) -> int: + """Return the next neighbor of x in the specified direction.""" + assert direction in [-1, +1] + + # x is guaranteed to be either an int or a float + if direction < 0: + if isinstance(x, int): + return x - 1 + else: + # return int(PyNextAfter(x, x - 1)) + return int(np.nextafter(x, x - 1)) + else: + if isinstance(x, int): + return x + 1 + else: + # return int(PyNextAfter(x,x + 1)) + 1 + return int(np.nextafter(x, x + 1)) + 1 + + +def bool_type_next_after( + x: bool, direction: Literal[-1, 1], itemsize: int +) -> bool: + """Return the next representable neighbor of x in the specified direction.""" + assert direction in [-1, +1] + + # x is guaranteed to be either a boolean + if direction < 0: + return False + else: + return True + + +def nextafter( + x: bool | bytes | float | int, + direction: Literal[-1, 0, 1], + dtype: np.dtype, + itemsize: int, +) -> bool | bytes | int | float: + """Return the next representable neighbor of x in the specified direction.""" + assert direction in [-1, 0, +1] + assert dtype.kind == "S" or type(x) in (bool, float, int) + + if direction == 0: + return x + + if dtype.kind == "S": + return string_next_after(x, direction, itemsize) + + if dtype.kind in ["b"]: + return bool_type_next_after(x, direction, itemsize) + elif dtype.kind in ["i", "u"]: + return int_type_next_after(x, direction, itemsize) + elif dtype.kind == "f": + if direction < 0: + return np.nextafter(x, x - 1) + else: + return np.nextafter(x, x + 1) + + # elif dtype.name == "float32": + # if direction < 0: + # return PyNextAfterF(x,x-1) + # else: + # return PyNextAfterF(x,x + 1) + # elif dtype.name == "float64": + # if direction < 
0: + # return PyNextAfter(x,x-1) + # else: + # return PyNextAfter(x,x + 1) + + raise TypeError("data type ``%s`` is not supported" % dtype) diff --git a/venv/Lib/site-packages/tables/index.py b/venv/Lib/site-packages/tables/index.py new file mode 100644 index 0000000..e285716 --- /dev/null +++ b/venv/Lib/site-packages/tables/index.py @@ -0,0 +1,2551 @@ +"""Here is defined the Index class.""" + +from __future__ import annotations + +import os +import sys +import math +import operator +import tempfile +import warnings +from time import perf_counter as clock +from time import process_time as cpuclock +from typing import Literal, TYPE_CHECKING +from pathlib import Path + +import numpy as np +import numpy.typing as npt + +from . import indexesextension +from .atom import UIntAtom, Atom +from .leaf import Filters +from .node import NotLoggedMixin +from .path import join_path +from .group import Group +from .utils import is_idx, idx2long, lazyattr +from .carray import CArray +from .earray import EArray +from .indexes import CacheArray, LastRowArray, IndexArray +from .idxutils import ( + calc_chunksize, + calcoptlevels, + get_reduction_level, + nextafter, + inftype, +) +from .exceptions import PerformanceWarning +from .utilsextension import ( + nan_aware_gt, + nan_aware_ge, + nan_aware_lt, + nan_aware_le, + bisect_left, + bisect_right, +) +from .lrucacheextension import ObjectCache + +# default version for INDEX objects +# obversion = "1.0" # Version of indexes in PyTables 1.x series +# obversion = "2.0" # Version of indexes in PyTables Pro 2.0 series +obversion = "2.1" # Version of indexes in PyTables Pro 2.1 and up series, +# # including the join 2.3 Std + Pro version + +debug = False +# debug = True # Uncomment this for printing sizes purposes +profile = False +# profile = True # Uncomment for profiling +if profile: + from .utils import show_stats + +if TYPE_CHECKING: + from .array import Array + from .group import RootGroup + from .table import Cols, Table + +# The 
default method for sorting +# defsort = "quicksort" +# Changing to mergesort to fix #441 +defsort = "mergesort" + +# Default policy for automatically updating indexes after a table +# append operation, or automatically reindexing after an +# index-invalidating operation like removing or modifying table rows. +default_auto_index = True +# Keep in sync with ``Table.autoindex`` docstring. + +# Default filters used to compress indexes. This is quite fast and +# compression is pretty good. +# Remember to keep these defaults in sync with the docstrings and UG. +default_index_filters = Filters( + complevel=1, complib="zlib", shuffle=True, fletcher32=False +) + + +# The list of types for which an optimised search in cython and C has +# been implemented. Always add here the name of a new optimised type. +opt_search_types = ( + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", +) + +# The upper limit for uint32 ints +max32 = 2**32 + + +def _table_column_pathname_of_index(indexpathname: str) -> tuple[str, str]: + names = indexpathname.split("/") + for i, name in enumerate(names): + if name.startswith("_i_"): + break + tablepathname = "/".join(names[:i]) + "/" + name[3:] + colpathname = "/".join(names[i + 1 :]) + return (tablepathname, colpathname) + + +class Index(NotLoggedMixin, Group, indexesextension.Index): + """Represents the index of a column in a table. + + This class is used to keep the indexing information for columns in a Table + dataset (see :ref:`TableClassDescr`). It is actually a descendant of the + Group class (see :ref:`GroupClassDescr`), with some added functionality. An + Index is always associated with one and only one column in the table. + + .. note:: + + This class is mainly intended for internal use, but some of its + documented attributes and methods may be interesting for the + programmer. + + Parameters + ---------- + parentnode + The parent :class:`Group` object. + + .. 
versionchanged:: 3.0 + Renamed from *parentNode* to *parentnode*. + + name : str + The name of this node in its parent group. + atom : Atom + An Atom object representing the shape and type of the atomic objects to + be saved. Only scalar atoms are supported. + title + Sets a TITLE attribute of the Index entity. + kind + The desired kind for this index. The 'full' kind specifies a complete + track of the row position (64-bit), while the 'medium', 'light' or + 'ultralight' kinds only specify in which chunk the row is (using + 32-bit, 16-bit and 8-bit respectively). + optlevel + The desired optimization level for this index. + filters : Filters + An instance of the Filters class that provides information about the + desired I/O filters to be applied during the life of this object. + tmp_dir + The directory for the temporary files. + expectedrows + Represents an user estimate about the number of row slices that will be + added to the growable dimension in the IndexArray object. + byteorder + The byteorder of the index datasets *on-disk*. + blocksizes + The four main sizes of the compound blocks in index datasets (a low + level parameter). + new + Whether this Index is new or has to be read from disk. + + """ + + _c_classid = "INDEX" + + @property + def kind(self) -> Literal["ultralight", "light", "medium", "full"]: + """Index kind.""" + return {1: "ultralight", 2: "light", 4: "medium", 8: "full"}[ + self.indsize + ] + + @property + def filters(self) -> Filters: + """Filter properties for this index. + + See Filters in :ref:`FiltersClassDescr`. + """ + return self._v_filters + + @property + def dirty(self) -> bool: + """Whether the index is dirty or not. + + Dirty indexes are out of sync with column data, so they exist but they + are not usable. + """ + # If there is no ``DIRTY`` attribute, index should be clean. 
+ return getattr(self._v_attrs, "DIRTY", False) + + @dirty.setter + def dirty(self, dirty: bool) -> None: + wasdirty, isdirty = self.dirty, bool(dirty) + self._v_attrs.DIRTY = dirty + # If an *actual* change in dirtiness happens, + # notify the condition cache by setting or removing a nail. + conditioncache = self.table._condition_cache + if not wasdirty and isdirty: + conditioncache.nail() + if wasdirty and not isdirty: + conditioncache.unnail() + + @property + def column(self) -> Cols: + """Column instance for the indexed column. + + See :ref:`ColumnClassDescr`. + """ + tablepath, columnpath = _table_column_pathname_of_index( + self._v_pathname + ) + table = self._v_file._get_node(tablepath) + column = table.cols._g_col(columnpath) + return column + + @property + def table(self) -> Table: + """Accessor for the `Table` object of this index.""" + tablepath, columnpath = _table_column_pathname_of_index( + self._v_pathname + ) + table = self._v_file._get_node(tablepath) + return table + + @property + def nblockssuperblock(self) -> int: + """Property providing the number of blocks in a superblock.""" + return self.superblocksize // self.blocksize + + @property + def nslicesblock(self) -> int: + """Property providing the number of slices in a block.""" + return self.blocksize // self.slicesize + + @property + def nchunkslice(self) -> int: + """Property providing the number of chunks in a slice.""" + return self.slicesize // self.chunksize + + @property + def nsuperblocks(self) -> int: + """Total number of superblocks in index.""" + # Last row should not be considered as a superblock + nelements = self.nelements - self.nelementsILR + nblocks = nelements // self.superblocksize + if nelements % self.blocksize > 0: + nblocks += 1 + return nblocks + + @property + def nblocks(self) -> int: + """Total number of blocks in index.""" + # Last row should not be considered as a block + nelements = self.nelements - self.nelementsILR + nblocks = nelements // self.blocksize + if 
nelements % self.blocksize > 0: + nblocks += 1 + return nblocks + + @property + def nslices(self) -> int: + """Return the number of complete slices in index.""" + return self.nelements // self.slicesize + + @property + def nchunks(self) -> int: + """Return the number of complete chunks in index.""" + return self.nelements // self.chunksize + + @property + def shape(self) -> tuple[int, int]: + """Shape of this index (in slices and elements).""" + return self.nrows, self.slicesize + + @property + def temp_required(self) -> bool: + """Whether a temporary file for indexes is required or not.""" + return ( + self.indsize > 1 + and self.optlevel > 0 + and self.table.nrows > self.slicesize + ) + + @property + def want_complete_sort(self) -> bool: + """Whether we should try to build a completely sorted index or not.""" + return self.indsize == 8 and self.optlevel == 9 + + @property + def is_csi(self) -> bool: + """Whether the index is completely sorted or not. + + .. versionchanged:: 3.0 + The *is_CSI* property has been renamed into *is_csi*. 
+ + """ + if self.nelements == 0: + # An index with 0 indexed elements is not a CSI one (by definition) + return False + if self.indsize < 8: + # An index that is not full cannot be completely sorted + return False + # Try with the 'is_csi' attribute + if "is_csi" in self._v_attrs: + return self._v_attrs.is_csi + # If not, then compute the overlaps manually + # (the attribute 'is_csi' will be set there) + self.compute_overlaps(self, None, False) + return self.noverlaps == 0 + + @lazyattr + def nrowsinchunk(self) -> int: + """Return the number of rows that fits in a *table* chunk.""" + return self.table.chunkshape[0] + + @lazyattr + def lbucket(self) -> int: + """Return the length of a bucket based index type.""" + # Avoid to set a too large lbucket size (mainly useful for tests) + lbucket = min(self.nrowsinchunk, self.chunksize) + if self.indsize == 1: + # For ultra-light, we will never have to keep track of a + # bucket outside of a slice. + maxnb = 2**8 + if self.slicesize > maxnb * lbucket: + lbucket = math.ceil(self.slicesize / maxnb) + elif self.indsize == 2: + # For light, we will never have to keep track of a + # bucket outside of a block. 
+ maxnb = 2**16 + if self.blocksize > maxnb * lbucket: + lbucket = math.ceil(self.blocksize / maxnb) + else: + # For medium and full indexes there should not be a need to + # increase lbucket + pass + return lbucket + + def __init__( + self, + parentnode: Group, + name: str, + atom: Atom | None = None, + title: str = "", + kind: Literal["ultralight", "light", "medium", "full"] | None = None, + optlevel: int | None = None, + filters: Filters | None = None, + tmp_dir: str | None = None, + expectedrows: int = 0, + byteorder: str | None = None, + blocksizes: tuple[int, int, int, int] | None = None, + new: bool = True, + ) -> None: + + self._v_version: str | None = None + """The object version of this index.""" + self.optlevel = optlevel + """The optimization level for this index.""" + self.tmp_dir = tmp_dir + """The directory for the temporary files.""" + self.expectedrows = int(expectedrows) + """The expected number of items of index arrays.""" + if byteorder in ["little", "big"]: + self.byteorder = byteorder + else: + self.byteorder = sys.byteorder + """The byteorder of the index datasets.""" + if atom is not None: + self.dtype = atom.dtype.base + self.type = atom.type + """The datatypes to be stored by the sorted index array.""" + # ############## Important note ########################### + # The datatypes saved as index values are NumPy native + # types, so we get rid of type metainfo like Time* or Enum* + # that belongs to HDF5 types (actually, this metainfo is + # not needed for sorting and looking-up purposes). + # ######################################################### + indsize = {"ultralight": 1, "light": 2, "medium": 4, "full": 8}[ + kind + ] + assert indsize in (1, 2, 4, 8), "indsize should be 1, 2, 4 or 8!" 
+ self.indsize = indsize + """The itemsize for the indices part of the index.""" + + self.nrows: int | None = None + """The total number of slices in the index.""" + self.nelements: int | None = None + """The number of currently indexed rows for this column.""" + self.blocksizes = blocksizes + """The four main sizes of the compound blocks (if specified).""" + self.dirtycache = True + """Dirty cache (for ranges, bounds & sorted) flag.""" + self.superblocksize: int | None = None + """Size of the superblock for this index.""" + self.blocksize: int | None = None + """Size of the block for this index.""" + self.slicesize: int | None = None + """Size of the slice for this index.""" + self.chunksize: int | None = None + """Size of the chunk for this index.""" + self.tmpfilename: str | None = None + """Filename for temporary bounds.""" + self.opt_search_types = opt_search_types + """The types for which and optimized search has been implemented.""" + self.noverlaps = -1 + """The number of overlaps in an index. 0 means a completely + sorted index. -1 means that this number is not computed yet.""" + self.tprof = 0 + """Time counter for benchmarking purposes.""" + + from .file import open_file + + self._openFile = open_file + """The `open_file()` function, to avoid a circular import.""" + + super().__init__(parentnode, name, title, new, filters) + + def _g_post_init_hook(self) -> None: + if self._v_new: + # The version for newly created indexes + self._v_version = obversion + super()._g_post_init_hook() + + # Index arrays must only be created for new indexes + if not self._v_new: + idxversion = self._v_version + # Set-up some variables from info on disk and return + attrs = self._v_attrs + # Coerce NumPy scalars to Python scalars in order + # to avoid undesired upcasting operations. 
+ self.superblocksize = int(attrs.superblocksize) + self.blocksize = int(attrs.blocksize) + self.slicesize = int(attrs.slicesize) + self.chunksize = int(attrs.chunksize) + self.blocksizes = ( + self.superblocksize, + self.blocksize, + self.slicesize, + self.chunksize, + ) + self.optlevel = int(attrs.optlevel) + sorted_ = self.sorted + indices = self.indices + self.dtype = sorted_.atom.dtype + self.type = sorted_.atom.type + self.indsize = indices.atom.itemsize + # Some sanity checks for slicesize, chunksize and indsize + assert self.slicesize == indices.shape[1], "Wrong slicesize" + assert ( + self.chunksize == indices._v_chunkshape[1] + ), "Wrong chunksize" + assert self.indsize in (1, 2, 4, 8), "Wrong indices itemsize" + if idxversion > "2.0": + self.reduction = int(attrs.reduction) + nelements_slr = int(self.sortedLR.attrs.nelements) + nelements_ilr = int(self.indicesLR.attrs.nelements) + else: + self.reduction = 1 + nelements_ilr = self.indicesLR[-1] + nelements_slr = nelements_ilr + self.nrows = sorted_.nrows + self.nelements = self.nrows * self.slicesize + nelements_ilr + self.nelementsSLR = nelements_slr + self.nelementsILR = nelements_ilr + if nelements_ilr > 0: + self.nrows += 1 + # Get the bounds as a cache (this has to remain here!) + rchunksize = self.chunksize // self.reduction + nbounds_lr = (nelements_slr - 1) // rchunksize + if nbounds_lr < 0: + nbounds_lr = 0 # correction for -1 bounds + nbounds_lr += 2 # bounds + begin + end + # All bounds values (+begin + end) are at the end of sortedLR + self.bebounds = self.sortedLR[ + nelements_slr : nelements_slr + nbounds_lr + ] + return + + # The index is new. 
Initialize the values + self.nrows = 0 + self.nelements = 0 + self.nelementsSLR = 0 + self.nelementsILR = 0 + + # The atom + atom = Atom.from_dtype(self.dtype) + + # The filters + filters = self.filters + + # Compute the superblocksize, blocksize, slicesize and chunksize values + # (in case these parameters haven't been passed to the constructor) + if self.blocksizes is None: + self.blocksizes = calc_chunksize( + self.expectedrows, self.optlevel, self.indsize, node=self + ) + ( + self.superblocksize, + self.blocksize, + self.slicesize, + self.chunksize, + ) = self.blocksizes + if debug: + print("blocksizes:", self.blocksizes) + # Compute the reduction level + self.reduction = get_reduction_level( + self.indsize, self.optlevel, self.slicesize, self.chunksize + ) + rchunksize = self.chunksize // self.reduction + rslicesize = self.slicesize // self.reduction + + # Save them on disk as attributes + self._v_attrs.superblocksize = np.uint64(self.superblocksize) + self._v_attrs.blocksize = np.uint64(self.blocksize) + self._v_attrs.slicesize = np.uint32(self.slicesize) + self._v_attrs.chunksize = np.uint32(self.chunksize) + # Save the optlevel as well + self._v_attrs.optlevel = self.optlevel + # Save the reduction level + self._v_attrs.reduction = self.reduction + + # Create the IndexArray for sorted values + sorted_ = IndexArray( + self, "sorted", atom, "Sorted Values", filters, self.byteorder + ) + + # Create the IndexArray for index values + IndexArray( + self, + "indices", + UIntAtom(itemsize=self.indsize), + "Number of chunk in table", + filters, + self.byteorder, + ) + + # Create the cache for range values (1st order cache) + CacheArray( + self, + "ranges", + atom, + (0, 2), + "Range Values", + filters, + self.expectedrows // self.slicesize, + byteorder=self.byteorder, + ) + # median ranges + EArray( + self, + "mranges", + atom, + (0,), + "Median ranges", + filters, + byteorder=self.byteorder, + _log=False, + ) + + # Create the cache for boundary values (2nd order 
cache) + nbounds_inslice = (rslicesize - 1) // rchunksize + CacheArray( + self, + "bounds", + atom, + (0, nbounds_inslice), + "Boundary Values", + filters, + self.nchunks, + (1, nbounds_inslice), + byteorder=self.byteorder, + ) + + # begin, end & median bounds (only for numerical types) + EArray( + self, + "abounds", + atom, + (0,), + "Start bounds", + filters, + byteorder=self.byteorder, + _log=False, + ) + EArray( + self, + "zbounds", + atom, + (0,), + "End bounds", + filters, + byteorder=self.byteorder, + _log=False, + ) + EArray( + self, + "mbounds", + atom, + (0,), + "Median bounds", + filters, + byteorder=self.byteorder, + _log=False, + ) + + # Create the Array for last (sorted) row values + bounds + shape = (rslicesize + 2 + nbounds_inslice,) + sorted_lr = LastRowArray( + self, + "sortedLR", + atom, + shape, + "Last Row sorted values + bounds", + filters, + (rchunksize,), + byteorder=self.byteorder, + ) + + # Create the Array for the number of chunk in last row + shape = (self.slicesize,) # enough for indexes and length + indices_lr = LastRowArray( + self, + "indicesLR", + UIntAtom(itemsize=self.indsize), + shape, + "Last Row indices", + filters, + (self.chunksize,), + byteorder=self.byteorder, + ) + + # The number of elements in LR will be initialized here + sorted_lr.attrs.nelements = 0 + indices_lr.attrs.nelements = 0 + + # All bounds values (+begin + end) are uninitialized in creation time + self.bebounds = None + + # The starts and lengths initialization + self.starts = np.empty(shape=self.nrows, dtype=np.int32) + """Where the values fulfiling conditions starts for every slice.""" + self.lengths = np.empty(shape=self.nrows, dtype=np.int32) + """Lengths of the values fulfilling conditions for every slice.""" + + # Finally, create a temporary file for indexes if needed + if self.temp_required: + self.create_temp() + + def initial_append( + self, xarr: list[np.ndarray], nrow: int, reduction: int + ) -> tuple[int, np.ndarray, np.ndarray]: + """Compute an 
initial indices arrays for data to be indexed.""" + if profile: + tref = clock() + if profile: + show_stats("Entering initial_append", tref) + arr = xarr.pop() + indsize = self.indsize + slicesize = self.slicesize + nelements_ilr = self.nelementsILR + if profile: + show_stats("Before creating idx", tref) + if indsize == 8: + # Casting to int "nrow * slicesize" fixes #1185 + idx = np.arange(0, len(arr), dtype="uint64") + int( + nrow * slicesize + ) + elif indsize == 4: + # For medium (32-bit) all the rows in tables should be + # directly reachable. But as len(arr) < 2**31, we can + # choose uint32 for representing indices. In this way, we + # consume far less memory during the keysort process. The + # offset will be added in self.final_idx32() later on. + # + # This optimization also prevents the values in LR to + # participate in the ``swap_chunks`` process, and this is + # the main reason to not allow the medium indexes to create + # completely sorted indexes. However, I don't find this to + # be a big limitation, as probably fully indexes are much + # more suitable for producing completely sorted indexes + # because in this case the indices part is usable for + # getting the reverse indices of the index, and I forsee + # this to be a common requirement in many operations (for + # example, in table sorts). + # + # F. 
def final_idx32(self, idx: np.ndarray, offset: int) -> np.ndarray:
    """Turn slice-local 32-bit positions into bucket numbers.

    Parameters
    ----------
    idx
        Positions as produced for medium (``indsize == 4``) indexes.
    offset
        Value added to every position before the bucket division.

    Returns
    -------
    numpy.ndarray
        A new ``uint32`` array holding ``(idx + offset) // self.lbucket``.

    """
    if profile:
        tref = clock()
        show_stats("Entering final_idx32", tref)
    # Widen to 64 bits so that adding the offset is done in uint64
    # arithmetic instead of the incoming 32-bit type.
    widened = idx.astype("uint64")
    widened += np.asarray(offset).astype(widened.dtype)
    # Original author's note: this partition is valid up to table sizes
    # of 2**30 * 2**18 = 2**48, which should be a safe figure for a while.
    widened //= np.asarray(self.lbucket).astype(widened.dtype)
    # Once divided by the bucket length the values are stored as uint32.
    buckets = widened.astype("uint32")
    if profile:
        show_stats("Exiting final_idx32", tref)
    return buckets
idx.size)) + if profile: + show_stats("Before deleting idx", tref) + del idx + # Update counters after a successful append + self.nrows = nrows + 1 + self.nelements = self.nrows * self.slicesize + self.nelementsSLR = 0 # reset the counter of the last row index to 0 + self.nelementsILR = 0 # reset the counter of the last row index to 0 + # The number of elements will be saved as an attribute. + # This is necessary in case the LR arrays can remember its values + # after a possible node preemtion/reload. + sorted_lr.attrs.nelements = self.nelementsSLR + indices_lr.attrs.nelements = self.nelementsILR + self.dirtycache = True # the cache is dirty now + if profile: + show_stats("Exiting append", tref) + + def append_last_row( + self, xarr: list[np.ndarray], update: bool = False + ) -> None: + """Append the array to the last row index objects.""" + if profile: + tref = clock() + if profile: + show_stats("Entering appendLR", tref) + # compute the elements in the last row sorted & bounds array + nrows = self.nslices + if not update and self.temp_required: + where = self.tmp + # The reduction will take place *after* the optimization process + reduction = 1 + else: + where = self + reduction = self.reduction + indices_lr = where.indicesLR + sorted_lr = where.sortedLR + larr, arr, idx = self.initial_append(xarr, nrows, reduction) + nelements_slr = len(arr) + nelements_ilr = len(idx) + # Build the cache of bounds + rchunksize = self.chunksize // reduction + self.bebounds = np.concatenate((arr[::rchunksize], [larr])) + # The number of elements will be saved as an attribute + sorted_lr.attrs.nelements = nelements_slr + indices_lr.attrs.nelements = nelements_ilr + # Save the number of elements, bounds and sorted values + # at the end of the sorted array + offset2 = len(self.bebounds) + sorted_lr[nelements_slr : nelements_slr + offset2] = self.bebounds + sorted_lr[:nelements_slr] = arr + del arr + # Now that arr is gone, we can upcast the indices and add the offset + if 
self.indsize == 4: + idx = self.final_idx32(idx, nrows * self.slicesize) + # Save the reverse index array + indices_lr[: len(idx)] = idx + del idx + # Update counters after a successful append + self.nrows = nrows + 1 + self.nelements = nrows * self.slicesize + nelements_ilr + self.nelementsILR = nelements_ilr + self.nelementsSLR = nelements_slr + self.dirtycache = True # the cache is dirty now + if profile: + show_stats("Exiting appendLR", tref) + + def optimize(self, verbose: bool = False) -> None: + """Optimize an index so as to allow faster searches. + + verbose + If True, messages about the progress of the + optimization process are printed out. + + """ + if not self.temp_required: + return + + if verbose: + self.verbose = True + else: + self.verbose = debug + + # Initialize last_tover and last_nover + self.last_tover = 0 + self.last_nover = 0 + + # Compute the correct optimizations for current optim level + opts = calcoptlevels(self.nblocks, self.optlevel, self.indsize) + optmedian, optstarts, optstops, optfull = opts + + if debug: + print("optvalues:", opts) + + self.create_temp2() + # Start the optimization process + while True: + if optfull: + for niter in range(optfull): + if self.swap("chunks", "median"): + break + if self.nblocks > 1: + # Swap slices only in the case that we have + # several blocks + if self.swap("slices", "median"): + break + if self.swap("chunks", "median"): + break + if self.swap("chunks", "start"): + break + if self.swap("chunks", "stop"): + break + else: + if optmedian: + if self.swap("chunks", "median"): + break + if optstarts: + if self.swap("chunks", "start"): + break + if optstops: + if self.swap("chunks", "stop"): + break + break # If we reach this, exit the loop + + # Check if we require a complete sort. Important: this step + # should be carried out *after* the optimization process has + # been completed (this is to guarantee that the complete sort + # does not take too much memory). 
+ if self.want_complete_sort: + if self.noverlaps > 0: + self.do_complete_sort() + # Check that we have effectively achieved the complete sort + if self.noverlaps > 0: + warnings.warn( + "OPSI was not able to achieve a completely sorted index." + " Please report this to the authors.", + UserWarning, + ) + + # Close and delete the temporal optimization index file + self.cleanup_temp() + return + + def do_complete_sort(self) -> None: + """Bring an already optimized index into a complete sorted state.""" + if self.verbose: + t1 = clock() + c1 = cpuclock() + ss = self.slicesize + tmp = self.tmp + ranges = tmp.ranges[:] + nslices = self.nslices + + nelements_lr = self.nelementsILR + if nelements_lr > 0: + # Add the ranges corresponding to the last row + rangeslr = np.array([self.bebounds[0], self.bebounds[-1]]) + ranges = np.concatenate((ranges, [rangeslr])) + nslices += 1 + + sorted_ = tmp.sorted + indices = tmp.indices + sorted_lr = tmp.sortedLR + indices_lr = tmp.indicesLR + sremain = np.array([], dtype=self.dtype) + iremain = np.array([], dtype="u%d" % self.indsize) + starts = np.zeros(shape=nslices, dtype=np.int_) + for i in range(nslices): + # Find the overlapping elements for slice i + sover = np.array([], dtype=self.dtype) + iover = np.array([], dtype="u%d" % self.indsize) + prev_end = ranges[i, 1] + for j in range(i + 1, nslices): + stj = starts[j] + if (j < self.nslices and stj == ss) or ( + j == self.nslices and stj == nelements_lr + ): + # This slice has been already dealt with + continue + if j < self.nslices: + assert ( + stj < ss + ), "Two slices cannot overlap completely at this stage!" + next_beg = sorted_[j, stj] + else: + assert ( + stj < nelements_lr + ), "Two slices cannot overlap completely at this stage!" 
+ next_beg = sorted_lr[stj] + next_end = ranges[j, 1] + if prev_end > next_end: + # Complete overlapping case + if j < self.nslices: + sover = np.concatenate((sover, sorted_[j, stj:])) + iover = np.concatenate((iover, indices[j, stj:])) + starts[j] = ss + else: + n = nelements_lr + sover = np.concatenate((sover, sorted_lr[stj:n])) + iover = np.concatenate((iover, indices_lr[stj:n])) + starts[j] = nelements_lr + elif prev_end > next_beg: + idx = self.search_item_lt(tmp, prev_end, j, ranges[j], stj) + if j < self.nslices: + sover = np.concatenate((sover, sorted_[j, stj:idx])) + iover = np.concatenate((iover, indices[j, stj:idx])) + else: + sover = np.concatenate((sover, sorted_lr[stj:idx])) + iover = np.concatenate((iover, indices_lr[stj:idx])) + starts[j] = idx + # Build the extended slices to sort out + if i < self.nslices: + ssorted = np.concatenate( + (sremain, sorted_[i, starts[i] :], sover) + ) + sindices = np.concatenate( + (iremain, indices[i, starts[i] :], iover) + ) + else: + ssorted = np.concatenate( + (sremain, sorted_lr[starts[i] : nelements_lr], sover) + ) + sindices = np.concatenate( + (iremain, indices_lr[starts[i] : nelements_lr], iover) + ) + # Sort the extended slices + indexesextension.keysort(ssorted, sindices) + # Save the first elements of extended slices in the slice i + if i < self.nslices: + sorted_[i] = ssorted[:ss] + indices[i] = sindices[:ss] + # Update caches for this slice + self.update_caches(i, ssorted[:ss]) + # Save the remaining values in a separate array + send = len(sover) + len(sremain) + sremain = ssorted[ss : ss + send] + iremain = sindices[ss : ss + send] + else: + # Still some elements remain for the last row + n = len(ssorted) + assert n == nelements_lr + send = 0 + sorted_lr[:n] = ssorted + indices_lr[:n] = sindices + # Update the caches for last row + sortedlr = sorted_lr[:nelements_lr] + bebounds = np.concatenate( + (sortedlr[:: self.chunksize], [sortedlr[-1]]) + ) + sorted_lr[nelements_lr : nelements_lr + len(bebounds)] 
def swap(
    self,
    what: Literal["chunks", "slices"],
    mode: Literal["start", "stop", "median"] | None = None,
) -> bool:
    """Run one swap pass and tell whether optimization should stop.

    Calls ``self.swap_chunks(mode)`` or ``self.swap_slices(mode)``
    depending on `what`, recomputes the overlap statistics of ``self.tmp``
    with ``self.compute_overlaps`` and compares them against fixed
    thresholds.

    Returns
    -------
    bool
        True when the caller should stop optimizing: either a chunk pass
        improved both overlap figures by less than 5%, or one of the
        thresholds below has been reached.  False otherwise.

    """
    # Stop thresholds.  (A slicesize-relative value, 4 * self.slicesize,
    # was once considered for the first one.)
    min_noverlaps = 40  # minimum number of overlapping elements
    min_mult_ratio = 0.1  # minimum ratio of multiplicity (10%)
    min_overlap_index = 0.01  # minimum overlapping index for slices (1%)

    if self.verbose:
        t1 = clock()
        c1 = cpuclock()

    # Any other value of `what` performs no swap at all.
    swapper = {"chunks": self.swap_chunks, "slices": self.swap_slices}.get(
        what
    )
    if swapper is not None:
        swapper(mode)

    message = f"swap_{what}({mode})" if mode else f"swap_{what}"
    nover, mult, tover = self.compute_overlaps(
        self.tmp, message, self.verbose
    )
    # Fraction of entries of the multiplicity array that are non-zero
    rmult = mult.nonzero()[0].size / len(mult)
    if self.verbose:
        print(f"time: {clock() - t1:.4f}. clock: {cpuclock() - c1:.4f}")

    # Only chunk passes are checked for progress against the figures
    # remembered from the previous pass.
    if what == "chunks" and self.last_tover > 0 and self.last_nover > 0:
        tover_gain = (self.last_tover - tover) / self.last_tover
        nover_gain = (self.last_nover - nover) / self.last_nover
        if tover_gain < 0.05 and nover_gain < 0.05:
            # Less than a 5% of improvement is too few; note that the
            # remembered figures are deliberately left untouched here.
            return True
    self.last_tover = tover
    self.last_nover = nover

    # A negative overlap index never triggers the last condition.
    finished = (
        nover < min_noverlaps
        or rmult < min_mult_ratio
        or 0 <= tover < min_overlap_index
    )
    return bool(finished)
+ shape, + "Median bounds", + filters, + chunkshape=(cs,), + ) + # temporary ranges + EArray( + tmp, + "ranges", + atom, + (0, 2), + "Temporary range values", + filters, + chunkshape=(cs, 2), + ) + EArray( + tmp, + "mranges", + atom, + (0,), + "Median ranges", + filters, + chunkshape=(cs,), + ) + # temporary last row (sorted) + shape = (ss + 2 + nbounds_inslice,) + CArray( + tmp, + "sortedLR", + atom, + shape, + "Temp Last Row sorted values + bounds", + filters, + chunkshape=(cs,), + ) + # temporary last row (indices) + shape = (ss,) + CArray( + tmp, + "indicesLR", + UIntAtom(itemsize=self.indsize), + shape, + "Temp Last Row indices", + filters, + chunkshape=(cs,), + ) + + def create_temp2(self) -> None: + """Create some temporary objects for slice sorting purposes.""" + # The algorithms for doing the swap can be optimized so that + # one should be necessary to create temporaries for keeping just + # the contents of a single superblock. + # F. Alted 2007-01-03 + cs = self.chunksize + ss = self.slicesize + filters = self.filters + # temporary sorted & indices arrays + shape = (self.nslices, ss) + atom = Atom.from_dtype(self.dtype) + tmp = self.tmp + CArray( + tmp, + "sorted2", + atom, + shape, + "Temporary sorted 2", + filters, + chunkshape=(1, cs), + ) + CArray( + tmp, + "indices2", + UIntAtom(itemsize=self.indsize), + shape, + "Temporary indices 2", + filters, + chunkshape=(1, cs), + ) + # temporary bounds + nbounds_inslice = (ss - 1) // cs + shape = (self.nslices, nbounds_inslice) + CArray( + tmp, + "bounds2", + atom, + shape, + "Temp chunk bounds 2", + filters, + chunkshape=(cs, nbounds_inslice), + ) + shape = (self.nchunks,) + CArray( + tmp, + "abounds2", + atom, + shape, + "Temp start bounds 2", + filters, + chunkshape=(cs,), + ) + CArray( + tmp, + "zbounds2", + atom, + shape, + "Temp end bounds 2", + filters, + chunkshape=(cs,), + ) + CArray( + tmp, + "mbounds2", + atom, + shape, + "Median bounds 2", + filters, + chunkshape=(cs,), + ) + # temporary ranges + 
CArray( + tmp, + "ranges2", + atom, + (self.nslices, 2), + "Temporary range values 2", + filters, + chunkshape=(cs, 2), + ) + CArray( + tmp, + "mranges2", + atom, + (self.nslices,), + "Median ranges 2", + filters, + chunkshape=(cs,), + ) + + def cleanup_temp(self) -> None: + """Copy the data and delete the temporaries for sorting purposes.""" + if self.verbose: + print("Copying temporary data...") + # tmp -> index + reduction = self.reduction + cs = self.chunksize // reduction + ncs = self.nchunkslice + tmp = self.tmp + for i in range(self.nslices): + # Copy sorted & indices slices + sorted_ = tmp.sorted[i][::reduction].copy() + self.sorted.append(sorted_.reshape(1, sorted_.size)) + # Compute ranges + self.ranges.append([[sorted_[0], sorted_[-1]]]) + # Compute chunk bounds + self.bounds.append([sorted_[cs::cs]]) + # Compute start, stop & median bounds and ranges + self.abounds.append(sorted_[0::cs]) + self.zbounds.append(sorted_[cs - 1 :: cs]) + smedian = sorted_[cs // 2 :: cs] + self.mbounds.append(smedian) + self.mranges.append([smedian[ncs // 2]]) + del sorted_, smedian # delete references + # Now that sorted is gone, we can copy the indices + indices = tmp.indices[i] + self.indices.append(indices.reshape(1, indices.size)) + + # Now it is the last row turn (if needed) + if self.nelementsSLR > 0: + # First, the sorted values + sorted_lr = self.sortedLR + indices_lr = self.indicesLR + nelements_lr = self.nelementsILR + sortedlr = tmp.sortedLR[:nelements_lr][::reduction].copy() + nelements_slr = len(sortedlr) + sorted_lr[:nelements_slr] = sortedlr + # Now, the bounds + self.bebounds = np.concatenate((sortedlr[::cs], [sortedlr[-1]])) + offset2 = len(self.bebounds) + sorted_lr[nelements_slr : nelements_slr + offset2] = self.bebounds + # Finally, the indices + indices_lr[:] = tmp.indicesLR[:] + # Update the number of (reduced) sorted elements + self.nelementsSLR = nelements_slr + # The number of elements will be saved as an attribute + self.sortedLR.attrs.nelements = 
def get_neworder(
    self,
    neworder: np.ndarray,
    src_disk: Array,
    tmp_disk: Array,
    lastrow: LastRowArray,
    nslices: int,
    offset: int,
    dtype: npt.DTypeLike,
) -> None:
    """Gather chunks in the order given by `neworder`, one slice at a time.

    Parameters
    ----------
    neworder
        Source chunk numbers, relative to the first slice of the block;
        entry ``i * self.nchunkslice + j`` names the chunk that ends up as
        chunk ``j`` of the ``i``-th slice being built.
    src_disk
        Two-dimensional source, read as ``src_disk[slice, lo:hi]``.
    tmp_disk
        Destination for regular slices, written as ``tmp_disk[slice]``.
    lastrow
        One-dimensional last-row array; it is read when a source chunk
        lives in the last row and written when the slice being built is
        the last row itself.
    nslices
        Number of slices to build.
    offset
        Number of the first slice of the block.
    dtype
        Element type of the scratch buffer.

    """
    chunk_len = self.chunksize
    chunks_per_slice = self.nchunkslice
    # A slice number equal to self.nslices designates the last row
    last_row_id = self.nslices
    scratch = np.empty(shape=self.slicesize, dtype=dtype)
    nchunks = chunks_per_slice
    for k in range(nslices):
        target = offset + k
        building_last_row = target == last_row_id
        if building_last_row:
            # Only the complete chunks of the last row take part
            nchunks = self.nelementsILR // chunk_len
        first_entry = k * chunks_per_slice
        for j in range(nchunks):
            src_slice, src_chunk = divmod(
                neworder[first_entry + j], chunks_per_slice
            )
            src_slice += offset
            src_lo = src_chunk * chunk_len
            dst_lo = j * chunk_len
            if src_slice == last_row_id:
                piece = lastrow[src_lo : src_lo + chunk_len]
            else:
                piece = src_disk[src_slice, src_lo : src_lo + chunk_len]
            scratch[dst_lo : dst_lo + chunk_len] = piece
        if building_last_row:
            filled = nchunks * chunk_len
            lastrow[:filled] = scratch[:filled]
            # The elements in the trailing, incomplete chunk of the last
            # row are not moved here.  According to the original author's
            # note (F. Altet 2008-08-25) they take part in the later
            # two-slices-at-a-time reordering done by
            # self.reorder_slices(), which also refreshes the last row
            # caches.
        else:
            tmp_disk[target] = scratch
def write_slice_lr(
    self, where: Array, buffer: np.ndarray, start: int = 0
) -> None:
    """Write `buffer` into the one-dimensional last-row dataset `where`.

    Parameters
    ----------
    where
        Destination dataset; it must provide ``_g_write_slice``.
    buffer
        Values to store.  All ``buffer.size`` elements are written.
    start
        Position in `where` of the first element written.

    """
    startl = np.array([start], dtype=np.uint64)
    stepl = np.array([1], dtype=np.uint64)
    # The third argument of _g_write_slice is an element count (compare
    # write_slice, which passes ``stop - start``), so it must not include
    # the starting position: with ``start + buffer.size`` any non-zero
    # `start` asked for more elements than `buffer` holds.
    countl = np.array([buffer.size], dtype=np.uint64)
    where._g_write_slice(startl, stepl, countl, buffer)
def update_caches(self, nslice: int, ssorted: np.ndarray) -> None:
    """Refresh every lookup cache of ``self.tmp`` for slice `nslice`.

    Parameters
    ----------
    nslice
        Number of the slice whose caches are rewritten.
    ssorted
        The sorted values of that slice.

    """
    chunk_len = self.chunksize
    chunks_per_slice = self.nchunkslice
    caches = self.tmp
    # Window of the per-chunk (flat) caches that belongs to this slice
    first_chunk = nslice * chunks_per_slice
    window = slice(first_chunk, (nslice + 1) * chunks_per_slice)

    # Slice-level caches: extreme values and inner chunk boundaries
    caches.ranges[nslice] = ssorted[[0, -1]]
    caches.bounds[nslice] = ssorted[chunk_len::chunk_len]
    # Chunk-level caches: first and last value of every chunk
    caches.abounds[window] = ssorted[0::chunk_len]
    caches.zbounds[window] = ssorted[chunk_len - 1 :: chunk_len]
    # Middle value of every chunk, plus the one of the middle chunk
    medians = ssorted[chunk_len // 2 :: chunk_len]
    caches.mbounds[window] = medians
    caches.mranges[nslice] = medians[chunks_per_slice // 2]
all except + the last row) of the index. + + A secondary effect of this is that it takes at least *twice* of + memory than a previous version of reorder_slices() that only + reorders on a slice-by-slice basis. However, as this is more + efficient than the old version, one can configure the slicesize + to be smaller, so the memory consumption is barely similar. + + """ + tmp = self.tmp + sorted_ = tmp.sorted + indices = tmp.indices + if tmp: + tmp_sorted = tmp.sorted2 + tmp_indices = tmp.indices2 + else: + tmp_sorted = tmp.sorted + tmp_indices = tmp.indices + cs = self.chunksize + ss = self.slicesize + nsb = self.blocksize // self.slicesize + nslices = self.nslices + nblocks = self.nblocks + nelements_lr = self.nelementsILR + # Create the buffer for reordering 2 slices at a time + ssorted = np.empty(shape=ss * 2, dtype=self.dtype) + sindices = np.empty(shape=ss * 2, dtype=np.dtype("u%d" % self.indsize)) + + if self.indsize == 8: + # Bootstrap the process for reordering + # Read the first slice in buffers + self.read_slice(tmp_sorted, 0, ssorted[:ss]) + self.read_slice(tmp_indices, 0, sindices[:ss]) + + nslice = 0 # Just in case the loop behind executes nothing + # Loop over the remaining slices in block + for nslice in range(1, sorted_.nrows): + self.reorder_slice( + nslice, + sorted_, + indices, + ssorted, + sindices, + tmp_sorted, + tmp_indices, + ) + + # End the process (enrolling the lastrow if necessary) + if nelements_lr > 0: + sorted_lr = self.tmp.sortedLR + indices_lr = self.tmp.indicesLR + # Shrink the ssorted and sindices arrays to the minimum + ssorted2 = ssorted[: ss + nelements_lr] + sortedlr = ssorted2[ss:] + sindices2 = sindices[: ss + nelements_lr] + indiceslr = sindices2[ss:] + # Read the last row info in the second part of the buffer + self.read_slice_lr(sorted_lr, sortedlr) + self.read_slice_lr(indices_lr, indiceslr) + indexesextension.keysort(ssorted2, sindices2) + # Write the second part of the buffers to the lastrow indices + 
def swap_slices(
    self, mode: Literal["start", "stop", "median"] = "median"
) -> None:
    """Reorder whole slices inside every superblock of the temporary index.

    For each superblock the slices are ranked by one reference value per
    slice: the first column of ``tmp.ranges`` for ``"start"``, the second
    column for ``"stop"``, or ``tmp.mranges`` for ``"median"``.  The
    ``sorted`` and ``indices`` leaves and all the ranges/bounds caches are
    then permuted into that order.  The permutation is staged in the
    ``*2`` leaves of ``self.tmp`` and copied back afterwards.

    Processing stops at the first superblock holding fewer than two
    slices, and a superblock that is already almost in order is skipped.

    Parameters
    ----------
    mode
        Which reference value is used to rank the slices.

    """
    tmp = self.tmp
    sorted_ = tmp.sorted
    indices = tmp.indices
    tmp_sorted = tmp.sorted2
    tmp_indices = tmp.indices2
    ncs = self.nchunkslice
    nss = self.superblocksize // self.slicesize
    nss2 = nss
    for sblock in range(self.nsuperblocks):
        # Number of the first slice in this superblock.  Superblocks are
        # always `nss` slices apart, even when the last one is shorter.
        ns = sblock * nss
        # Protection for last superblock having less slices than nss
        remainingslices = self.nslices - ns
        if remainingslices < nss:
            nss2 = remainingslices
        if nss2 <= 1:
            break
        if mode == "start":
            ranges = tmp.ranges[ns : ns + nss2, 0]
        elif mode == "stop":
            ranges = tmp.ranges[ns : ns + nss2, 1]
        elif mode == "median":
            ranges = tmp.mranges[ns : ns + nss2]
        sranges_idx = ranges.argsort(kind=defsort)
        # Don't swap the superblock at all if one doesn't need to
        ndiff = (sranges_idx != np.arange(nss2)).sum() / 2
        if ndiff * 50 < nss2:
            # Half the number of misplaced slices is below 2% of the
            # slices in the superblock, so skip its reordering
            # (too expensive for such a little improvement)
            if self.verbose:
                print("skipping reordering of superblock ->", sblock)
            continue
        # Swap sorted and indices slices following the new order.
        # The offset used here was formerly ``sblock * nss2``; for a
        # short last superblock that pointed into the previous
        # superblock instead of at the slices ranked above.
        for i in range(nss2):
            idx = sranges_idx[i]
            # Swap sorted & indices slices
            oi = ns + i
            oidx = ns + idx
            tmp_sorted[oi] = sorted_[oidx]
            tmp_indices[oi] = indices[oidx]
            # Swap start, stop & median ranges
            tmp.ranges2[oi] = tmp.ranges[oidx]
            tmp.mranges2[oi] = tmp.mranges[oidx]
            # Swap chunk bounds
            tmp.bounds2[oi] = tmp.bounds[oidx]
            # Swap start, stop & median bounds
            j = oi * ncs
            jn = (oi + 1) * ncs
            xj = oidx * ncs
            xjn = (oidx + 1) * ncs
            tmp.abounds2[j:jn] = tmp.abounds[xj:xjn]
            tmp.zbounds2[j:jn] = tmp.zbounds[xj:xjn]
            tmp.mbounds2[j:jn] = tmp.mbounds[xj:xjn]
        # tmp -> originals
        for i in range(nss2):
            # Copy sorted & indices slices
            oi = ns + i
            sorted_[oi] = tmp_sorted[oi]
            indices[oi] = tmp_indices[oi]
            # Copy start, stop & median ranges
            tmp.ranges[oi] = tmp.ranges2[oi]
            tmp.mranges[oi] = tmp.mranges2[oi]
            # Copy chunk bounds
            tmp.bounds[oi] = tmp.bounds2[oi]
            # Copy start, stop & median bounds
            j = oi * ncs
            jn = (oi + 1) * ncs
            tmp.abounds[j:jn] = tmp.abounds2[j:jn]
            tmp.zbounds[j:jn] = tmp.zbounds2[j:jn]
            tmp.mbounds[j:jn] = tmp.mbounds2[j:jn]
-> int: + """Search a single item in a specific sorted slice.""" + # This method will only work under the assumption that item + # *is to be found* in the nslice. + assert nan_aware_lt(limits[0], item) and nan_aware_le(item, limits[1]) + cs = self.chunksize + ss = self.slicesize + nelements_lr = self.nelementsILR + bstart = start // cs + + # Find the chunk + if nslice < self.nslices: + nchunk = bisect_left(where.bounds[nslice], item, bstart) + else: + # We need to subtract 1 chunk here because bebounds + # has a leading value + nchunk = bisect_left(self.bebounds, item, bstart) - 1 + assert nchunk >= 0 + + # Find the element in chunk + pos = nchunk * cs + if nslice < self.nslices: + pos += bisect_left(where.sorted[nslice, pos : pos + cs], item) + assert pos <= ss + else: + end = pos + cs + if end > nelements_lr: + end = nelements_lr + pos += bisect_left(self.sortedLR[pos:end], item) + assert pos <= nelements_lr + assert pos > 0 + return pos + + def compute_overlaps_finegrain( + self, where: RootGroup, message: str, verbose: bool + ) -> tuple[int, np.ndarray, float]: + """Compute some statistics about overlaping of slices in index. + + Returns + ------- + noverlaps : int + The total number of elements that overlaps in index. + multiplicity : array of int + The number of times that a concrete slice overlaps with any other. + toverlap : float + An ovelap index: the sum of the values in segment slices that + overlaps divided by the entire range of values. This index is only + computed for numerical types. 
+ + """ + ss = self.slicesize + ranges = where.ranges[:] + sorted_ = where.sorted + sorted_lr = where.sortedLR + nslices = self.nslices + nelements_lr = self.nelementsILR + if nelements_lr > 0: + # Add the ranges corresponding to the last row + rangeslr = np.array([self.bebounds[0], self.bebounds[-1]]) + ranges = np.concatenate((ranges, [rangeslr])) + nslices += 1 + soverlap = 0 + toverlap = -1 + multiplicity = np.zeros(shape=nslices, dtype="int_") + overlaps = multiplicity.copy() + starts = multiplicity.copy() + for i in range(nslices): + prev_end = ranges[i, 1] + for j in range(i + 1, nslices): + stj = starts[j] + assert stj <= ss + if stj == ss: + # This slice has already been counted + continue + if j < self.nslices: + next_beg = sorted_[j, stj] + else: + next_beg = sorted_lr[stj] + next_end = ranges[j, 1] + if prev_end > next_end: + # Complete overlapping case + multiplicity[j - i] += 1 + if j < self.nslices: + overlaps[i] += ss - stj + starts[j] = ss # a sentinel + else: + overlaps[i] += nelements_lr - stj + starts[j] = nelements_lr # a sentinel + elif prev_end > next_beg: + multiplicity[j - i] += 1 + idx = self.search_item_lt( + where, prev_end, j, ranges[j], stj + ) + nelem = idx - stj + overlaps[i] += nelem + starts[j] = idx + if self.type != "string": + # Convert ranges into floats in order to allow + # doing operations with them without overflows + soverlap += float(ranges[i, 1]) - float(ranges[j, 0]) + + # Return the overlap as the ratio between overlaps and entire range + if self.type != "string": + erange = float(ranges[-1, 1]) - float(ranges[0, 0]) + # Check that there is an effective range of values + # Beware, erange can be negative in situations where + # the values are suffering overflow. This can happen + # specially on big signed integer values (on overflows, + # the end value will become negative!). + # Also, there is no way to compute overlap ratios for + # non-numerical types. 
So, be careful and always check + # that toverlap has a positive value (it must have been + # initialized to -1. before) before using it. + # F. Alted 2007-01-19 + if erange > 0: + toverlap = soverlap / erange + if verbose and message != "init": + print("toverlap (%s):" % message, toverlap) + print("multiplicity:\n", multiplicity, multiplicity.sum()) + print("overlaps:\n", overlaps, overlaps.sum()) + noverlaps = overlaps.sum() + # For full indexes, set the 'is_csi' flag + if self.indsize == 8 and self._v_file._iswritable(): + self._v_attrs.is_csi = noverlaps == 0 + # Save the number of overlaps for future references + self.noverlaps = noverlaps + return (noverlaps, multiplicity, toverlap) + + def compute_overlaps( + self, where: RootGroup, message: str, verbose: bool + ) -> tuple[int, np.ndarray, float]: + """Compute some statistics about overlaping of slices in index. + + Returns + ------- + noverlaps : int + The total number of slices that overlaps in index. + multiplicity : array of int + The number of times that a concrete slice overlaps with any other. + toverlap : float + An ovelap index: the sum of the values in segment slices that + overlaps divided by the entire range of values. This index is only + computed for numerical types. 
+ + """ + ranges = where.ranges[:] + nslices = self.nslices + if self.nelementsILR > 0: + # Add the ranges corresponding to the last row + rangeslr = np.array([self.bebounds[0], self.bebounds[-1]]) + ranges = np.concatenate((ranges, [rangeslr])) + nslices += 1 + noverlaps = 0 + soverlap = 0 + toverlap = -1 + multiplicity = np.zeros(shape=nslices, dtype="int_") + for i in range(nslices): + for j in range(i + 1, nslices): + if ranges[i, 1] > ranges[j, 0]: + noverlaps += 1 + multiplicity[j - i] += 1 + if self.type != "string": + # Convert ranges into floats in order to allow + # doing operations with them without overflows + soverlap += float(ranges[i, 1]) - float(ranges[j, 0]) + + # Return the overlap as the ratio between overlaps and entire range + if self.type != "string": + erange = float(ranges[-1, 1]) - float(ranges[0, 0]) + # Check that there is an effective range of values + # Beware, erange can be negative in situations where + # the values are suffering overflow. This can happen + # specially on big signed integer values (on overflows, + # the end value will become negative!). + # Also, there is no way to compute overlap ratios for + # non-numerical types. So, be careful and always check + # that toverlap has a positive value (it must have been + # initialized to -1. before) before using it. + # F. 
Altet 2007-01-19 + if erange > 0: + toverlap = soverlap / erange + if verbose: + print("overlaps (%s):" % message, noverlaps, toverlap) + print(multiplicity) + # For full indexes, set the 'is_csi' flag + if self.indsize == 8 and self._v_file._iswritable(): + self._v_attrs.is_csi = noverlaps == 0 + # Save the number of overlaps for future references + self.noverlaps = noverlaps + return (noverlaps, multiplicity, toverlap) + + def read_sorted_indices( + self, + what: Literal["indices", "sorted"], + start: int | None, + stop: int | None, + step: int | None, + ) -> np.ndarray: + """Return the sorted or indices values in the specified range.""" + start, stop, step = self._process_range(start, stop, step) + if start >= stop: + return np.empty(0, self.dtype) + # Correction for negative values of step (reverse indices) + if step < 0: + tmp = start + start = self.nelements - stop + stop = self.nelements - tmp + if what == "sorted": + values = self.sorted + values_lr = self.sortedLR + buffer_ = np.empty(stop - start, dtype=self.dtype) + else: + values = self.indices + values_lr = self.indicesLR + buffer_ = np.empty(stop - start, dtype="u%d" % self.indsize) + ss = self.slicesize + nrow_start = start // ss + istart = start % ss + nrow_stop = stop // ss + tlen = stop - start + bstart = 0 + ilen = 0 + for nrow in range(nrow_start, nrow_stop + 1): + blen = ss - istart + if ilen + blen > tlen: + blen = tlen - ilen + if blen <= 0: + break + if nrow < self.nslices: + self.read_slice( + values, nrow, buffer_[bstart : bstart + blen], istart + ) + else: + self.read_slice_lr( + values_lr, buffer_[bstart : bstart + blen], istart + ) + istart = 0 + bstart += blen + ilen += blen + return buffer_[::step] + + def read_sorted( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> np.ndarray: + """Return the sorted values of index in the specified range. 
+ + The meaning of the start, stop and step arguments is the same as in + :meth:`Table.read_sorted`. + + """ + return self.read_sorted_indices("sorted", start, stop, step) + + def read_indices( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> np.ndarray: + """Return the indices values of index in the specified range. + + The meaning of the start, stop and step arguments is the same as in + :meth:`Table.read_sorted`. + + """ + return self.read_sorted_indices("indices", start, stop, step) + + def _process_range( + self, start: int | None, stop: int | None, step: int | None + ) -> tuple[int, int, int]: + """Get a range specific for the index usage.""" + if start is not None and stop is None: + # Special case for the behaviour of PyTables iterators + stop = idx2long(start + 1) + if start is None: + start = 0 + else: + start = idx2long(start) + if stop is None: + stop = idx2long(self.nelements) + else: + stop = idx2long(stop) + if step is None: + step = 1 + else: + step = idx2long(step) + return (start, stop, step) + + def __getitem__(self, key: int | slice) -> int | np.ndarray: + """Return the indices values of index in the specified range. + + If key argument is an integer, the corresponding index is returned. + If key is a slice, the range of indices determined by it is returned. + A negative value of step in slice is supported, meaning that the + results will be returned in reverse order. + + This method is equivalent to :meth:`Index.read_indices`. 
+ + """ + if is_idx(key): + key = operator.index(key) + + if key < 0: + # To support negative values + key += self.nelements + return self.read_indices(key, key + 1, 1)[0] + elif isinstance(key, slice): + return self.read_indices(key.start, key.stop, key.step) + + def __len__(self) -> int: + return self.nelements + + def restorecache(self) -> None: + """Clean the limits cache and resize starts and lengths arrays.""" + params = self._v_file.params + # The sorted IndexArray is absolutely required to be in memory + # at the same time as the Index instance, so create a strong + # reference to it. We are not introducing leaks because the + # strong reference will disappear when this Index instance is + # to be closed. + self._sorted = self.sorted + self._sorted.boundscache = ObjectCache( + params["BOUNDS_MAX_SLOTS"], + params["BOUNDS_MAX_SIZE"], + "non-opt types bounds", + ) + self.sorted.boundscache = ObjectCache( + params["BOUNDS_MAX_SLOTS"], + params["BOUNDS_MAX_SIZE"], + "non-opt types bounds", + ) + """A cache for the bounds (2nd hash) data. Only used for + non-optimized types searches.""" + self.limboundscache = ObjectCache( + params["LIMBOUNDS_MAX_SLOTS"], + params["LIMBOUNDS_MAX_SIZE"], + "bounding limits", + ) + """A cache for bounding limits.""" + self.sortedLRcache = ObjectCache( + params["SORTEDLR_MAX_SLOTS"], + params["SORTEDLR_MAX_SIZE"], + "last row chunks", + ) + """A cache for the last row chunks. 
Only used for searches in + the last row, and mainly useful for small indexes.""" + self.starts = np.empty(shape=self.nrows, dtype=np.int32) + self.lengths = np.empty(shape=self.nrows, dtype=np.int32) + self.sorted._init_sorted_slice(self) + self.dirtycache = False + + def search(self, item: tuple[float, float]) -> int: + """Do a binary search in this index for an item.""" + if profile: + tref = clock() + if profile: + show_stats("Entering search", tref) + + if self.dirtycache: + self.restorecache() + + # An empty item or if left limit is larger than the right one + # means that the number of records is always going to be empty, + # so we avoid further computation (including looking up the + # limits cache). + if not item or item[0] > item[1]: + self.starts[:] = 0 + self.lengths[:] = 0 + return 0 + + tlen = 0 + # Check whether the item tuple is in the limits cache or not + nslot = self.limboundscache.getslot(item) + if nslot >= 0: + startlengths = self.limboundscache.getitem(nslot) + # Reset the lengths array (not necessary for starts) + self.lengths[:] = 0 + # Now, set the interesting rows + for nrow2, start, length in startlengths: + self.starts[nrow2] = start + self.lengths[nrow2] = length + tlen = tlen + length + return tlen + # The item is not in cache. Do the real lookup. + sorted_ = self.sorted + if self.nslices > 0: + if self.type in self.opt_search_types: + # The next are optimizations. However, they hide the + # CPU functions consumptions from python profiles. + # You may want to de-activate them during profiling. 
+ if self.type == "int32": + tlen = sorted_._search_bin_na_i(*item) + elif self.type == "int64": + tlen = sorted_._search_bin_na_ll(*item) + elif self.type == "float16": + tlen = sorted_._search_bin_na_e(*item) + elif self.type == "float32": + tlen = sorted_._search_bin_na_f(*item) + elif self.type == "float64": + tlen = sorted_._search_bin_na_d(*item) + elif self.type == "float96": + tlen = sorted_._search_bin_na_g(*item) + elif self.type == "float128": + tlen = sorted_._search_bin_na_g(*item) + elif self.type == "uint32": + tlen = sorted_._search_bin_na_ui(*item) + elif self.type == "uint64": + tlen = sorted_._search_bin_na_ull(*item) + elif self.type == "int8": + tlen = sorted_._search_bin_na_b(*item) + elif self.type == "int16": + tlen = sorted_._search_bin_na_s(*item) + elif self.type == "uint8": + tlen = sorted_._search_bin_na_ub(*item) + elif self.type == "uint16": + tlen = sorted_._search_bin_na_us(*item) + else: + assert False, "This can't happen!" + else: + tlen = self.search_scalar(item, sorted_) + # Get possible remaining values in last row + if self.nelementsSLR > 0: + # Look for more indexes in the last row + start, stop = self.search_last_row(item) + self.starts[-1] = start + self.lengths[-1] = stop - start + tlen += stop - start + + if self.limboundscache.couldenablecache(): + # Get a startlengths tuple and save it in cache. + # This is quite slow, but it is a good way to compress + # the bounds info. Moreover, the .couldenablecache() + # is doing a good work so as to avoid computing this + # when it is not necessary to do it. 
+ startlengths = [] + for nrow, length in enumerate(self.lengths): + if length > 0: + startlengths.append((nrow, self.starts[nrow], length)) + # Compute the size of the recarray (aproximately) + # The +1 at the end is important to avoid 0 lengths + # (remember, the object headers take some space) + size = len(startlengths) * 8 * 2 + 1 + # Put this startlengths list in cache + self.limboundscache.setitem(item, startlengths, size) + + if profile: + show_stats("Exiting search", tref) + return tlen + + # This is a scalar version of search. It works with strings as well. + def search_scalar( + self, item: tuple[float | int, float | int], sorted_: IndexArray + ) -> int: + """Do a binary search in this index for an item.""" + tlen = 0 + # Do the lookup for values fulfilling the conditions + for i in range(self.nslices): + start, stop = sorted_._search_bin(i, item) + self.starts[i] = start + self.lengths[i] = stop - start + tlen += stop - start + return tlen + + def search_last_row(self, item: tuple[float, float]) -> tuple[int, int]: + """Search the last row.""" + # Variable initialization + item1, item2 = item + bebounds = self.bebounds + b0, b1 = bebounds[0], bebounds[-1] + bounds = bebounds[1:-1] + itemsize = self.dtype.itemsize + sorted_lr_cache = self.sortedLRcache + hi = self.nelementsSLR # maximum number of elements + rchunksize = self.chunksize // self.reduction + + nchunk = -1 + # Lookup for item1 + if nan_aware_gt(item1, b0): + if nan_aware_le(item1, b1): + # Search the appropriate chunk in bounds cache + nchunk = bisect_left(bounds, item1) + # Lookup for this chunk in cache + nslot = sorted_lr_cache.getslot(nchunk) + if nslot >= 0: + chunk = sorted_lr_cache.getitem(nslot) + else: + begin = rchunksize * nchunk + end = rchunksize * (nchunk + 1) + if end > hi: + end = hi + # Read the chunk from disk + chunk = self.sortedLR._read_sorted_slice( + self.sorted, begin, end + ) + # Put it in cache. It's important to *copy* + # the buffer, as it is reused in future reads! 
+ sorted_lr_cache.setitem( + nchunk, chunk.copy(), (end - begin) * itemsize + ) + start = bisect_left(chunk, item1) + start += rchunksize * nchunk + else: + start = hi + else: + start = 0 + # Lookup for item2 + if nan_aware_ge(item2, b0): + if nan_aware_lt(item2, b1): + # Search the appropriate chunk in bounds cache + nchunk2 = bisect_right(bounds, item2) + if nchunk2 != nchunk: + # Lookup for this chunk in cache + nslot = sorted_lr_cache.getslot(nchunk2) + if nslot >= 0: + chunk = sorted_lr_cache.getitem(nslot) + else: + begin = rchunksize * nchunk2 + end = rchunksize * (nchunk2 + 1) + if end > hi: + end = hi + # Read the chunk from disk + chunk = self.sortedLR._read_sorted_slice( + self.sorted, begin, end + ) + # Put it in cache. It's important to *copy* + # the buffer, as it is reused in future reads! + # See bug #60 in xot.carabos.com + sorted_lr_cache.setitem( + nchunk2, chunk.copy(), (end - begin) * itemsize + ) + stop = bisect_right(chunk, item2) + stop += rchunksize * nchunk2 + else: + stop = hi + else: + stop = 0 + return (start, stop) + + def get_chunkmap(self) -> np.ndarray: + """Compute a map with the interesting chunks in index.""" + if profile: + tref = clock() + if profile: + show_stats("Entering get_chunkmap", tref) + ss = self.slicesize + nsb = self.nslicesblock + nslices = self.nslices + lbucket = self.lbucket + indsize = self.indsize + bucketsinblock = self.blocksize / lbucket + nchunks = math.ceil(self.nelements / lbucket) + chunkmap = np.zeros(shape=nchunks, dtype="bool") + reduction = self.reduction + starts = (self.starts - 1) * reduction + 1 + stops = (self.starts + self.lengths) * reduction + starts[starts < 0] = 0 # All negative values set to zero + indices = self.indices + for nslice in range(self.nrows): + start = starts[nslice] + stop = stops[nslice] + if stop > start: + idx = np.empty(shape=stop - start, dtype="u%d" % indsize) + if nslice < nslices: + indices._read_index_slice(nslice, start, stop, idx) + else: + 
self.indicesLR._read_index_slice(start, stop, idx) + if indsize == 8: + idx //= np.asarray(lbucket).astype(idx.dtype) + elif indsize == 2: + # The chunkmap size cannot be never larger than 'int_' + idx = idx.astype("int_") + offset = int((nslice // nsb) * bucketsinblock) + idx += offset + elif indsize == 1: + # The chunkmap size cannot be never larger than 'int_' + idx = idx.astype("int_") + offset = (nslice * ss) // lbucket + idx += offset + chunkmap[idx] = True + # The case lbucket < nrowsinchunk should only happen in tests + nrowsinchunk = self.nrowsinchunk + if lbucket != nrowsinchunk: + # Map the 'coarse grain' chunkmap into the 'true' chunkmap + nelements = self.nelements + tnchunks = math.ceil(nelements / nrowsinchunk) + tchunkmap = np.zeros(shape=tnchunks, dtype="bool") + ratio = lbucket / nrowsinchunk + idx = chunkmap.nonzero()[0] + starts = (idx * ratio).astype("int_") + stops = np.ceil((idx + 1) * ratio).astype("int_") + for start, stop in zip(starts, stops): + tchunkmap[start:stop] = True + chunkmap = tchunkmap + if profile: + show_stats("Exiting get_chunkmap", tref) + return chunkmap + + def get_lookup_range( + self, + ops: tuple[str] | tuple[str, str], + limits: tuple[float] | tuple[float, float], + ) -> tuple[float, float]: + """Return the lookup range.""" + assert len(ops) in [1, 2] + assert len(limits) in [1, 2] + assert len(ops) == len(limits) + + column = self.column + coldtype = column.dtype.base + itemsize = coldtype.itemsize + + if len(limits) == 1: + assert ops[0] in ["lt", "le", "eq", "ge", "gt"] + limit = limits[0] + op = ops[0] + if op == "lt": + range_ = ( + inftype(coldtype, itemsize, sign=-1), + nextafter(limit, -1, coldtype, itemsize), + ) + elif op == "le": + range_ = (inftype(coldtype, itemsize, sign=-1), limit) + elif op == "gt": + range_ = ( + nextafter(limit, +1, coldtype, itemsize), + inftype(coldtype, itemsize, sign=+1), + ) + elif op == "ge": + range_ = (limit, inftype(coldtype, itemsize, sign=+1)) + elif op == "eq": + range_ = 
(limit, limit) + + elif len(limits) == 2: + assert ops[0] in ("gt", "ge") and ops[1] in ("lt", "le") + + lower, upper = limits + if lower > upper: + # ``a <[=] x <[=] b`` is always false if ``a > b``. + return () + + if ops == ("gt", "lt"): # lower < col < upper + range_ = ( + nextafter(lower, +1, coldtype, itemsize), + nextafter(upper, -1, coldtype, itemsize), + ) + elif ops == ("ge", "lt"): # lower <= col < upper + range_ = (lower, nextafter(upper, -1, coldtype, itemsize)) + elif ops == ("gt", "le"): # lower < col <= upper + range_ = (nextafter(lower, +1, coldtype, itemsize), upper) + elif ops == ("ge", "le"): # lower <= col <= upper + range_ = (lower, upper) + + return range_ + + def _f_remove(self, recursive: bool = False) -> None: + """Remove this Index object.""" + # Index removal is always recursive, + # no matter what `recursive` says. + super()._f_remove(True) + + def __str__(self) -> str: + """Return the string representation of the Index object.""" + # The filters + filters = [] + if self.filters.complevel: + if self.filters.shuffle: + filters.append("shuffle") + if self.filters.bitshuffle: + filters.append("bitshuffle") + filters.append(f"{self.filters.complib}({self.filters.complevel})") + return ( + f"Index({self.optlevel}, " + f"{self.kind}{', '.join(filters)}).is_csi={self.is_csi}" + ) + + def __repr__(self) -> str: + """Return the string representation including also metainfo.""" + cpathname = f"{self.table._v_pathname}.cols.{self.column.pathname}" + retstr = f"""{self._v_pathname} (Index for column {cpathname}) + optlevel := {self.optlevel} + kind := {self.kind} + filters := {self.filters} + is_csi := {self.is_csi} + nelements := {self.nelements} + chunksize := {self.chunksize} + slicesize := {self.slicesize} + blocksize := {self.blocksize} + superblocksize := {self.superblocksize} + dirty := {self.dirty} + byteorder := {self.byteorder!r} + sorted := {self.sorted} + indices := {self.indices} + ranges := {self.ranges} + bounds := {self.bounds} + 
sortedLR := {self.sortedLR} + indicesLR := {self.indicesLR}""" + return retstr + + +class IndexesDescG(NotLoggedMixin, Group): + """Indexes descriptor for groups.""" + + _c_classid = "DINDEX" + + def _g_width_warning(self) -> None: + warnings.warn( + "the number of indexed columns on a single description group " + "is exceeding the recommended maximum (%d); " + "be ready to see PyTables asking for *lots* of memory " + "and possibly slow I/O" % self._v_max_group_width, + PerformanceWarning, + ) + + +class IndexesTableG(NotLoggedMixin, Group): + """Table indexes.""" + + _c_classid = "TINDEX" + + @property + def auto(self) -> bool: + """Return True if auto-index is set.""" + if "AUTO_INDEX" not in self._v_attrs: + return default_auto_index + return self._v_attrs.AUTO_INDEX + + @auto.setter + def auto(self, auto: bool) -> None: + self._v_attrs.AUTO_INDEX = bool(auto) + + @auto.deleter + def auto(self) -> None: + del self._v_attrs.AUTO_INDEX + + def _g_width_warning(self) -> None: + warnings.warn( + "the number of indexed columns on a single table " + "is exceeding the recommended maximum (%d); " + "be ready to see PyTables asking for *lots* of memory " + "and possibly slow I/O" % self._v_max_group_width, + PerformanceWarning, + ) + + def _g_check_name(self, name: str) -> None: + if not name.startswith("_i_"): + raise ValueError( + "names of index groups must start with ``_i_``: %s" % name + ) + + @property + def table(self) -> Table: + """Accessor for the `Table` object of this `IndexesTableG` container.""" + names = self._v_pathname.split("/") + tablename = names.pop()[3:] # "_i_" is at the beginning + parentpathname = "/".join(names) + tablepathname = join_path(parentpathname, tablename) + table = self._v_file._get_node(tablepathname) + return table + + +class OldIndex(NotLoggedMixin, Group): + """This is meant to hide indexes of PyTables 1.x files.""" + + _c_classid = "CINDEX" diff --git a/venv/Lib/site-packages/tables/indexes.py 
b/venv/Lib/site-packages/tables/indexes.py new file mode 100644 index 0000000..db58882 --- /dev/null +++ b/venv/Lib/site-packages/tables/indexes.py @@ -0,0 +1,190 @@ +"""Here is defined the IndexArray class.""" + +from __future__ import annotations + +from bisect import bisect_left, bisect_right +from typing import TYPE_CHECKING + +from . import indexesextension +from .node import NotLoggedMixin +from .carray import CArray +from .earray import EArray + +if TYPE_CHECKING: + from .atom import Atom + from .group import Group + from .filters import Filters + +# Declarations for inheriting + + +class CacheArray(indexesextension.CacheArray, NotLoggedMixin, EArray): + """Container for keeping index caches of 1st and 2nd level.""" + + # Class identifier. + _c_classid = "CACHEARRAY" + + +class LastRowArray(indexesextension.LastRowArray, NotLoggedMixin, CArray): + """Container for keeping sorted indices values of last row of an index.""" + + # Class identifier. + _c_classid = "LASTROWARRAY" + + +class IndexArray(indexesextension.IndexArray, NotLoggedMixin, EArray): + """Represent the index (sorted or reverse index) dataset in HDF5 file. + + All NumPy typecodes are supported except for complex datatypes. + + Parameters + ---------- + parentnode + The Index class from which this object will hang off. + + .. versionchanged:: 3.0 + Renamed from *parentNode* to *parentnode*. + + name : str + The name of this node in its parent group. + atom + An Atom object representing the shape and type of the atomic objects to + be saved. Only scalar atoms are supported. + title + Sets a TITLE attribute on the array entity. + filters : Filters + An instance of the Filters class that provides information about the + desired I/O filters to be applied during the life of this object. + byteorder + The byteroder of the data on-disk. + + """ + + # Class identifier. 
+ _c_classid = "INDEXARRAY" + + @property + def chunksize(self) -> int: + """Size of the chunk for the object.""" + return self.chunkshape[1] + + @property + def slicesize(self) -> int: + """Size of the slice for the object.""" + return self.shape[1] + + def __init__( + self, + parentnode: Group, + name: str, + atom: Atom | None = None, + title: str = "", + filters: Filters | None = None, + byteorder: str | None = None, + ) -> None: + """Create an IndexArray instance.""" + self._v_pathname = parentnode._g_join(name) + if atom is not None: + # The shape and chunkshape needs to be fixed here + if name == "sorted": + reduction = parentnode.reduction + shape = (0, parentnode.slicesize // reduction) + chunkshape = (1, parentnode.chunksize // reduction) + else: + shape = (0, parentnode.slicesize) + chunkshape = (1, parentnode.chunksize) + else: + # The shape and chunkshape will be read from disk later on + shape = None + chunkshape = None + + super().__init__( + parentnode, + name, + atom, + shape, + title, + filters, + chunkshape=chunkshape, + byteorder=byteorder, + ) + + # This version of searchBin uses both ranges (1st level) and + # bounds (2nd level) caches. It uses a cache for boundary rows, + # but not for 'sorted' rows (this is only supported for the + # 'optimized' types). 
+ def _search_bin( + self, nrow: int, item: tuple[float | int, float | int] + ) -> tuple[int, int]: + item1, item2 = item + result1 = -1 + result2 = -1 + hi = self.shape[1] + ranges = self._v_parent.rvcache + boundscache = self.boundscache + # First, look at the beginning of the slice + begin = ranges[nrow, 0] + # Look for items at the beginning of sorted slices + if item1 <= begin: + result1 = 0 + if item2 < begin: + result2 = 0 + if result1 >= 0 and result2 >= 0: + return (result1, result2) + # Then, look for items at the end of the sorted slice + end = ranges[nrow, 1] + if result1 < 0: + if item1 > end: + result1 = hi + if result2 < 0: + if item2 >= end: + result2 = hi + if result1 >= 0 and result2 >= 0: + return (result1, result2) + # Finally, do a lookup for item1 and item2 if they were not found + # Lookup in the middle of slice for item1 + chunksize = self.chunksize # Number of elements/chunksize + nchunk = -1 + # Try to get the bounds row from the LRU cache + nslot = boundscache.getslot(nrow) + if nslot >= 0: + # Cache hit. Use the row kept there. + bounds = boundscache.getitem(nslot) + else: + # No luck with cached data. Read the row and put it in the cache. 
+ bounds = self._v_parent.bounds[nrow] + size = bounds.size * bounds.itemsize + boundscache.setitem(nrow, bounds, size) + if result1 < 0: + # Search the appropriate chunk in bounds cache + nchunk = bisect_left(bounds, item1) + chunk = self._read_sorted_slice( + nrow, chunksize * nchunk, chunksize * (nchunk + 1) + ) + result1 = indexesextension._bisect_left(chunk, item1, chunksize) + result1 += chunksize * nchunk + # Lookup in the middle of slice for item2 + if result2 < 0: + # Search the appropriate chunk in bounds cache + nchunk2 = bisect_right(bounds, item2) + if nchunk2 != nchunk: + chunk = self._read_sorted_slice( + nrow, chunksize * nchunk2, chunksize * (nchunk2 + 1) + ) + result2 = indexesextension._bisect_right(chunk, item2, chunksize) + result2 += chunksize * nchunk2 + return (result1, result2) + + def __str__(self) -> str: + """Compact representation of the IndexArray object.""" + return f"IndexArray(path={self._v_pathname})" + + def __repr__(self) -> str: + """Retunr the string representation of the IndexArray object.""" + return f"""{self} + atom = {self.atom!r} + shape = {self.shape} + nrows = {self.nrows} + chunksize = {self.chunksize} + slicesize = {self.slicesize} + byteorder = {self.byteorder!r}""" diff --git a/venv/Lib/site-packages/tables/indexesextension.pyd b/venv/Lib/site-packages/tables/indexesextension.pyd new file mode 100644 index 0000000..967dab9 Binary files /dev/null and b/venv/Lib/site-packages/tables/indexesextension.pyd differ diff --git a/venv/Lib/site-packages/tables/indexesextension.pyx b/venv/Lib/site-packages/tables/indexesextension.pyx new file mode 100644 index 0000000..3cb7ffa --- /dev/null +++ b/venv/Lib/site-packages/tables/indexesextension.pyx @@ -0,0 +1,1545 @@ +######################################################################## +# +# License: BSD +# Created: May 18, 2006 +# Author: Francesc Alted - faltet@pytables.com +# +# $Id$ +# +######################################################################## + +"""cython 
interface for keeping indexes classes. + +Classes (type extensions): + + IndexArray + CacheArray + LastRowArray + +Functions: + + keysort + +Misc variables: + +""" + +import numpy as np +import cython + +cimport numpy as cnp + +from .exceptions import HDF5ExtError + +# Types, constants, functions, classes & other objects from everywhere +from numpy cimport ( + import_array, + ndarray, + npy_int8, + npy_int16, + npy_int32, + npy_int64, + npy_uint8, + npy_uint16, + npy_uint32, + npy_uint64, + npy_float32, + npy_float64, + npy_float, + npy_double, + npy_longdouble, + PyArray_BYTES, + PyArray_DATA, +) + +from .hdf5extension cimport Array + +# These two types are defined in npy_common.h but not in cython's numpy.pxd +ctypedef unsigned char npy_bool +ctypedef npy_uint16 npy_float16 + +from libc.stdlib cimport malloc, free +from libc.string cimport memcpy, strncmp + +from .definitions cimport hid_t, herr_t, hsize_t, H5Screate_simple, H5Sclose +from .lrucacheextension cimport NumCache + +#------------------------------------------------------------------- + +# External C functions + +# Functions for optimized operations with ARRAY for indexing purposes +cdef extern from "H5ARRAY-opt.h" nogil: + herr_t H5ARRAYOinit_readSlice( + hid_t dataset_id, hid_t *mem_space_id, hsize_t count) + herr_t H5ARRAYOread_readSlice( + hid_t dataset_id, hid_t type_id, + hsize_t irow, hsize_t start, hsize_t stop, void *data) + herr_t H5ARRAYOread_readSortedSlice( + hid_t dataset_id, hid_t mem_space_id, hid_t type_id, + hsize_t irow, hsize_t start, hsize_t stop, void *data) + herr_t H5ARRAYOread_readBoundsSlice( + hid_t dataset_id, hid_t mem_space_id, hid_t type_id, + hsize_t irow, hsize_t start, hsize_t stop, void *data) + herr_t H5ARRAYOreadSliceLR( + hid_t dataset_id, hid_t type_id, hsize_t start, hsize_t stop, void *data) + + +# Functions for optimized operations for dealing with indexes +cdef extern from "idx-opt.h" nogil: + int bisect_left_b(npy_int8 *a, long x, int hi, int offset) + int 
bisect_left_ub(npy_uint8 *a, long x, int hi, int offset) + int bisect_right_b(npy_int8 *a, long x, int hi, int offset) + int bisect_right_ub(npy_uint8 *a, long x, int hi, int offset) + int bisect_left_s(npy_int16 *a, long x, int hi, int offset) + int bisect_left_us(npy_uint16 *a, long x, int hi, int offset) + int bisect_right_s(npy_int16 *a, long x, int hi, int offset) + int bisect_right_us(npy_uint16 *a, long x, int hi, int offset) + int bisect_left_i(npy_int32 *a, long x, int hi, int offset) + int bisect_left_ui(npy_uint32 *a, npy_uint32 x, int hi, int offset) + int bisect_right_i(npy_int32 *a, long x, int hi, int offset) + int bisect_right_ui(npy_uint32 *a, npy_uint32 x, int hi, int offset) + int bisect_left_ll(npy_int64 *a, npy_int64 x, int hi, int offset) + int bisect_left_ull(npy_uint64 *a, npy_uint64 x, int hi, int offset) + int bisect_right_ll(npy_int64 *a, npy_int64 x, int hi, int offset) + int bisect_right_ull(npy_uint64 *a, npy_uint64 x, int hi, int offset) + int bisect_left_e(npy_float16 *a, npy_float64 x, int hi, int offset) + int bisect_right_e(npy_float16 *a, npy_float64 x, int hi, int offset) + int bisect_left_f(npy_float32 *a, npy_float64 x, int hi, int offset) + int bisect_right_f(npy_float32 *a, npy_float64 x, int hi, int offset) + int bisect_left_d(npy_float64 *a, npy_float64 x, int hi, int offset) + int bisect_right_d(npy_float64 *a, npy_float64 x, int hi, int offset) + int bisect_left_g(npy_longdouble *a, npy_longdouble x, int hi, int offset) + int bisect_right_g(npy_longdouble *a, npy_longdouble x, int hi, int offset) + + +#---------------------------------------------------------------------------- + +# Initialization code + +# The numpy API requires this function to be called before +# using any numpy facilities in an extension module. 
+import_array() + +#--------------------------------------------------------------------------- + +ctypedef fused floating_type: + npy_float32 + npy_float64 + npy_longdouble + + +ctypedef fused number_type: + npy_int8 + npy_int16 + npy_int32 + npy_int64 + + npy_uint8 + npy_uint16 + npy_uint32 + npy_uint64 + + npy_float32 + npy_float64 + npy_longdouble + +#=========================================================================== +# Functions +#=========================================================================== + +#--------------------------------------------------------------------------- +# keysort +#--------------------------------------------------------------------------- + +DEF PYA_QS_STACK = 100 +DEF SMALL_QUICKSORT = 15 + +def keysort(ndarray array1, ndarray array2): + """Sort array1 in-place. array2 is also sorted following the array1 order. + + array1 can be of any type, except complex or string. array2 may be made of + elements on any size. + + """ + cdef size_t size = cnp.PyArray_SIZE(array1) + cdef size_t elsize1 = cnp.PyArray_ITEMSIZE(array1) + cdef size_t elsize2 = cnp.PyArray_ITEMSIZE(array2) + cdef int type_num = cnp.PyArray_TYPE(array1) + + # floating types + if type_num == cnp.NPY_FLOAT16: + _keysort[npy_float16](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_FLOAT32: + _keysort[npy_float32](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_FLOAT64: + _keysort[npy_float64](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_LONGDOUBLE: + _keysort[npy_longdouble](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + # signed integer types + elif type_num == cnp.NPY_INT8: + _keysort[npy_int8](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_INT16: + _keysort[npy_int16](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_INT32: + 
_keysort[npy_int32](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_INT64: + _keysort[npy_int64](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + # unsigned integer types + elif type_num == cnp.NPY_UINT8: + _keysort[npy_uint8](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_UINT16: + _keysort[npy_uint16](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_UINT32: + _keysort[npy_uint32](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_UINT64: + _keysort[npy_uint64](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + # other + elif type_num == cnp.NPY_BOOL: + _keysort[npy_bool](PyArray_DATA(array1), PyArray_BYTES(array2), elsize2, size) + elif type_num == cnp.NPY_STRING: + _keysort_string(PyArray_BYTES(array1), elsize1, PyArray_BYTES(array2), elsize2, size) + else: + raise ValueError("Unknown array datatype") + + +cdef inline void swap_bytes(char *x, char *y, size_t n) noexcept nogil: + if n == 8: + (x)[0], (y)[0] = (y)[0], (x)[0] + elif n == 4: + (x)[0], (y)[0] = (y)[0], (x)[0] + elif n == 2: + (x)[0], (y)[0] = (y)[0], (x)[0] + else: + for i in range(n): + x[i], y[i] = y[i], x[i] + + +cdef inline int less_than(number_type* a, number_type* b) nogil: + if number_type in floating_type: + return a[0] < b[0] or (b[0] != b[0] and a[0] == a[0]) + else: + return a[0] < b[0] + + +@cython.cdivision(True) +cdef void _keysort(number_type* start1, char* start2, size_t elsize2, size_t n) noexcept nogil: + cdef number_type *pl = start1 + cdef number_type *pr = start1 + (n - 1) + + cdef char *ipl = start2 + cdef char *ipr = start2 + (n - 1) * elsize2 + + cdef number_type vp + cdef char *ivp = malloc(elsize2) + + cdef number_type *stack[PYA_QS_STACK] + cdef number_type **sptr = stack + + cdef char *istack[PYA_QS_STACK] + cdef char **isptr = istack + + cdef size_t stack_index = 0 + + cdef number_type *pm + cdef 
number_type *pi + cdef number_type *pj + cdef number_type *pt + cdef char *ipm + cdef char *ipi + cdef char *ipj + cdef char *ipt + + while True: + while pr - pl > SMALL_QUICKSORT: + pm = pl + ((pr - pl) >> 1) + ipm = ipl + ((ipr - ipl)//elsize2 >> 1)*elsize2 + + if less_than(pm, pl): + pm[0], pl[0] = pl[0], pm[0] + swap_bytes(ipm, ipl, elsize2) + + if less_than(pr, pm): + pr[0], pm[0] = pm[0], pr[0] + swap_bytes(ipr, ipm, elsize2) + + if less_than(pm, pl): + pm[0], pl[0] = pl[0], pm[0] + swap_bytes(ipm, ipl, elsize2) + + vp = pm[0] + + pi = pl + ipi = ipl + + pj = pr - 1 + ipj = ipr - elsize2 + + pm[0], pj[0] = pj[0], pm[0] + swap_bytes(ipm, ipj, elsize2) + + while True: + pi += 1 + ipi += elsize2 + while less_than(pi, &vp): + pi += 1 + ipi += elsize2 + + pj -= 1 + ipj -= elsize2 + while less_than(&vp, pj): + pj -= 1 + ipj -= elsize2 + + if pi >= pj: + break + + pi[0], pj[0] = pj[0], pi[0] + swap_bytes(ipi, ipj, elsize2) + + pi[0], (pr-1)[0] = (pr-1)[0], pi[0] + swap_bytes(ipi, ipr-elsize2, elsize2) + + # push largest partition on stack and proceed with the other + if (pi - pl) < (pr - pi): + sptr[0] = pi + 1 + sptr[1] = pr + sptr += 2 + + isptr[0] = ipi + elsize2 + isptr[1] = ipr + isptr += 2 + + pr = pi - 1 + ipr = ipi - elsize2 + else: + sptr[0] = pl + sptr[1] = pi - 1 + sptr += 2 + + isptr[0] = ipl + isptr[1] = ipi - elsize2 + isptr += 2 + + pl = pi + 1 + ipl = ipi + elsize2 + + pi = pl + 1 + ipi = ipl + elsize2 + while pi <= pr: + vp = pi[0] + memcpy(ivp, ipi, elsize2) + + pj = pi + pt = pi - 1 + + ipj = ipi + ipt = ipi - elsize2 + + while pj > pl and less_than(&vp, pt): + pj[0] = pt[0] + pj -= 1 + pt -= 1 + + memcpy(ipj, ipt, elsize2) + ipj -= elsize2 + ipt -= elsize2 + + pj[0] = vp + memcpy(ipj, ivp, elsize2) + + pi += 1 + ipi += elsize2 + + if sptr == stack: + break + + sptr -= 2 + pl = sptr[0] + pr = sptr[1] + + isptr -= 2 + ipl = isptr[0] + ipr = isptr[1] + + free(ivp) + + +@cython.cdivision(True) +cdef void _keysort_string(char* start1, size_t ss, char* 
start2, size_t ts, size_t n) noexcept nogil: + cdef char *pl = start1 + cdef char *pr = start1 + (n - 1) * ss + + cdef char *ipl = start2 + cdef char *ipr = start2 + (n - 1) * ts + + cdef char *vp = malloc(ss) + cdef char *ivp = malloc(ts) + + cdef char *stack[PYA_QS_STACK] + cdef char **sptr = stack + + cdef char *istack[PYA_QS_STACK] + cdef char **isptr = istack + + cdef size_t stack_index = 0 + + cdef char *pm + cdef char *pi + cdef char *pj + cdef char *pt + + cdef char *ipm + cdef char *ipi + cdef char *ipj + cdef char *ipt + + while True: + while pr - pl > (SMALL_QUICKSORT * ss): + pm = pl + ((pr - pl)//ss >> 1)*ss + ipm = ipl + ((ipr - ipl)//ts >> 1)*ts + + if strncmp(pm, pl, ss) < 0: + swap_bytes(pm, pl, ss) + swap_bytes(ipm, ipl, ts) + + if strncmp(pr, pm, ss) < 0: + swap_bytes(pr, pm, ss) + swap_bytes(ipr, ipm, ts) + + if strncmp(pm, pl, ss) < 0: + swap_bytes(pm, pl, ss) + swap_bytes(ipm, ipl, ts) + + memcpy(vp, pm, ss) + + pi = pl + ipi = ipl + + pj = pr - ss + ipj = ipr - ts + + swap_bytes(pm, pj, ss) + swap_bytes(ipm, ipj, ts) + + while True: + pi += ss + ipi += ts + while strncmp(pi, vp, ss) < 0: + pi += ss + ipi += ts + + pj -= ss + ipj -= ts + while strncmp(vp, pj, ss) < 0: + pj -= ss + ipj -= ts + + if pi >= pj: + break + + swap_bytes(pi, pj, ss) + swap_bytes(ipi, ipj, ts) + + swap_bytes(pi, pr-ss, ss) + swap_bytes(ipi, ipr-ts, ts) + + # push largest partition on stack and proceed with the other + if (pi - pl) < (pr - pi): + sptr[0] = pi + ss + sptr[1] = pr + sptr += 2 + + isptr[0] = ipi + ts + isptr[1] = ipr + isptr += 2 + + pr = pi - ss + ipr = ipi - ts + else: + sptr[0] = pl + sptr[1] = pi - ss + sptr += 2 + + isptr[0] = ipl + isptr[1] = ipi - ts + isptr += 2 + + pl = pi + ss + ipl = ipi + ts + + pi = pl + ss + ipi = ipl + ts + + while pi <= pr: + memcpy(vp, pi, ss) + memcpy(ivp, ipi, ts) + + pj = pi + pt = pi - ss + + ipj = ipi + ipt = ipi - ts + + while pj > pl and strncmp(vp, pt, ss) < 0: + memcpy(pj, pt, ss) + pj -= ss + pt -= ss + + 
+ memcpy(ipj, ipt, ts) + ipj -= ts + ipt -= ts + + memcpy(pj, vp, ss) + memcpy(ipj, ivp, ts) + + pi += ss + ipi += ts + + if sptr == stack: + break + + sptr -= 2 + pl = sptr[0] + pr = sptr[1] + + isptr -= 2 + ipl = isptr[0] + ipr = isptr[1] + + free(vp) + free(ivp) + +#--------------------------------------------------------------------------- +# bisect +#--------------------------------------------------------------------------- + +# This has been copied from the standard module bisect. +# Checks for the values out of limits have been added at the beginning +# because I foresee that this should be a very common case. +# 2004-05-20 +def _bisect_left(a, x, int hi): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e < x, and all e in + a[i:] have e >= x. So if x already appears in the list, i points just + before the leftmost x already there. + + """ + + cdef int lo, mid + + lo = 0 + if x <= a[0]: return 0 + if a[-1] < x: return hi + while lo < hi: + mid = (lo+hi)//2 + if a[mid] < x: lo = mid+1 + else: hi = mid + return lo + + +def _bisect_right(a, x, int hi): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e <= x, and all e in + a[i:] have e > x. So if x already appears in the list, i points just + beyond the rightmost x already there. 
+ + """ + + cdef int lo, mid + + lo = 0 + if x < a[0]: return 0 + if a[-1] <= x: return hi + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: hi = mid + else: lo = mid+1 + return lo + + +#=========================================================================== +# Classes +#=========================================================================== + + + +cdef class Index: + pass + + +cdef class CacheArray(Array): + """Container for keeping index caches of 1st and 2nd level.""" + + cdef hid_t mem_space_id + + cdef initread(self, int nbounds): + # "Actions to accelerate the reads afterwards." + + # Precompute the mem_space_id + if (H5ARRAYOinit_readSlice(self.dataset_id, &self.mem_space_id, + nbounds) < 0): + raise HDF5ExtError("Problems initializing the bounds array data.") + return + + cdef read_slice(self, hsize_t nrow, hsize_t start, hsize_t stop, void *rbuf): + # "Read an slice of bounds." + + if (H5ARRAYOread_readBoundsSlice( + self.dataset_id, self.mem_space_id, self.type_id, + nrow, start, stop, rbuf) < 0): + raise HDF5ExtError("Problems reading the bounds array data.") + return + + def _g_close(self): + super()._g_close() + # Release specific resources of this class + if self.mem_space_id > 0: + H5Sclose(self.mem_space_id) + + +cdef class IndexArray(Array): + """Container for keeping sorted and indices values.""" + + cdef void *rbufst + cdef void *rbufln + cdef void *rbufrv + cdef void *rbufbc + cdef void *rbuflb + cdef hid_t mem_space_id + cdef int l_chunksize, l_slicesize, nbounds, indsize + cdef CacheArray bounds_ext + cdef NumCache boundscache, sortedcache + cdef ndarray bufferbc, bufferlb + + def _read_index_slice(self, hsize_t irow, hsize_t start, hsize_t stop, + ndarray idx): + cdef herr_t ret + cdef void *buf = PyArray_DATA(idx) + + # Do the physical read + with nogil: + ret = H5ARRAYOread_readSlice(self.dataset_id, self.type_id, + irow, start, stop, buf) + + if ret < 0: + raise HDF5ExtError("Problems reading the index indices.") + + + def 
_init_sorted_slice(self, index): + """Initialize the structures for doing a binary search.""" + + cdef long ndims + cdef int rank, buflen, cachesize + cdef char *bname + cdef hsize_t count[2] + cdef ndarray starts, lengths, rvcache + cdef object maxslots, rowsize + + dtype = self.atom.dtype + # Create the buffer for reading sorted data chunks if not created yet + if self.bufferlb is None: + # Internal buffers + self.bufferlb = np.empty(dtype=dtype, shape=self.chunksize) + # Get the pointers to the different buffer data areas + self.rbuflb = PyArray_DATA(self.bufferlb) + # Init structures for accelerating sorted array reads + rank = 2 + count[0] = 1 + count[1] = self.chunksize + self.mem_space_id = H5Screate_simple(rank, count, NULL) + # Cache some counters in local extension variables + self.l_chunksize = self.chunksize + self.l_slicesize = self.slicesize + + # Get the addresses of buffer data + starts = index.starts + lengths = index.lengths + self.rbufst = PyArray_DATA(starts) + self.rbufln = PyArray_DATA(lengths) + # The 1st cache is loaded completely in memory and needs to be reloaded + rvcache = index.ranges[:] + self.rbufrv = PyArray_DATA(rvcache) + index.rvcache = rvcache + # Init the bounds array for reading + self.nbounds = index.bounds.shape[1] + self.bounds_ext = index.bounds + self.bounds_ext.initread(self.nbounds) + if str(dtype) in self._v_parent.opt_search_types: + # The next caches should be defined only for optimized search types. + # The 2nd level cache will replace the already existing ObjectCache and + # already bound to the boundscache attribute. This way, the cache will + # not be duplicated (I know, this smells badly, but anyway). 
+ params = self._v_file.params + rowsize = (self.bounds_ext._v_chunkshape[1] * dtype.itemsize) + maxslots = params['BOUNDS_MAX_SIZE'] // rowsize + self.boundscache = NumCache( + (maxslots, self.nbounds), dtype, 'non-opt types bounds') + self.bufferbc = np.empty(dtype=dtype, shape=self.nbounds) + # Get the pointer for the internal buffer for 2nd level cache + self.rbufbc = PyArray_DATA(self.bufferbc) + # Another NumCache for the sorted values + rowsize = (self.chunksize*dtype.itemsize) + maxslots = params['SORTED_MAX_SIZE'] // (self.chunksize*dtype.itemsize) + self.sortedcache = NumCache( + (maxslots, self.chunksize), dtype, 'sorted') + + + + cdef void *_g_read_sorted_slice(self, hsize_t irow, hsize_t start, + hsize_t stop): + """Read the sorted part of an index.""" + + with nogil: + ret = H5ARRAYOread_readSortedSlice( + self.dataset_id, self.mem_space_id, self.type_id, + irow, start, stop, self.rbuflb) + + if ret < 0: + raise HDF5ExtError("Problems reading the array data.") + + return self.rbuflb + + # can't time machine since this function is cdef'd + #_g_read_sorted_slice = prveious_api(_g_read_sorted_slice) + + # This is callable from python + def _read_sorted_slice(self, hsize_t irow, hsize_t start, hsize_t stop): + """Read the sorted part of an index.""" + + self._g_read_sorted_slice(irow, start, stop) + return self.bufferlb + + + cdef void *get_lru_bounds(self, int nrow, int nbounds): + """Get the bounds from the cache, or read them.""" + + cdef void *vpointer + cdef long nslot + + nslot = self.boundscache.getslot_(nrow) + if nslot >= 0: + vpointer = self.boundscache.getitem1_(nslot) + else: + # Bounds row is not in cache. Read it and put it in the LRU cache. 
+ self.bounds_ext.read_slice(nrow, 0, nbounds, self.rbufbc) + self.boundscache.setitem_(nrow, self.rbufbc, 0) + vpointer = self.rbufbc + return vpointer + + # can't time machine since get_lru_bounds() function is cdef'd + + cdef void *get_lru_sorted(self, int nrow, int ncs, int nchunk, int cs): + """Get the sorted row from the cache or read it.""" + + cdef void *vpointer + cdef npy_int64 nckey + cdef long nslot + cdef hsize_t start, stop + + # Compute the number of chunk read and use it as the key for the cache. + nckey = nrow*ncs+nchunk + nslot = self.sortedcache.getslot_(nckey) + if nslot >= 0: + vpointer = self.sortedcache.getitem1_(nslot) + else: + # The sorted chunk is not in cache. Read it and put it in the LRU cache. + start = cs*nchunk + stop = cs*(nchunk+1) + vpointer = self._g_read_sorted_slice(nrow, start, stop) + self.sortedcache.setitem_(nckey, vpointer, 0) + return vpointer + + # can't time machine since get_lru_sorted() function is cdef'd + + # Optimized version for int8 + def _search_bin_na_b(self, long item1, long item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_int8 *rbufrv + cdef npy_int8 *rbufbc = NULL + cdef npy_int8 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_b(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_b(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_b(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_b(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for uint8 + def _search_bin_na_ub(self, long item1, long item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_uint8 *rbufrv + cdef npy_uint8 *rbufbc = NULL + cdef npy_uint8 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_ub(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_ub(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_ub(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_ub(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for int16 + def _search_bin_na_s(self, long item1, long item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_int16 *rbufrv + cdef npy_int16 *rbufbc = NULL + cdef npy_int16 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_s(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_s(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_s(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_s(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for uint16 + def _search_bin_na_us(self, long item1, long item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_uint16 *rbufrv + cdef npy_uint16 *rbufbc = NULL + cdef npy_uint16 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_us(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_us(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_us(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_us(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for int32 + def _search_bin_na_i(self, long item1, long item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_int32 *rbufrv + cdef npy_int32 *rbufbc = NULL + cdef npy_int32 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_i(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_i(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_i(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_i(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for uint32 + def _search_bin_na_ui(self, npy_uint32 item1, npy_uint32 item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_uint32 *rbufrv + cdef npy_uint32 *rbufbc = NULL + cdef npy_uint32 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_ui(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_ui(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_ui(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_ui(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for int64 + def _search_bin_na_ll(self, npy_int64 item1, npy_int64 item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_int64 *rbufrv + cdef npy_int64 *rbufbc = NULL + cdef npy_int64 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_ll(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_ll(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_ll(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_ll(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for uint64 + def _search_bin_na_ull(self, npy_uint64 item1, npy_uint64 item2): + cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_uint64 *rbufrv + cdef npy_uint64 *rbufbc = NULL + cdef npy_uint64 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + rbufst = self.rbufst + rbufln = self.rbufln + rbufrv = self.rbufrv + tlength = 0 + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_ull(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_ull(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_ull(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_ull(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for float16 + def _search_bin_na_e(self, npy_float64 item1, npy_float64 item2): + cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_float16 *rbufrv + cdef npy_float16 *rbufbc = NULL + cdef npy_float16 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + tlength = 0 + rbufst = self.rbufst + rbufln = self.rbufln + # Limits not in cache, do a lookup + rbufrv = self.rbufrv + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_e(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_e(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_e(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_e(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for float32 + def _search_bin_na_f(self, npy_float64 item1, npy_float64 item2): + cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + # Variables with specific type + cdef npy_float32 *rbufrv + cdef npy_float32 *rbufbc = NULL + cdef npy_float32 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + tlength = 0 + rbufst = self.rbufst + rbufln = self.rbufln + + # Limits not in cache, do a lookup + rbufrv = self.rbufrv + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_f(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_f(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_f(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_f(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for float64 + def _search_bin_na_d(self, npy_float64 item1, npy_float64 item2): + cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_float64 *rbufrv + cdef npy_float64 *rbufbc = NULL + cdef npy_float64 *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + tlength = 0 + rbufst = self.rbufst + rbufln = self.rbufln + + # Limits not in cache, do a lookup + rbufrv = self.rbufrv + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_d(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_d(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_d(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_d(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + # Optimized version for npy_longdouble/float96/float128 + def _search_bin_na_g(self, npy_longdouble item1, npy_longdouble item2): + cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow + cdef int start, stop, tlength, length, bread, nchunk, nchunk2 + cdef int *rbufst + cdef int *rbufln + + # Variables with specific type + cdef npy_longdouble *rbufrv + cdef npy_longdouble *rbufbc = NULL + cdef npy_longdouble *rbuflb = NULL + + cs = self.l_chunksize + ss = self.l_slicesize + ncs = ss // cs + nbounds = self.nbounds + nrows = self.nrows + tlength = 0 + rbufst = self.rbufst + rbufln = self.rbufln + + # Limits not in cache, do a lookup + rbufrv = self.rbufrv + for nrow from 0 <= nrow < nrows: + rvrow = nrow*2 + bread = 0 + nchunk = -1 + + # Look if item1 is in this row + if item1 > rbufrv[rvrow]: + if item1 <= rbufrv[rvrow+1]: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + bread = 1 + nchunk = bisect_left_g(rbufbc, item1, nbounds, 0) + # Get the sorted row from the LRU cache or read it. 
+ rbuflb = self.get_lru_sorted(nrow, ncs, nchunk, cs) + start = bisect_left_g(rbuflb, item1, cs, 0) + cs*nchunk + else: + start = ss + else: + start = 0 + # Now, for item2 + if item2 >= rbufrv[rvrow]: + if item2 < rbufrv[rvrow+1]: + if not bread: + # Get the bounds row from the LRU cache or read them. + rbufbc = self.get_lru_bounds(nrow, nbounds) + nchunk2 = bisect_right_g(rbufbc, item2, nbounds, 0) + if nchunk2 <> nchunk: + # Get the sorted row from the LRU cache or read it. + rbuflb = self.get_lru_sorted(nrow, ncs, nchunk2, cs) + stop = bisect_right_g(rbuflb, item2, cs, 0) + cs*nchunk2 + else: + stop = ss + else: + stop = 0 + length = stop - start + tlength = tlength + length + rbufst[nrow] = start + rbufln[nrow] = length + return tlength + + + def _g_close(self): + super()._g_close() + # Release specific resources of this class + if self.mem_space_id > 0: + H5Sclose(self.mem_space_id) + + +cdef class LastRowArray(Array): + """ + Container for keeping sorted and indices values of last rows of an index. + """ + + def _read_index_slice(self, hsize_t start, hsize_t stop, ndarray idx): + """Read the reverse index part of an LR index.""" + + cdef void *buf = PyArray_DATA(idx) + with nogil: + ret = H5ARRAYOreadSliceLR(self.dataset_id, self.type_id, + start, stop, buf) + + if ret < 0: + raise HDF5ExtError("Problems reading the index data in Last Row.") + + + def _read_sorted_slice(self, IndexArray sorted, hsize_t start, hsize_t stop): + """Read the sorted part of an LR index.""" + + cdef void *rbuflb + + rbuflb = sorted.rbuflb # direct access to rbuflb: very fast. 
+ with nogil: + ret = H5ARRAYOreadSliceLR(self.dataset_id, self.type_id, + start, stop, rbuflb) + + if ret < 0: + raise HDF5ExtError("Problems reading the index data.") + return sorted.bufferlb[:stop-start] + + + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## fill-column: 78 +## End: diff --git a/venv/Lib/site-packages/tables/leaf.py b/venv/Lib/site-packages/tables/leaf.py new file mode 100644 index 0000000..95f526b --- /dev/null +++ b/venv/Lib/site-packages/tables/leaf.py @@ -0,0 +1,1063 @@ +"""Here is defined the Leaf class.""" + +from __future__ import annotations + +import json +import math +import warnings +from typing import Any, Literal, NamedTuple, Union, TYPE_CHECKING +from pathlib import Path +from functools import lru_cache + +import numpy as np + +from .node import Node +from .utils import byteorders, lazyattr, SizeType +from .flavor import check_flavor, internal_flavor, toarray +from .flavor import alias_map as flavor_alias_map +from .filters import Filters +from .exceptions import ( + NoSuchChunkError, + NotChunkAlignedError, + NotChunkedError, + PerformanceWarning, +) + +if TYPE_CHECKING: + from .group import Group + + +# These should be declared as type aliases, +# but ``TypeAlias`` requires Python >= 3.10 +# and ``type`` statements require Python >= 3.12. + +# ``np.typing.NDArray[np.uint8]`` requires NumPy >= 1.21. +NPByteArray = np.ndarray[tuple[int], np.dtype[np.uint8]] + +# ``Buffer`` requires Python >= 3.12. 
+BufferLike = Union[bytes, bytearray, memoryview, NPByteArray] + + +def read_cached_cpu_info() -> dict[str, Any]: + """Load the CPU information from a cache file.""" + try: + with open(Path.home() / ".pytables-cpuinfo.json") as f: + return json.load(f) + except FileNotFoundError: + return {} + + +def write_cached_cpu_info(cpu_info_dict: dict[str, Any]) -> None: + """Write CPU information to a cache file.""" + with open(Path.home() / ".pytables-cpuinfo.json", "w") as f: + json.dump(cpu_info_dict, f, indent=4) + + +@lru_cache(maxsize=1) +def get_cpu_info() -> dict[str, Any]: + """Return a dictionary containing CPU information.""" + cached_info = read_cached_cpu_info() + if cached_info: + return cached_info + + try: + import cpuinfo + except ImportError: + return {} + cpu_info_dict = cpuinfo.get_cpu_info() + try: + write_cached_cpu_info(cpu_info_dict) + except OSError: + # cpu info cannot be stored. + # will need to be recomputed in the next process + pass + return cpu_info_dict + + +def csformula(expected_mb: int) -> int: + """Return the fitted chunksize for expected_mb.""" + # For a basesize of 8 KB, this will return: + # 8 KB for datasets <= 1 MB + # 1 MB for datasets >= 10 TB + basesize = 8 * 1024 # 8 KB is a good minimum + return basesize * int(2 ** math.log10(expected_mb)) + + +def limit_es(expected_mb: int) -> int: + """Protection against creating too small or too large chunks.""" + if expected_mb < 1: # < 1 MB + expected_mb = 1 + elif expected_mb > 10**7: # > 10 TB + expected_mb = 10**7 + return expected_mb + + +def calc_chunksize(expected_mb: int) -> int: + """Compute the optimum HDF5 chunksize for I/O purposes. + + Rational: HDF5 takes the data in bunches of chunksize length to + write the on disk. A BTree in memory is used to map structures on + disk. The more chunks that are allocated for a dataset the larger + the B-tree. Large B-trees take memory and causes file storage + overhead as well as more disk I/O and higher contention for the meta + data cache. 
You have to balance between memory and I/O overhead + (small B-trees) and time to access to data (big B-trees). + + The tuning of the chunksize parameter affects the performance and + the memory consumed. This is based on my own experiments and, as + always, your mileage may vary. + + """ + expected_mb = limit_es(expected_mb) + zone = int(math.log10(expected_mb)) + expected_mb = 10**zone + chunksize = csformula(expected_mb) + # XXX: Multiply by 8 seems optimal for sequential access + return chunksize * 8 + + +class ChunkInfo(NamedTuple): + """Information about storage for a given chunk. + + It may also refer to a chunk which is within the dataset's shape but that + does not exist in storage, i.e. a missing chunk. + + An instance of this named tuple class contains the following information, + in field order: + + .. attribute:: start + + The coordinates in dataset items where the chunk starts, a tuple of + integers with the same rank as the dataset. These coordinates are + always aligned with chunk boundaries. Also present for missing + chunks. + + .. attribute:: filter_mask + + An integer where each active bit signals that the filter in its + position in the pipeline was disabled when storing the chunk. For + instance, ``0b10`` disables shuffling, ``0b100`` disables szip, and so + on. ``None`` for missing chunks. + + .. attribute:: offset + + An integer which indicates the offset in bytes of chunk data as it + exists in storage. ``None`` for missing chunks. + + .. attribute:: size + + An integer which indicates the size in bytes of chunk data as it + exists in storage. ``None`` for missing chunks. + + """ + + start: tuple[int, ...] | None + filter_mask: int | None + offset: int | None + size: int | None + + +class Leaf(Node): + """Abstract base class for all PyTables leaves. 
+ + A leaf is a node (see the Node class in :class:`Node`) which hangs from a + group (see the Group class in :class:`Group`) but, unlike a group, it can + not have any further children below it (i.e. it is an end node). + + This definition includes all nodes which contain actual data (datasets + handled by the Table - see :ref:`TableClassDescr`, Array - + see :ref:`ArrayClassDescr`, CArray - see :ref:`CArrayClassDescr`, EArray - + see :ref:`EArrayClassDescr`, and VLArray - see :ref:`VLArrayClassDescr` + classes) and unsupported nodes (the UnImplemented + class - :ref:`UnImplementedClassDescr`) these classes do in fact inherit + from Leaf. + + + .. rubric:: Leaf attributes + + These instance variables are provided in addition to those in Node + (see :ref:`NodeClassDescr`): + + .. attribute:: byteorder + + The byte ordering of the leaf data *on disk*. It will be either + ``little`` or ``big``. + + .. attribute:: dtype + + The NumPy dtype that most closely matches this leaf type. + + .. attribute:: extdim + + The index of the enlargeable dimension (-1 if none). + + .. attribute:: nrows + + The length of the main dimension of the leaf data. + + .. attribute:: nrowsinbuf + + The number of rows that fit in internal input buffers. + + You can change this to fine-tune the speed or memory + requirements of your application. + + .. attribute:: shape + + The shape of data in the leaf. + + """ + + # These are a little hard to override, but so are properties. + attrs = Node._v_attrs + """The associated AttributeSet instance - see :ref:`AttributeSetClassDescr` + (This is an easier-to-write alias of :attr:`Node._v_attrs`.""" + title = Node._v_title + """A description for this node + (This is an easier-to-write alias of :attr:`Node._v_title`).""" + + @property + def name(self) -> str: + """Name of the node. + + The name of this node in its parent group (This is an + easier-to-write alias of :attr:`Node._v_name`). 
+ """ + return self._v_name + + @property + def chunkshape(self) -> tuple[int, ...]: + """HDF5 chunk size for chunked leaves (a tuple). + + This is read-only because you cannot change the chunk size of a + leaf once it has been created. + """ + return getattr(self, "_v_chunkshape", None) + + @property + def object_id(self) -> int: + """Node identifier, which may change from run to run. + + (This is an easier-to-write alias of :attr:`Node._v_objectid`). + + .. versionchanged:: 3.0 + The *objectID* property has been renamed into *object_id*. + + """ + return self._v_objectid + + @property + def ndim(self) -> int: + """Return the number of dimensions of the leaf data. + + .. versionadded: 2.4 + """ + return len(self.shape) + + @lazyattr + def filters(self) -> Filters: + """Filter properties for this leaf. + + See Also + -------- + Filters + + """ + return Filters._from_leaf(self) + + @property + def track_times(self) -> bool: + """Return True if the timestamps for the leaf are recorded. + + If the leaf is not a dataset, this will fail with HDF5ExtError. + + The track times dataset creation property does not seem to + survive closing and reopening as of HDF5 1.8.17. Currently, + it may be more accurate to test whether the ctime for the + dataset is 0: + track_times = (leaf._get_obj_timestamps().ctime == 0) + """ + return self._get_obj_track_times() + + @property + def maindim(self) -> int: + """Dimension along which iterators work. + + Its value is 0 (i.e. the first dimension) when the dataset is not + extendable, and self.extdim (where available) for extendable ones. + """ + if self.extdim < 0: + return 0 # choose the first dimension + return self.extdim + + @property + def flavor(self) -> Literal["numpy", "python"]: + """Type of the data object read from this leaf. + + It can be any of 'numpy' or 'python'. + + You can (and are encouraged to) use this property to get, set + and delete the FLAVOR HDF5 attribute of the leaf. 
When the leaf + has no such attribute, the default flavor is used. + """ + return self._flavor + + @flavor.setter + def flavor(self, flavor: Literal["numpy", "python"]) -> None: + self._v_file._check_writable() + check_flavor(flavor) + self._v_attrs.FLAVOR = self._flavor = flavor # logs the change + + @flavor.deleter + def flavor(self) -> None: + del self._v_attrs.FLAVOR + self._flavor = internal_flavor + + @property + def size_on_disk(self) -> int: + """Size on disk of the object. + + The size of this leaf's data in bytes as it is stored on disk. If the + data is compressed, this shows the compressed size. In the case of + uncompressed, chunked data, this may be slightly larger than the amount + of data, due to partially filled chunks. + """ + return self._get_storage_size() + + def __init__( + self, + parentnode: Group, + name: str, + new: bool = False, + filters: Filters | None = None, + byteorder: Literal["little", "big", None] = None, + _log: bool = True, + track_times: bool = True, + ) -> None: + self._v_new = new + """Is this the first time the node has been created?""" + self.nrowsinbuf: int | None = None + """ + The number of rows that fits in internal input buffers. + + You can change this to fine-tune the speed or memory + requirements of your application. + """ + self._flavor: Literal["numpy", "python", None] = None + """Private storage for the `flavor` property.""" + + if new: + # Get filter properties from parent group if not given. + if filters is None: + filters = parentnode._v_filters + self.__dict__["filters"] = filters # bypass the property + + if byteorder not in (None, "little", "big"): + raise ValueError( + "the byteorder can only take 'little' or 'big' values " + "and you passed: %s" % byteorder + ) + self.byteorder = byteorder + """The byte ordering of the leaf data *on disk*.""" + + self._want_track_times = track_times + + # Existing filters need not be read since `filters` + # is a lazy property that automatically handles their loading. 
+ + super().__init__(parentnode, name, _log) + + def __len__(self) -> int: + """Return the length of the main dimension of the leaf data. + + Please note that this may raise an OverflowError on 32-bit platforms + for datasets having more than 2**31-1 rows. This is a limitation of + Python that you can work around by using the nrows or shape attributes. + + """ + return self.nrows + + def __str__(self) -> str: + """Return the string representation of the object. + + The string representation for this object is its pathname in the + HDF5 object tree plus some additional metainfo. + """ + filters = [] + if self.filters.fletcher32: + filters.append("fletcher32") + if self.filters.complevel: + if self.filters.shuffle: + filters.append("shuffle") + if self.filters.bitshuffle: + filters.append("bitshuffle") + filters.append(f"{self.filters.complib}({self.filters.complevel})") + return ( + f"{self._v_pathname} ({self.__class__.__name__}" + f"{self.shape}{', '.join(filters)}) {self._v_title!r}" + ) + + def _g_post_init_hook(self) -> None: + """Code to be run after node creation and before creation logging. + + This method gets or sets the flavor of the leaf. 
+ + """ + super()._g_post_init_hook() + if self._v_new: # set flavor of new node + if self._flavor is None: + self._flavor = internal_flavor + else: # flavor set at creation time, do not log + if self._v_file.params["PYTABLES_SYS_ATTRS"]: + self._v_attrs._g__setattr("FLAVOR", self._flavor) + else: # get flavor of existing node (if any) + if self._v_file.params["PYTABLES_SYS_ATTRS"]: + flavor = getattr(self._v_attrs, "FLAVOR", internal_flavor) + self._flavor = flavor_alias_map.get(flavor, flavor) + else: + self._flavor = internal_flavor + + def _calc_chunkshape( + self, expectedrows: int, rowsize: int, itemsize: int + ) -> tuple[int, ...]: + """Calculate the shape for the HDF5 chunk.""" + # In case of a scalar shape, return the unit chunksize + if self.shape == (): + return (SizeType(1),) + + # Compute the chunksize + MB = 1024 * 1024 # noqa: N806 + expected_mb = (expectedrows * rowsize) // MB + chunksize = calc_chunksize(expected_mb) + complib = self.filters.complib + if ( + complib is not None + and complib.startswith("blosc2") + and self._c_classid in ("TABLE", "CARRAY", "EARRAY") + ): + # Blosc2 can introspect into blocks, so we can increase the + # chunksize for improving HDF5 perf for its internal btree. + # For the time being, this has been implemented efficiently + # just for tables, but in the future *Array objects could also + # be included. + # Use a decent default value for chunksize + chunksize *= 16 + # Now, go explore the L3 size and try to find a smarter chunksize + cpu_info = get_cpu_info() + if "l3_cache_size" in cpu_info: + # In general, is a good idea to set the chunksize equal to L3 + l3_cache_size = cpu_info["l3_cache_size"] + # cpuinfo sometimes returns cache sizes as strings (like, + # "4096 KB"), so refuse the temptation to guess and use the + # value only when it is an actual int. + # Also, sometimes cpuinfo does not return a correct L3 size; + # so in general, enforcing L3 > L2 is a good sanity check. 
+ l2_cache_size = cpu_info.get("l2_cache_size", "Not found") + if ( + type(l3_cache_size) is int + and type(l2_cache_size) is int + and l3_cache_size > l2_cache_size + ): + chunksize = l3_cache_size + # In Blosc2, the chunksize cannot be larger than 2 GB + # BLOSC2_MAX_BUFFERSIZE + if chunksize > 2**31 - 32: + chunksize = 2**31 - 32 + + maindim = self.maindim + # Compute the chunknitems + chunknitems = chunksize // itemsize + # Safeguard against itemsizes being extremely large + if chunknitems == 0: + chunknitems = 1 + chunkshape = list(self.shape) + # Check whether trimming the main dimension is enough + chunkshape[maindim] = 1 + newchunknitems = np.prod(chunkshape, dtype=SizeType) + if newchunknitems <= chunknitems: + chunkshape[maindim] = chunknitems // newchunknitems + else: + # No, so start trimming other dimensions as well + for j in range(len(chunkshape)): + # Check whether trimming this dimension is enough + chunkshape[j] = 1 + newchunknitems = np.prod(chunkshape, dtype=SizeType) + if newchunknitems <= chunknitems: + chunkshape[j] = chunknitems // newchunknitems + break + else: + # Ops, we ran out of the loop without a break + # Set the last dimension to chunknitems + chunkshape[-1] = chunknitems + + return tuple(SizeType(s) for s in chunkshape) + + def _calc_nrowsinbuf(self) -> int: + """Calculate the number of rows that fits on a PyTables buffer.""" + params = self._v_file.params + # Compute the nrowsinbuf + rowsize = self.rowsize + buffersize = params["IO_BUFFER_SIZE"] + if rowsize != 0: + nrowsinbuf = buffersize // rowsize + else: + nrowsinbuf = 1 + + # Safeguard against row sizes being extremely large + if nrowsinbuf == 0: + nrowsinbuf = 1 + # If rowsize is too large, issue a Performance warning + maxrowsize = params["BUFFER_TIMES"] * buffersize + if rowsize > maxrowsize: + warnings.warn( + f"The Leaf ``{self._v_pathname}`` is exceeding the " + f"maximum recommended rowsize ({maxrowsize} bytes); " + f"be ready to see PyTables asking for *lots* " + f"of 
memory and possibly slow I/O. " + f"You may want to reduce the rowsize by trimming the " + f"value of dimensions that are orthogonal (and preferably " + f"close) to the *main* dimension of this leave. " + f"Alternatively, in case you have specified a very " + f"small/large chunksize, you may want to " + f"increase/decrease it.", + PerformanceWarning, + ) + return nrowsinbuf + + # This method is appropriate for calls to __getitem__ methods + def _process_range( + self, + start: int, + stop: int, + step: int, + dim: int | None = None, + warn_negstep: bool = True, + ) -> tuple[int, int, int]: + if dim is None: + nrows = self.nrows # self.shape[self.maindim] + else: + nrows = self.shape[dim] + + if warn_negstep and step and step < 0: + raise ValueError("slice step cannot be negative") + + # if start is not None: start = long(start) + # if stop is not None: stop = long(stop) + # if step is not None: step = long(step) + + return slice(start, stop, step).indices(int(nrows)) + + # This method is appropriate for calls to read() methods + def _process_range_read( + self, start: int, stop: int, step: int, warn_negstep: bool = True + ) -> tuple[int, int, int]: + nrows = self.nrows + if start is not None and stop is None and step is None: + # Protection against start greater than available records + # nrows == 0 is a special case for empty objects + if 0 < nrows <= start: + raise IndexError( + "start of range (%s) is greater than " + "number of rows (%s)" % (start, nrows) + ) + step = 1 + if start == -1: # corner case + stop = nrows + else: + stop = start + 1 + # Finally, get the correct values (over the main dimension) + start, stop, step = self._process_range( + start, stop, step, warn_negstep=warn_negstep + ) + return (start, stop, step) + + def _g_copy( + self, + newparent: Group, + newname: str, + recursive: bool, + _log: bool = True, + **kwargs, + ) -> Leaf: + # Compute default arguments. 
+ start = kwargs.pop("start", None) + stop = kwargs.pop("stop", None) + step = kwargs.pop("step", None) + title = kwargs.pop("title", self._v_title) + filters = kwargs.pop("filters", self.filters) + chunkshape = kwargs.pop("chunkshape", self.chunkshape) + copyuserattrs = kwargs.pop("copyuserattrs", True) + stats = kwargs.pop("stats", None) + if chunkshape == "keep": + chunkshape = self.chunkshape # Keep the original chunkshape + elif chunkshape == "auto": + chunkshape = None # Will recompute chunkshape + + # Fix arguments with explicit None values for backwards compatibility. + if title is None: + title = self._v_title + if filters is None: + filters = self.filters + + # Create a copy of the object. + new_node, bytes_ = self._g_copy_with_stats( + newparent, + newname, + start, + stop, + step, + title, + filters, + chunkshape, + _log, + **kwargs, + ) + + # Copy user attributes if requested (or the flavor at least). + if copyuserattrs: + self._v_attrs._g_copy(new_node._v_attrs, copyclass=True) + elif "FLAVOR" in self._v_attrs: + if self._v_file.params["PYTABLES_SYS_ATTRS"]: + new_node._v_attrs._g__setattr("FLAVOR", self._flavor) + new_node._flavor = self._flavor # update cached value + + # Update statistics if needed. + if stats is not None: + stats["leaves"] += 1 + stats["bytes"] += bytes_ + + return new_node + + def _g_fix_byteorder_data( + self, data: np.ndarray, dbyteorder: str + ) -> np.ndarray: + """Fix the byteorder of data passed in constructors.""" + dbyteorder = byteorders[dbyteorder] + # If self.byteorder has not been passed as an argument of + # the constructor, then set it to the same value of data. + if self.byteorder is None: + self.byteorder = dbyteorder + # Do an additional in-place byteswap of data if the in-memory + # byteorder doesn't match that of the on-disk. This is the only + # place that we have to do the conversion manually. In all the + # other cases, it will be HDF5 the responsible for doing the + # byteswap properly. 
+ if dbyteorder in ["little", "big"]: + if dbyteorder != self.byteorder: + # if data is not writeable, do a copy first + if not data.flags.writeable: + data = data.copy() + data.byteswap(True) + else: + # Fix the byteorder again, no matter which byteorder have + # specified the user in the constructor. + self.byteorder = "irrelevant" + return data + + def _point_selection(self, key: list | tuple | np.ndarray) -> np.ndarray: + """Perform a point-wise selection. + + `key` can be any of the following items: + + * A boolean array with the same shape as self. Those positions + with True values will signal the coordinates to be returned. + + * A numpy array (or list or tuple) with the point coordinates. + This has to be a two-dimensional array of size len(self.shape) + by num_elements containing a list of zero-based values + specifying the coordinates in the dataset of the selected + elements. The order of the element coordinates in the array + specifies the order in which the array elements are iterated + through when I/O is performed. Duplicate coordinate locations + are not checked for. + + Return the coordinates array. If this is not possible, raise a + `TypeError` so that the next selection method can be tried out. + + This is useful for whatever `Leaf` instance implementing a + point-wise selection. + + """ + input_key = key + if type(key) in (list, tuple): + if isinstance(key, tuple) and len(key) > len(self.shape): + raise IndexError(f"Invalid index or slice: {key!r}") + # Try to convert key to a numpy array. If not possible, + # a TypeError will be issued (to be catched later on). 
+ try: + key = toarray(key) + except ValueError: + raise TypeError(f"Invalid index or slice: {key!r}") + elif not isinstance(key, np.ndarray): + raise TypeError(f"Invalid index or slice: {key!r}") + + # Protection against empty keys + if len(key) == 0: + return np.array([], dtype="i8") + + if key.dtype.kind == "b": + if not key.shape == self.shape: + raise IndexError( + "Boolean indexing array has incompatible shape" + ) + # Get the True coordinates (64-bit indices!) + coords = np.asarray(key.nonzero(), dtype="i8") + coords = np.transpose(coords) + elif key.dtype.kind == "i" or key.dtype.kind == "u": + if len(key.shape) > 2: + raise IndexError( + "Coordinate indexing array has incompatible shape" + ) + elif len(key.shape) == 2: + if key.shape[0] != len(self.shape): + raise IndexError( + "Coordinate indexing array has incompatible shape" + ) + coords = np.asarray(key, dtype="i8") + coords = np.transpose(coords) + else: + # For 1-dimensional datasets + coords = np.asarray(key, dtype="i8") + + # handle negative indices + base = coords if coords.base is None else coords.base + if base is input_key: + # never modify the original "key" data + coords = coords.copy() + + idx = coords < 0 + coords[idx] = (coords + self.shape)[idx] + + # bounds check + if np.any(coords < 0) or np.any(coords >= self.shape): + raise IndexError("Index out of bounds") + else: + raise TypeError("Only integer coordinates allowed.") + # We absolutely need a contiguous array + if not coords.flags.contiguous: + coords = coords.copy() + return coords + + def _check_chunked(self) -> None: + if self.chunkshape is None: + raise NotChunkedError("The dataset is not chunked") + + def _check_chunk_within(self, coords: tuple[int, ...]) -> None: + if len(coords) != self.ndim: + raise ValueError( + f"Chunk coordinates do not match dataset shape: " + f"{coords} !~ {self.shape}" + ) + if any(c < 0 or c >= s for (c, s) in zip(coords, self.shape)): + raise IndexError( + f"Chunk coordinates not within dataset shape: 
" + f"{coords} <> {self.shape}" + ) + + def _check_chunk_coords(self, coords: tuple[int, ...]) -> None: + if any(c % cs for (c, cs) in zip(coords, self.chunkshape)): + raise NotChunkAlignedError( + f"Coordinates are not multiples of chunk shape: " + f"{tuple(coords)} !* {self.chunkshape}" + ) + + # Tree manipulation + def remove(self) -> None: + """Remove this node from the hierarchy. + + This method has the behavior described + in :meth:`Node._f_remove`. Please note that there is no recursive flag + since leaves do not have child nodes. + + """ + self._f_remove(False) + + def rename(self, newname: str) -> None: + """Rename this node in place. + + This method has the behavior described in :meth:`Node._f_rename()`. + + """ + self._f_rename(newname) + + def move( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + createparents: bool = False, + ) -> None: + """Move or rename this node. + + This method has the behavior described in :meth:`Node._f_move` + + """ + self._f_move(newparent, newname, overwrite, createparents) + + def copy( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + createparents: bool = False, + **kwargs, + ) -> Leaf: + """Copy this node and return the new one. + + This method has the behavior described in :meth:`Node._f_copy`. Please + note that there is no recursive flag since leaves do not have child + nodes. + + .. warning:: + + Note that unknown parameters passed to this method will be + ignored, so may want to double check the spelling of these + (i.e. if you write them incorrectly, they will most probably + be ignored). + + Parameters + ---------- + title + The new title for the destination. If omitted or None, the original + title is used. + filters : Filters + Specifying this parameter overrides the original filter properties + in the source node. If specified, it must be an instance of the + Filters class (see :ref:`FiltersClassDescr`). 
The default is to + copy the filter properties from the source node. + copyuserattrs + You can prevent the user attributes from being copied by setting + this parameter to False. The default is to copy them. + start, stop, step : int + Specify the range of rows to be copied; the default is to copy all + the rows. + stats + This argument may be used to collect statistics on the copy + process. When used, it should be a dictionary with keys 'groups', + 'leaves' and 'bytes' having a numeric value. Their values will be + incremented to reflect the number of groups, leaves and bytes, + respectively, that have been copied during the operation. + chunkshape + The chunkshape of the new leaf. It supports a couple of special + values. A value of keep means that the chunkshape will be the same + as original leaf (this is the default). A value of auto means + that a new shape will be computed automatically in order to ensure + the best performance when accessing the dataset through the main + dimension. Any other value should be an integer or a tuple + matching the dimensions of the leaf. + + """ + return self._f_copy( + newparent, + newname, + overwrite, + createparents=createparents, + **kwargs, + ) + + def truncate(self, size: int) -> None: + """Truncate the main dimension to be size rows. + + If the main dimension previously was larger than this size, the extra + data is lost. If the main dimension previously was shorter, it is + extended, and the extended part is filled with the default values. + + The truncation operation can only be applied to *enlargeable* datasets, + else a TypeError will be raised. + + """ + # A non-enlargeable arrays (Array, CArray) cannot be truncated + if self.extdim < 0: + raise TypeError("non-enlargeable datasets cannot be truncated") + self._g_truncate(size) + + def isvisible(self) -> bool: + """Return True if this node is visible. + + This method has the behavior described in :meth:`Node._f_isvisible()`. 
+ + """ + return self._f_isvisible() + + # Attribute handling + def get_attr(self, name: str) -> Any: + """Get a PyTables attribute from this node. + + This method has the behavior described in :meth:`Node._f_getattr`. + + """ + return self._f_getattr(name) + + def set_attr(self, name: str, value: Any) -> None: + """Set a PyTables attribute for this node. + + This method has the behavior described in :meth:`Node._f_setattr()`. + + """ + self._f_setattr(name, value) + + def del_attr(self, name: str) -> None: + """Delete a PyTables attribute from this node. + + This method has the behavior described in :meth:`Node_f_delAttr`. + + """ + self._f_delattr(name) + + # Data handling + def flush(self) -> None: + """Flush pending data to disk. + + Saves whatever remaining buffered data to disk. It also releases + I/O buffers, so if you are filling many datasets in the same + PyTables session, please call flush() extensively so as to help + PyTables to keep memory requirements low. + + """ + self._g_flush() + + def chunk_info(self, coords: tuple[int, ...]) -> ChunkInfo: + """Get storage information about the chunk containing the `coords`. + + The coordinates `coords` are a tuple of integers with the same rank as + the dataset. + + Return a :class:`ChunkInfo` instance with the information. + + The coordinates need not be aligined with chunk boundaries. This + means that this method may be used to get the start coordinates of the + chunk that contains the item at the given coordinates, for use with + other direct chunking operations (see :attr:`ChunkInfo.start`). + + If the coordinates are within the dataset's shape but there is no such + chunk in storage (missing chunk), a :class:`ChunkInfo` with a valid + ``start`` and ``filter_mask = offset = size = None`` is returned. If + the coordinates are beyond the shape, :exc:`IndexError` is raised + (even if the start of the chunk would fall within the shape). 
+ + Calling this method on a non-chunked dataset raises a + :exc:`NotChunkedError`. + + """ + self._check_chunked() + self._check_chunk_within(coords) + + coords = np.array(coords, dtype=SizeType) + filter_mask, offset, size = self._g_chunk_info(coords) + + # Align coordinates to chunk boundary. + chunkshape = self.chunkshape + coords //= chunkshape + coords *= chunkshape + return ChunkInfo(tuple(coords.tolist()), filter_mask, offset, size) + + def read_chunk( + self, + coords: tuple[int, ...], + out: bytearray | NPByteArray | None = None, + ) -> bytes | memoryview: + """Get the raw chunk that starts at the given `coords` from storage. + + The coordinates `coords` are a tuple of integers with the same rank as + the dataset. If they are not multiples of its chunkshape, + :exc:`NotChunkAlignedError` is raised. + + If a buffer-like `out` argument is given, it receives chunk data. If + it has insufficient storage for the chunk, :exc:`ValueError` is raised + (use :meth:`chunk_info()` to get the required capacity). + + The obtained data is supposed to have gone at storage time through + dataset filters, minus those in the chunk's filter mask (use + :meth:`chunk_info()` to get it). + + Return the chunk's raw content, either as a `bytes` instance (if `out` + is ``None``) or as a `memoryview` over the object given as `out`. + + Reading a chunk within the dataset's shape, but not in storage + (missing chunk) raises a :exc:`NoSuchChunkError`. If the chunk is + beyond the shape, :exc:`IndexError` is raised. + + Calling this method on a non-chunked dataset raises a + :exc:`NotChunkedError`. 
+ + """ + self._check_chunked() + self._check_chunk_within(coords) + self._check_chunk_coords(coords) + + if out is not None: + out = np.ndarray((len(out),), dtype="u1", buffer=out) + + coords = np.array(coords, dtype=SizeType) + chunk = self._g_read_chunk(coords, out) + if chunk is None: + raise NoSuchChunkError( + f"Can't read missing chunk at coordinates " f"{tuple(coords)}" + ) + return chunk.tobytes() if out is None else memoryview(out) + + def write_chunk( + self, coords: tuple[int, ...], data: BufferLike, filter_mask: int = 0 + ) -> None: + """Write `data` to storage for the chunk starting at the given `coords`. + + The coordinates `coords` are a tuple of integers with the same rank as + the dataset. If they are not multiples of its chunkshape, + :exc:`NotChunkAlignedError` is raised. + + The content of the buffer-like `data` must already have gone through + dataset filters, minus those in the given `filter_mask` (which is to + be saved along data; see :attr:`ChunkInfo.filter_mask`). + + Writing a chunk which is already in storage replaces it, otherwise it + is added to storage as long as it is within the dataset's shape + (missing chunk). This means that you may use :meth:`truncate()` to + grow an enlargeable dataset cheaply (as no chunk data is written), + then sparsely write selected chunks in arbitrary order. + + If the chunk is beyond the dataset's shape, :exc:`IndexError` is + raised. + + Calling this method on a non-chunked dataset raises a + :exc:`NotChunkedError`. + + """ + self._check_chunked() + self._check_chunk_within(coords) + self._check_chunk_coords(coords) + + coords = np.array(coords, dtype=SizeType) + data = np.ndarray((len(data),), dtype="u1", buffer=data) + self._g_write_chunk(coords, data, filter_mask) + + def _f_close(self, flush: bool = True) -> None: + """Close this node in the tree. + + This method has the behavior described in :meth:`Node._f_close`. 
+ Besides that, the optional argument flush tells whether to flush + pending data to disk or not before closing. + + """ + if not self._v_isopen: + return # the node is already closed or not initialized + + # Only do a flush in case the leaf has an IO buffer. The + # internal buffers of HDF5 will be flushed afterwards during the + # self._g_close() call. Avoiding an unnecessary flush() + # operation accelerates the closing for the unbuffered leaves. + if flush and hasattr(self, "_v_iobuf"): + self.flush() + + # Close the dataset and release resources + self._g_close() + + # Close myself as a node. + super()._f_close() + + def close(self, flush: bool = True) -> None: + """Close this node in the tree. + + This method is completely equivalent to :meth:`Leaf._f_close`. + + """ + self._f_close(flush) diff --git a/venv/Lib/site-packages/tables/libblosc2.dll b/venv/Lib/site-packages/tables/libblosc2.dll new file mode 100644 index 0000000..5b25357 Binary files /dev/null and b/venv/Lib/site-packages/tables/libblosc2.dll differ diff --git a/venv/Lib/site-packages/tables/link.py b/venv/Lib/site-packages/tables/link.py new file mode 100644 index 0000000..cebf763 --- /dev/null +++ b/venv/Lib/site-packages/tables/link.py @@ -0,0 +1,464 @@ +"""Create links in the HDF5 file. + +This module implements containers for soft and external links. Hard +links doesn't need a container as such as they are the same as regular +nodes (groups or leaves). + +Classes: + + SoftLink + ExternalLink + +Functions: + +Misc variables: + +""" + +from __future__ import annotations + +from typing import Any, Literal, NoReturn, TYPE_CHECKING +from pathlib import Path + +import tables as tb + +from . 
import linkextension +from .node import Node +from .utils import lazyattr +from .attributeset import AttributeSet + +if TYPE_CHECKING: + from .group import Group + + +def _g_get_link_class( + parent_id: int, name: str +) -> Literal["ExternalLink", "HardLink", "SoftLink", "UnImplemented"]: + """Guess the link class.""" + return linkextension._get_link_class(parent_id, name) + + +class Link(Node): + """Abstract base class for all PyTables links. + + A link is a node that refers to another node. The Link class inherits from + Node class and the links that inherits from Link are SoftLink and + ExternalLink. There is not a HardLink subclass because hard links behave + like a regular Group or Leaf. Contrarily to other nodes, links cannot have + HDF5 attributes. This is an HDF5 library limitation that might be solved + in future releases. + + See :ref:`LinksTutorial` for a small tutorial on how to work with links. + + .. rubric:: Link attributes + + .. attribute:: target + + The path string to the pointed node. + + """ + + # Properties + @lazyattr + def _v_attrs(self) -> AttributeSet: + """Attributes. + + A *NoAttrs* instance replacing the typical *AttributeSet* instance of + other node objects. The purpose of *NoAttrs* is to make clear that + HDF5 attributes are not supported in link nodes. 
+ """ + + class NoAttrs(AttributeSet): + def __getattr__(self, name: str) -> NoReturn: + raise KeyError( + "you cannot get attributes from this " + "`%s` instance" % self.__class__.__name__ + ) + + def __setattr__(self, name: str, value: Any) -> NoReturn: + raise KeyError( + "you cannot set attributes to this " + "`%s` instance" % self.__class__.__name__ + ) + + def _g_close(self) -> None: + pass + + return NoAttrs(self) + + def __init__( + self, + parentnode: Group, + name: str, + target: str | None = None, + _log: bool = False, + ) -> None: + self._v_new = target is not None + self.target = target + """The path string to the pointed node.""" + + super().__init__(parentnode, name, _log) + + # Public and tailored versions for copy, move, rename and remove methods + def copy( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + createparents: bool = False, + ) -> Link: + """Copy this link and return the new one. + + See :meth:`Node._f_copy` for a complete explanation of the arguments. + Please note that there is no recursive flag since links do not have + child nodes. + + """ + newnode = self._f_copy( + newparent=newparent, + newname=newname, + overwrite=overwrite, + createparents=createparents, + ) + # Insert references to a `newnode` via `newname` + newnode._v_parent._g_refnode(newnode, newname, True) + return newnode + + def move( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + ) -> None: + """Move or rename this link. + + See :meth:`Node._f_move` for a complete explanation of the arguments. + + """ + return self._f_move( + newparent=newparent, newname=newname, overwrite=overwrite + ) + + def remove(self) -> None: + """Remove this link from the hierarchy.""" + return self._f_remove() + + def rename( + self, newname: str | None = None, overwrite: bool = False + ) -> None: + """Rename this link in place. 
+ + See :meth:`Node._f_rename` for a complete explanation of the arguments. + + """ + return self._f_rename(newname=newname, overwrite=overwrite) + + def __repr__(self): + return str(self) + + +class SoftLink(linkextension.SoftLink, Link): + """Represents a soft link (aka symbolic link). + + A soft link is a reference to another node in the *same* file hierarchy. + Provided that the target node exists, its attributes and methods can be + accessed directly from the softlink using the normal `.` syntax. + + Softlinks also have the following public methods/attributes: + + * `target` + * `dereference()` + * `copy()` + * `move()` + * `remove()` + * `rename()` + * `is_dangling()` + + Note that these will override any correspondingly named methods/attributes + of the target node. + + For backwards compatibility, it is also possible to obtain the target node + via the `__call__()` special method (this action is called *dereferencing*; + see below) + + Examples + -------- + :: + + >>> import numpy as np + >>> f = tb.open_file('/tmp/test_softlink.h5', 'w') + >>> a = f.create_array('/', 'A', np.arange(10)) + >>> link_a = f.create_soft_link('/', 'link_A', target='/A') + + # transparent read/write access to a softlinked node + >>> link_a[0] = -1 + >>> link_a[:], link_a.dtype + (array([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]), dtype('int64')) + + # dereferencing a softlink using the __call__() method + >>> link_a() is a + True + + # SoftLink.remove() overrides Array.remove() + >>> link_a.remove() + >>> print(link_a) + + >>> a[:], a.dtype + (array([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]), dtype('int64')) + >>> f.close() + + """ + + # Class identifier. 
+ _c_classid = "SOFTLINK" + + # attributes with these names/prefixes are treated as attributes of the + # SoftLink rather than the target node + _link_attrnames = ( + "target", + "dereference", + "is_dangling", + "copy", + "move", + "remove", + "rename", + "__init__", + "__str__", + "__repr__", + "__unicode__", + "__class__", + "__dict__", + ) + _link_attrprefixes = ("_f_", "_c_", "_g_", "_v_") + + def __call__(self) -> Node | None: + """Dereference `self.target` and return the object. + + Examples + -------- + :: + + >>> f = tb.open_file('tables/tests/slink.h5') + >>> f.root.arr2 + /arr2 (SoftLink) -> /arr + >>> print(f.root.arr2()) + /arr (Array(2,)) '' + >>> f.close() + + """ + return self.dereference() + + def dereference(self) -> Node | None: + """Dereference a link.""" + if self._v_isopen: + target = self.target + # Check for relative pathnames + if not self.target.startswith("/"): + target = self._v_parent._g_join(self.target) + return self._v_file._get_node(target) + else: + return None + + def __getattribute__(self, attrname: str) -> Any: + + # get attribute of the SoftLink itself + if ( + attrname in SoftLink._link_attrnames + or attrname[:3] in SoftLink._link_attrprefixes + ): + return object.__getattribute__(self, attrname) + + # get attribute of the target node + elif not self._v_isopen: + raise tb.ClosedNodeError("the node object is closed") + elif self.is_dangling(): + return None + else: + target_node = self.dereference() + try: + # __getattribute__() fails to get children of Groups + return target_node.__getattribute__(attrname) + except AttributeError: + # some node classes (e.g. 
Array) don't implement __getattr__() + return target_node.__getattr__(attrname) + + def __setattr__(self, attrname: str, value: Any) -> None: + + # set attribute of the SoftLink itself + if ( + attrname in SoftLink._link_attrnames + or attrname[:3] in SoftLink._link_attrprefixes + ): + object.__setattr__(self, attrname, value) + + # set attribute of the target node + elif not self._v_isopen: + raise tb.ClosedNodeError("the node object is closed") + elif self.is_dangling(): + raise ValueError("softlink target does not exist") + else: + self.dereference().__setattr__(attrname, value) + + def __getitem__(self, key: str) -> Any: + """Getitem magic method. + + The __getitem__ must be defined in the SoftLink class in order + for array indexing syntax to work. + """ + if not self._v_isopen: + raise tb.ClosedNodeError("the node object is closed") + elif self.is_dangling(): + raise ValueError("softlink target does not exist") + else: + return self.dereference().__getitem__(key) + + def __setitem__(self, key: str, value: Any) -> None: + """Setitem magic method. + + The __setitem__ method must be defined in the SoftLink class + in order for array indexing syntax to work. + """ + if not self._v_isopen: + raise tb.ClosedNodeError("the node object is closed") + elif self.is_dangling(): + raise ValueError("softlink target does not exist") + else: + self.dereference().__setitem__(key, value) + + def is_dangling(self) -> bool: + """Return True if the link is dangling.""" + return not (self.dereference() in self._v_file) + + def __str__(self) -> str: + """Return a short string representation of the link. 
+ + Examples + -------- + :: + + >>> f = tb.open_file('tables/tests/slink.h5') + >>> f.root.arr2 + /arr2 (SoftLink) -> /arr + >>> f.close() + + """ + target = str(self.target) + # Check for relative pathnames + if not self.target.startswith("/"): + target = self._v_parent._g_join(self.target) + closed = "" if self._v_isopen else "closed " + dangling = "" if target in self._v_file else " (dangling)" + return ( + f"{closed}{self._v_pathname} ({self.__class__.__name__}) -> " + f"{self.target}{dangling}" + ) + + +class ExternalLink(linkextension.ExternalLink, Link): + """Represents an external link. + + An external link is a reference to a node in *another* file. + Getting access to the pointed node (this action is called + *dereferencing*) is done via the :meth:`__call__` special method + (see below). + + .. rubric:: ExternalLink attributes + + .. attribute:: extfile + + The external file handler, if the link has been dereferenced. + In case the link has not been dereferenced yet, its value is + None. + + """ + + # Class identifier. + _c_classid = "EXTERNALLINK" + + def __init__( + self, + parentnode: Group, + name: str, + target: str | None = None, + _log: bool = False, + ) -> None: + self.extfile = None + """The external file handler, if the link has been dereferenced. + In case the link has not been dereferenced yet, its value is + None.""" + super().__init__(parentnode, name, target, _log) + + def _get_filename_node(self) -> tuple[str, str]: + """Return the external filename and nodepath from `self.target`.""" + # This is needed for avoiding the 'C:\\file.h5' filepath notation + filename, target = self.target.split(":/") + return filename, "/" + target + + def __call__(self, **kwargs) -> Node: + """Dereference self.target and return the object. + + You can pass all the arguments supported by the :func:`open_file` + function (except filename, of course) so as to open the referenced + external file. 
+ + Examples + -------- + :: + + >>> f = tb.open_file('tables/tests/elink.h5') + >>> f.root.pep.pep2 + /pep/pep2 (ExternalLink) -> elink2.h5:/pep + >>> pep2 = f.root.pep.pep2(mode='r') # open in 'r'ead mode + >>> print(pep2) + /pep (Group) '' + >>> pep2._v_file.filename # belongs to referenced file + 'tables/tests/elink2.h5' + >>> f.close() + + """ + filename, target = self._get_filename_node() + + if not Path(filename).is_absolute(): + # Resolve the external link with respect to this + # file's directory. See #306. + filename = str(Path(self._v_file.filename).parent / filename) + + if self.extfile is None or not self.extfile.isopen: + self.extfile = tb.open_file(filename, **kwargs) + else: + # XXX: implement better consistency checks + assert self.extfile.filename == filename + assert self.extfile.mode == kwargs.get("mode", "r") + + return self.extfile._get_node(target) + + def umount(self) -> None: + """Safely unmount self.extfile, if opened.""" + extfile = self.extfile + # Close external file, if open + if extfile is not None and extfile.isopen: + extfile.close() + self.extfile = None + + def _f_close(self) -> None: + """Especific close for external links.""" + self.umount() + super()._f_close() + + def __str__(self) -> str: + """Return a short string representation of the link. 
+ + Examples + -------- + :: + + >>> f = tb.open_file('tables/tests/elink.h5') + >>> f.root.pep.pep2 + /pep/pep2 (ExternalLink) -> elink2.h5:/pep + >>> f.close() + + """ + return ( + f"{self._v_pathname} ({self.__class__.__name__}) -> " + f"{self.target}" + ) diff --git a/venv/Lib/site-packages/tables/linkextension.pyd b/venv/Lib/site-packages/tables/linkextension.pyd new file mode 100644 index 0000000..010471a Binary files /dev/null and b/venv/Lib/site-packages/tables/linkextension.pyd differ diff --git a/venv/Lib/site-packages/tables/linkextension.pyx b/venv/Lib/site-packages/tables/linkextension.pyx new file mode 100644 index 0000000..4e3e1cf --- /dev/null +++ b/venv/Lib/site-packages/tables/linkextension.pyx @@ -0,0 +1,283 @@ +######################################################################## +# +# License: BSD +# Created: November 25, 2009 +# Author: Francesc Alted - faltet@pytables.com +# +# $Id$ +# +######################################################################## + +"""Cython functions and classes for supporting links in HDF5.""" + +from .exceptions import HDF5ExtError + +from libc.stdlib cimport malloc, free +from libc.string cimport strlen +from cpython.unicode cimport PyUnicode_DecodeUTF8 + +from .definitions cimport ( + H5P_DEFAULT, + hid_t, + herr_t, + hbool_t, + int64_t, + H5T_cset_t, + haddr_t, +) +from .hdf5extension cimport Node +from .utilsextension cimport cstr_to_pystr + +#---------------------------------------------------------------------- + +# External declarations + +cdef extern from "H5Lpublic.h" nogil: + + ctypedef enum H5L_type_t: + H5L_TYPE_ERROR = (-1), # Invalid link type id + H5L_TYPE_HARD = 0, # Hard link id + H5L_TYPE_SOFT = 1, # Soft link id + H5L_TYPE_EXTERNAL = 64, # External link id + H5L_TYPE_MAX = 255 # Maximum link type id + + # Information struct for link (for H5Lget_info) + cdef union _add_u: + haddr_t address # Address hard link points to + size_t val_size # Size of a soft link or UD link value + + ctypedef 
struct H5L_info_t: + H5L_type_t type # Type of link + hbool_t corder_valid # Indicate if creation order is valid + int64_t corder # Creation order + H5T_cset_t cset # Character set of link name + _add_u u # Size of a soft link or UD link value + + # Operations with links + herr_t H5Lcreate_hard( + hid_t obj_loc_id, char *obj_name, hid_t link_loc_id, char *link_name, + hid_t lcpl_id, hid_t lapl_id) + + herr_t H5Lcreate_soft( + char *target_path, hid_t link_loc_id, char *link_name, + hid_t lcpl_id, hid_t lapl_id) + + herr_t H5Lcreate_external( + char *file_name, char *object_name, hid_t link_loc_id, char *link_name, + hid_t lcpl_id, hid_t lapl_id) + + herr_t H5Lget_info( + hid_t link_loc_id, char *link_name, H5L_info_t *link_buff, + hid_t lapl_id) + + herr_t H5Lget_val( + hid_t link_loc_id, char *link_name, void *linkval_buff, size_t size, + hid_t lapl_id) + + herr_t H5Lunpack_elink_val( + char *ext_linkval, size_t link_size, unsigned *flags, + const char **filename, const char **obj_path) + + herr_t H5Lcopy( + hid_t src_loc_id, char *src_name, hid_t dest_loc_id, char *dest_name, + hid_t lcpl_id, hid_t lapl_id) + + +#---------------------------------------------------------------------- + +# Helper functions + +def _get_link_class(parent_id, name): + """Guess the link class.""" + + cdef herr_t ret + cdef H5L_info_t link_buff + cdef H5L_type_t link_type + + ret = H5Lget_info(parent_id, name, &link_buff, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to get info about link") + + link_type = link_buff.type + if link_type == H5L_TYPE_SOFT: + return "SoftLink" + elif link_type == H5L_TYPE_EXTERNAL: + return "ExternalLink" + # elif link_type == H5L_TYPE_HARD: + # return "HardLink" + else: + return "UnImplemented" + + + + +def _g_create_hard_link(parentnode, str name, targetnode): + """Create a hard link in the file.""" + + cdef herr_t ret + cdef bytes encoded_name = name.encode('utf-8') + cdef bytes encoded_v_name = targetnode._v_name.encode('utf-8') + + ret = 
H5Lcreate_hard(targetnode._v_parent._v_objectid, encoded_v_name, + parentnode._v_objectid, encoded_name, + H5P_DEFAULT, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to create HDF5 hard link") + + + + +#---------------------------------------------------------------------- + +# Public classes + +cdef class Link(Node): + """Extension class from which all link extensions inherits.""" + + def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs): + """Private part for the _f_copy() method.""" + + cdef herr_t ret + cdef object stats + cdef bytes encoded_name, encoded_newname + + encoded_name = self.name.encode('utf-8') + encoded_newname = newname.encode('utf-8') + + # @TODO: set property list --> utf-8 + ret = H5Lcopy(self.parent_id, encoded_name, newparent._v_objectid, + encoded_newname, H5P_DEFAULT, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to copy HDF5 link") + + # Update statistics if needed. + stats = kwargs.get('stats', None) + if stats is not None: + stats['links'] += 1 + + return newparent._v_file.get_node(newparent, newname) + + +cdef class SoftLink(Link): + """Extension class representing a soft link.""" + + def _g_create(self): + """Create the link in file.""" + + cdef herr_t ret + cdef bytes encoded_name = self.name.encode('utf-8') + cdef bytes encoded_target = self.target.encode('utf-8') + + ret = H5Lcreate_soft(encoded_target, self.parent_id, encoded_name, + H5P_DEFAULT, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to create HDF5 soft link") + + return 0 # Object ID is zero'ed, as HDF5 does not assign one for links + + def _g_open(self): + """Open the link in file.""" + + cdef herr_t ret + cdef H5L_info_t link_buff + cdef size_t val_size + cdef char *clinkval + cdef bytes encoded_name + + encoded_name = self.name.encode('utf-8') + + ret = H5Lget_info(self.parent_id, encoded_name, &link_buff, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to get info about soft link") + + val_size = 
link_buff.u.val_size + clinkval = malloc(val_size) + + ret = H5Lget_val(self.parent_id, encoded_name, clinkval, val_size, + H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to get target value") + + self.target = PyUnicode_DecodeUTF8(clinkval, strlen(clinkval), NULL) + + # Release resources + free(clinkval) + return 0 # Object ID is zero'ed, as HDF5 does not assign one for links + + +cdef class ExternalLink(Link): + """Extension class representing an external link.""" + + def _g_create(self): + """Create the link in file.""" + + cdef herr_t ret + cdef bytes encoded_name, encoded_filename, encoded_target + + encoded_name = self.name.encode('utf-8') + + filename, target = self._get_filename_node() + encoded_filename = filename.encode('utf-8') + encoded_target = target.encode('utf-8') + + ret = H5Lcreate_external(encoded_filename, encoded_target, + self.parent_id, encoded_name, + H5P_DEFAULT, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to create HDF5 external link") + + return 0 # Object ID is zero'ed, as HDF5 does not assign one for links + + def _g_open(self): + """Open the link in file.""" + + cdef herr_t ret + cdef H5L_info_t link_buff + cdef size_t val_size + cdef char *clinkval + cdef char *cfilename + cdef char *c_obj_path + cdef unsigned flags + cdef bytes encoded_name + cdef str filename, obj_path + + encoded_name = self.name.encode('utf-8') + + ret = H5Lget_info(self.parent_id, encoded_name, &link_buff, H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to get info about external link") + + val_size = link_buff.u.val_size + clinkval = malloc(val_size) + + ret = H5Lget_val(self.parent_id, encoded_name, clinkval, val_size, + H5P_DEFAULT) + if ret < 0: + raise HDF5ExtError("failed to get target value") + + ret = H5Lunpack_elink_val(clinkval, val_size, &flags, + &cfilename, + &c_obj_path) + if ret < 0: + raise HDF5ExtError("failed to unpack external link value") + + filename = cstr_to_pystr(cfilename) + obj_path = 
cstr_to_pystr(c_obj_path) + + self.target = filename+':'+obj_path + + # Release resources + free(clinkval) + return 0 # Object ID is zero'ed, as HDF5 does not assign one for links + + def _get_obj_info(self): + # ExternalLink do not have ObjectId. Hardcode addr and rc to 0, 1 + return 0, 1 + + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## fill-column: 78 +## End: diff --git a/venv/Lib/site-packages/tables/lrucacheextension.pxd b/venv/Lib/site-packages/tables/lrucacheextension.pxd new file mode 100644 index 0000000..bf8a742 --- /dev/null +++ b/venv/Lib/site-packages/tables/lrucacheextension.pxd @@ -0,0 +1,83 @@ +######################################################################## +# +# License: BSD +# Created: +# Author: Francesc Alted - faltet@pytables.com +# +# $Id$ +# +######################################################################## + +from numpy cimport ndarray + + +# Declaration of instance variables for shared classes +# The NodeCache class is useful for caching general objects (like Nodes). 
+cdef class NodeCache: + cdef readonly long nslots + cdef long nextslot + cdef object nodes, paths + cdef object setitem(self, object path, object node) + cdef long getslot(self, object path) + cdef object cpop(self, object path) + + +# Base class for other caches +cdef class BaseCache: + cdef int iscachedisabled, incsetcount + cdef long setcount, getcount, containscount + cdef long disablecyclecount, disableeverycycles + cdef long enablecyclecount, enableeverycycles + cdef double nprobes, hitratio + cdef long seqn_, nextslot, nslots + cdef long *ratimes + cdef double lowesthr + cdef ndarray atimes + cdef object name + cdef int checkhitratio(self) + cdef int couldenablecache_(self) + cdef long incseqn(self) + + +# Helper class for ObjectCache +cdef class ObjectNode: + cdef object key, obj + cdef long nslot + + +# The ObjectCache class is useful for general python objects +cdef class ObjectCache(BaseCache): + cdef long maxcachesize, cachesize, maxobjsize + cdef long *rsizes + cdef ndarray sizes + cdef object __list, __dict + cdef ObjectNode mrunode + cdef removeslot_(self, long nslot) + cdef clearcache_(self) + cdef updateslot_(self, long nslot, long size, object key, object value) + cdef long setitem_(self, object key, object value, long size) + cdef long getslot_(self, object key) + cdef object getitem_(self, long nslot) + + +# The NumCache class is useful for caching numerical data in an efficient way +cdef class NumCache(BaseCache): + cdef long itemsize, slotsize + cdef ndarray cacheobj, keys + cdef void *rcache + cdef long long *rkeys + cdef object __dict + cdef void *getaddrslot_(self, long nslot) + cdef long setitem_(self, long long key, void *data, long start) + cdef long setitem1_(self, long long key) + cdef long getslot_(self, long long key) + cdef getitem_(self, long nslot, void *data, long start) + cdef void *getitem1_(self, long nslot) + + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## fill-column: 78 +## End: diff 
--git a/venv/Lib/site-packages/tables/lrucacheextension.pyd b/venv/Lib/site-packages/tables/lrucacheextension.pyd new file mode 100644 index 0000000..ba0d52d Binary files /dev/null and b/venv/Lib/site-packages/tables/lrucacheextension.pyd differ diff --git a/venv/Lib/site-packages/tables/lrucacheextension.pyx b/venv/Lib/site-packages/tables/lrucacheextension.pyx new file mode 100644 index 0000000..e8336d4 --- /dev/null +++ b/venv/Lib/site-packages/tables/lrucacheextension.pyx @@ -0,0 +1,641 @@ +######################################################################## +# +# License: BSD +# Created: Aug 13, 2006 +# Author: Francesc Alted - faltet@pytables.com +# +# $Id: $ +# +######################################################################## + +"""Cython interface for several LRU cache systems. + +Classes (type extensions): + + NodeCache + ObjectCache + NumCache + +Functions: + +Misc variables: + +""" + +cdef extern from "Python.h": + int PyUnicode_Compare(object, object) + +import sys + +import numpy as np + +from numpy cimport import_array, ndarray, PyArray_DATA +from libc.string cimport memcpy, strcmp +from cpython.unicode cimport PyUnicode_Check + +from .parameters import ( + DISABLE_EVERY_CYCLES, + ENABLE_EVERY_CYCLES, + LOWEST_HIT_RATIO, +) + +#---------------------------------------------------------------------------- +# Initialization code. +# The numpy API requires this function to be called before +# using any numpy facilities in an extension module. +import_array() +#---------------------------------------------------------------------------- + + +# ------- Minimalist NodeCache for nodes in PyTables --------- + +# The next NodeCache code relies on the fact that a node that is +# fetched from the cache will be removed from it. Said in other words: +# "A node cannot be alive and dead at the same time." + +# Thanks to the above behaviour, the next code has been stripped down +# to a bare minimum (the info in cache is kept in just 2 lists). 
cdef class NodeCache:
  """Least-Recently-Used (LRU) cache for PyTables nodes.

  The cache is kept in two parallel lists (nodes and paths), ordered from
  least to most recently used. A node fetched with pop() leaves the cache.

  """

  def __init__(self, nslots):
    """Create an empty cache holding at most `nslots` nodes.

    Once 'nslots' elements are stored, adding another one discards the
    least-recently-used entry. A ValueError is raised for a negative
    `nslots`.

    """

    if nslots < 0:
      raise ValueError("Negative number (%s) of slots!" % nslots)
    self.nodes = []
    self.paths = []
    self.nextslot = 0
    self.nslots = nslots

  def __len__(self):
    return len(self.nodes)

  def __setitem__(self, path, node):
    self.setitem(path, node)

  cdef setitem(self, object path, object node):
    """Append `node` (registered under `path`) as the most recent entry."""

    # A cache with no slots never stores anything
    if self.nslots == 0:
      return
    if self.nextslot == self.nslots:
      # The cache is full. nextslot has to be decreased *before* the LRU
      # entry is dropped; doing it afterwards causes problems with very
      # small caches (length 1 or so).
      self.nextslot = self.nextslot - 1
      # The LRU entry lives at the head of both lists
      del self.nodes[0]
      del self.paths[0]
    # Only append while both lists agree in length. This protects the case
    # where a node is being preempted and added at the same time (very
    # small caches).
    if len(self.nodes) == len(self.paths):
      self.nodes.append(node)
      self.paths.append(path)
      self.nextslot = self.nextslot + 1

  def __contains__(self, path):
    return 0 if self.getslot(path) == -1 else 1

  cdef long getslot(self, object path):
    """Return the slot holding `path`, or -1 when it is not cached."""

    cdef long i, compare
    cdef bint path_is_text

    path_is_text = PyUnicode_Check(path)
    # Walk backwards so the most recently used entries are tried first
    for i in range(self.nextslot - 1, -1, -1):
      if path_is_text or PyUnicode_Check(self.paths[i]):
        compare = PyUnicode_Compare(path, self.paths[i])
      else:
        compare = strcmp(path, self.paths[i])
      if compare == 0:
        return i
    # -1 means not found
    return -1

  __marker = object()

  def pop(self, path, d=__marker):
    """Remove `path` and return its node; return `d` if given and missing."""

    try:
      return self.cpop(path)
    except KeyError:
      if d is self.__marker:
        raise
      return d

  cdef object cpop(self, object path):
    """Remove `path` and return its node, raising KeyError if missing."""

    cdef long nslot

    nslot = self.getslot(path)
    if nslot == -1:
      raise KeyError(path)
    node = self.nodes.pop(nslot)
    del self.paths[nslot]
    self.nextslot = self.nextslot - 1
    return node

  def __iter__(self):
    # Iterate over a snapshot: the paths list may change during iteration
    return iter(list(self.paths))

  def __repr__(self):
    return "<%s (%d elements)>" % (str(self.__class__), len(self.paths))
cdef class BaseCache:
  """Base class that implements automatic probing/disabling of the cache.

  It keeps the set/get/contains counters, the per-slot access times and
  the enabled/disabled state shared by the concrete cache classes.

  """

  def __init__(self, long nslots, object name):
    """Initialize the counters and the access-time array.

    Parameters:
      nslots - The number of slots in cache (must not be negative)
      name - A descriptive name for this cache

    A ValueError is raised when `nslots` is negative.

    """

    if nslots < 0:
      raise ValueError("Negative number (%s) of slots!" % nslots)
    self.setcount = 0;  self.getcount = 0;  self.containscount = 0
    self.enablecyclecount = 0;  self.disablecyclecount = 0
    self.iscachedisabled = False  # Cache is enabled by default
    self.disableeverycycles = DISABLE_EVERY_CYCLES
    self.enableeverycycles = ENABLE_EVERY_CYCLES
    self.lowesthr = LOWEST_HIT_RATIO
    self.nprobes = 0.0;  self.hitratio = 0.0
    self.nslots = nslots
    self.seqn_ = 0;  self.nextslot = 0
    self.name = name
    self.incsetcount = False
    # The array for keeping the access times (using long ints here)
    self.atimes = np.zeros(shape=nslots, dtype=np.int_)
    # The raw data pointer must be cast explicitly to be stored as a long*
    self.ratimes = <long *>PyArray_DATA(self.atimes)

  def __len__(self):
    return self.nslots

  # Machinery for determining whether the hit ratio is being effective
  # or not.  If not, the cache will be disabled.  The efficiency will be
  # checked every cycle (the time that the cache would be refilled
  # completely).  In situations where the cache is not being re-filled
  # (i.e. it is not enabled) for a long time, it is forced to be
  # re-enabled when a certain number of cycles has passed so as to
  # check whether a new scenario where the cache can be useful again
  # has come.
  # F. Alted 2006-08-09
  cdef int checkhitratio(self):
    """Update the hit statistics; return true if the cache is enabled."""

    cdef double hitratio

    if self.setcount > self.nslots:
      self.disablecyclecount = self.disablecyclecount + 1
      self.enablecyclecount = self.enablecyclecount + 1
      self.nprobes = self.nprobes + 1
      if self.containscount > 0:
        # Force a floating point division of the two integer counters
        hitratio = <double>self.getcount / self.containscount
      else:
        # No lookup happened during this cycle, so nothing was hit
        hitratio = 0.0
      self.hitratio = self.hitratio + hitratio
      # Reset the hit counters
      self.setcount = 0;  self.getcount = 0;  self.containscount = 0
      if (not self.iscachedisabled and
          self.disablecyclecount >= self.disableeverycycles):
        # Check whether the cache is being effective or not
        if hitratio < self.lowesthr:
          # Hit ratio is low. Disable the cache.
          self.iscachedisabled = True
        else:
          # Hit ratio is acceptable. (Re-)Enable the cache.
          self.iscachedisabled = False
        self.disablecyclecount = 0
      if self.enablecyclecount >= self.enableeverycycles:
        # We have reached the time for forcing the cache to act again
        self.iscachedisabled = False
        self.enablecyclecount = 0
    return not self.iscachedisabled

  def couldenablecache(self):
    return self.couldenablecache_()

  # Check whether the cache is enabled or *could* be enabled in the next
  # setitem operation.  This method can be used in order to probe whether
  # an (expensive) operation to be done before a .setitem() is worth the
  # effort or not.
  cdef int couldenablecache_(self):

    if self.nslots == 0:
      return False
    # Increment setitem because it can be that .setitem() doesn't
    # get called after calling this.
    self.setcount = self.setcount + 1;  self.incsetcount = True
    if self.iscachedisabled:
      if self.setcount == self.nslots:
        # The cache *could* be enabled in the next setitem operation
        return True
      else:
        return False
    else:
      return True

  # Increase the access time (implemented as a C long sequence)
  cdef long incseqn(self):

    self.seqn_ = self.seqn_ + 1
    if self.seqn_ < 0:
      # Ooops, the counter has run out of range!  Reset all the access times.
      self.atimes[:] = sys.maxsize
      # Set the counter to 1 (to indicate that it is newer than existing ones)
      self.seqn_ = 1
    return self.seqn_

  def __repr__(self):
    return "<%s(%s) (%d elements)>" % (self.name, str(self.__class__),
                                       self.nslots)
cdef class ObjectNode:
  """Record of a cached value. Not for public consumption.

  It ties together the key, the cached object and the number of the slot
  where the object is stored.

  """

  def __init__(self, object key, object obj, long nslot):
    object.__init__(self)
    self.key = key
    self.obj = obj
    self.nslot = nslot

  def __repr__(self):
    # The cached value is stored in the `obj` attribute
    return "<%s %s (slot #%s) => %s>" % (self.__class__, self.key, self.nslot,
                                         self.obj)
cdef class ObjectCache(BaseCache):
  """Least-Recently-Used (LRU) cache specific for python objects."""

  def __init__(self, long nslots, long maxcachesize, object name):
    """Maximum size of the cache.

    If more than 'nslots' elements are added to the cache,
    the least-recently-used ones will be discarded.

    Parameters:
      nslots - The number of slots in cache
      maxcachesize - The maximum total size of the cached objects; it is
        also the maximum size accepted for a single object
      name - A descriptive name for this cache

    """

    super().__init__(nslots, name)
    self.cachesize = 0
    self.maxcachesize = maxcachesize
    # maxobjsize will be the same as the maximum cache size
    self.maxobjsize = maxcachesize
    self.__list = [None]*nslots
    self.__dict = {}
    self.mrunode = None   # Most Recent Used node
    # The array for keeping the object size (using long ints here)
    self.sizes = np.zeros(shape=nslots, dtype=np.int_)
    # The raw data pointer must be cast explicitly to be stored as a long*
    self.rsizes = <long *>PyArray_DATA(self.sizes)

  # Clear cache
  cdef clearcache_(self):
    self.__list = [None]*self.nslots
    self.__dict = {}
    self.mrunode = None
    self.cachesize = 0
    self.nextslot = 0
    self.seqn_ = 0

  # Remove a slot (if it exists in cache)
  cdef removeslot_(self, long nslot):
    cdef ObjectNode node

    assert nslot < self.nslots, "Attempting to remove beyond cache capacity."
    node = self.__list[nslot]
    if node is not None:
      self.__list[nslot] = None
      del self.__dict[node.key]
      self.cachesize = self.cachesize - self.rsizes[nslot]
      self.rsizes[nslot] = 0
      if self.mrunode and self.mrunode.nslot == nslot:
        self.mrunode = None
    # The next slot to be updated will be this one
    self.nextslot = nslot

  # Update a slot
  cdef updateslot_(self, long nslot, long size, object key, object value):
    cdef ObjectNode node
    cdef long nslot1, nslot2

    assert nslot < self.nslots, "Number of nodes exceeding cache capacity."
    # Remove the previous nslot
    self.removeslot_(nslot)
    # Protection against too large data cache size
    while size + self.cachesize > self.maxcachesize:
      # Remove the LRU node among the 10 largest ones
      largidx = self.sizes.argsort()[-10:]
      nslot1 = self.atimes[largidx].argmin()
      nslot2 = largidx[nslot1]
      self.removeslot_(nslot2)
    # Insert the new one
    node = ObjectNode(key, value, nslot)
    self.ratimes[nslot] = self.incseqn()
    self.rsizes[nslot] = size
    self.__list[nslot] = node
    self.__dict[key] = node
    self.mrunode = node
    self.cachesize = self.cachesize + size
    # The next slot to update will be the LRU
    self.nextslot = self.atimes.argmin()

  # Put the object to the data in cache (for Python calls)
  def setitem(self, object key, object value, object size):
    return self.setitem_(key, value, size)

  # Put the object in cache (for cython calls)
  # size can be the exact size of the value object or an estimation.
  # Returns the slot used, or -1 when the object has not been cached.
  cdef long setitem_(self, object key, object value, long size):
    cdef long nslot

    if self.nslots == 0:   # The cache has been set to empty
      return -1
    nslot = -1
    # Perhaps setcount has been already incremented in couldenablecache()
    if not self.incsetcount:
      self.setcount = self.setcount + 1
    else:
      self.incsetcount = False
    if size > self.maxobjsize:  # Check if the object is too large
      return -1
    if self.checkhitratio():
      nslot = self.nextslot
      self.updateslot_(nslot, size, key, value)
    else:
      # Empty the cache because it is not effective and it is taking space
      self.clearcache_()
    return nslot

  # Tells whether the key is in cache or not
  def __contains__(self, object key):
    # dict.has_key() does not exist in Python 3; use the `in` operator
    return key in self.__dict

  # Tells in which slot the key is. If not found, -1 is returned.
  def getslot(self, object key):
    return self.getslot_(key)

  # Tells in which slot the key is. If not found, -1 is returned.
  cdef long getslot_(self, object key):
    cdef ObjectNode node

    if self.nslots == 0:   # The cache has been set to empty
      return -1
    self.containscount = self.containscount + 1
    # Give a chance to the MRU node
    node = self.mrunode
    if node and node.key == key:
      return node.nslot
    # No luck. Look in the dictionary.
    node = self.__dict.get(key)
    if node is None:
      return -1
    return node.nslot

  # Return the object to the data in cache (for Python calls)
  def getitem(self, object nslot):
    return self.getitem_(nslot)

  # Return the object to the data in cache (for cython calls)
  cdef object getitem_(self, long nslot):
    cdef ObjectNode node

    self.getcount = self.getcount + 1
    node = self.__list[nslot]
    self.ratimes[nslot] = self.incseqn()
    self.mrunode = node
    return node.obj

  def __repr__(self):
    if self.nprobes > 0:
      hitratio = self.hitratio / self.nprobes
    elif self.containscount > 0:
      hitratio = self.getcount / self.containscount
    else:
      # No lookup has been done yet, so there is no ratio to report
      hitratio = np.nan
    return """<%s(%s)
  (%d maxslots, %d slots used, %.3f KB cachesize,
  hit ratio: %.3f, disabled? %s)>
  """ % (self.name, str(self.__class__), self.nslots, self.nextslot,
         self.cachesize / 1024., hitratio, self.iscachedisabled)
cdef class NumCache(BaseCache):
  """Least-Recently-Used (LRU) cache specific for Numerical data."""

  def __init__(self, object shape, object dtype, object name):
    """Maximum size of the cache.

    If more than 'nslots' elements are added to the cache,
    the least-recently-used ones will be discarded.

    Parameters:
      shape - The rectangular shape of the cache (nslots, nelemsperslot)
      dtype - The data type of the elements in cache; its `itemsize`
        gives the size of the element base in cache
      name - A descriptive name for this cache

    """

    cdef long nslots

    nslots = shape[0];  self.slotsize = shape[1]
    if nslots >= 1<<16:
      # nslots can't be higher than 2**16. Will silently truncate the number.
      nslots = <long>((1<<16)-1)  # Cast makes cython happy here
    super().__init__(nslots, name)
    self.itemsize = dtype.itemsize
    self.__dict = {}
    # The cache object where all data will go
    # The last slot is to allow the setitem1_ method to still return
    # a valid scratch area for writing purposes
    self.cacheobj = np.empty(shape=(nslots+1, self.slotsize),
                             dtype=dtype)
    self.rcache = <void *>PyArray_DATA(self.cacheobj)
    # The array for keeping the keys of slots
    self.keys = (-np.ones(shape=nslots, dtype=np.int64))
    self.rkeys = <long long *>PyArray_DATA(self.keys)

  # Returns the address of nslot
  # A negative nslot selects the extra scratch slot at the end of the cache
  cdef void *getaddrslot_(self, long nslot):
    # Pointer arithmetic has to be done in bytes, hence the char* casts
    if nslot >= 0:
      return <char *>self.rcache + nslot * self.slotsize * self.itemsize
    else:
      return <char *>self.rcache + self.nslots * self.slotsize * self.itemsize

  def setitem(self, long long key, ndarray nparr, long start):
    return self.setitem_(key, PyArray_DATA(nparr), start)

  # Copy the new data into a cache slot
  cdef long setitem_(self, long long key, void *data, long start):
    cdef long nslot

    nslot = self.setitem1_(key)
    if nslot >= 0:
      # Copy the data to cache
      memcpy(<char *>self.rcache + nslot * self.slotsize * self.itemsize,
             <char *>data + start * self.itemsize,
             self.slotsize * self.itemsize)
    return nslot

  # Return a cache data pointer appropriate to save data.
  # Even if the cache is disabled, this will return a -1, which is
  # the last element in the cache.
  # This version avoids a memcpy of data, but the user should be
  # aware that data in nslot cannot be overwritten!
  cdef long setitem1_(self, long long key):
    cdef long nslot
    cdef object key2

    if self.nslots == 0:   # Oops, the cache is set to empty
      return -1
    # Perhaps setcount has been already incremented in couldenablecache()
    if not self.incsetcount:
      self.setcount = self.setcount + 1
    else:
      self.incsetcount = False
    nslot = -1
    if self.checkhitratio():
      # Check if we are growing out of space
      if self.nextslot == self.nslots:
        # Get the least recently used slot
        nslot = self.atimes.argmin()
        # Remove the slot from the dict
        key2 = self.keys[nslot]
        del self.__dict[key2]
        self.nextslot = self.nextslot - 1
      else:
        # Get the next slot available
        nslot = self.nextslot
      # Insert the slot in the dictionary
      self.__dict[key] = nslot
      self.keys[nslot] = key
      self.ratimes[nslot] = self.incseqn()
      self.nextslot = self.nextslot + 1
    # The next reduces the performance of the cache in scenarios where
    # the efficiency is near to zero.  I don't understand exactly why.
    # F. Alted 24-03-2008
    elif self.nextslot > 0:
      # Empty the cache if needed
      self.__dict.clear()
      self.nextslot = 0
    return nslot

  def getslot(self, long long key):
    return self.getslot_(key)

  # Tells in which slot key is. If not found, -1 is returned.
  cdef long getslot_(self, long long key):
    cdef object nslot

    self.containscount = self.containscount + 1
    if self.nextslot == 0:   # No chances for finding a slot
      return -1
    try:
      nslot = self.__dict[key]
    except KeyError:
      return -1
    return nslot

  def getitem(self, long nslot, ndarray nparr, long start):
    self.getitem_(nslot, PyArray_DATA(nparr), start)

  # This version copies data in cache to data+start.
  # The user should be responsible to provide a large enough data buffer
  # to keep all the data.
  cdef getitem_(self, long nslot, void *data, long start):
    cdef void *cachedata

    cachedata = self.getitem1_(nslot)
    # Copy the data in cache to destination
    memcpy(<char *>data + start * self.itemsize, cachedata,
           self.slotsize * self.itemsize)

  # Return the pointer to the data in cache
  # This version avoids a memcpy of data, but the user should be
  # aware that data in nslot cannot be overwritten!
  cdef void *getitem1_(self, long nslot):

    self.getcount = self.getcount + 1
    self.ratimes[nslot] = self.incseqn()
    return <char *>self.rcache + nslot * self.slotsize * self.itemsize

  def __repr__(self):
    cachesize = (self.nslots * self.slotsize * self.itemsize) / 1024.
    if self.nprobes > 0:
      hitratio = self.hitratio / self.nprobes
    elif self.containscount > 0:
      hitratio = self.getcount / self.containscount
    else:
      hitratio = np.nan
    return """<%s(%s)
  (%d maxslots, %d slots used, %.3f KB cachesize,
  hit ratio: %.3f, disabled? %s)>
  """ % (self.name, str(self.__class__), self.nslots, self.nextslot,
         cachesize, hitratio, self.iscachedisabled)
+""" diff --git a/venv/Lib/site-packages/tables/misc/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/tables/misc/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..53f6fab Binary files /dev/null and b/venv/Lib/site-packages/tables/misc/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/misc/__pycache__/enum.cpython-311.pyc b/venv/Lib/site-packages/tables/misc/__pycache__/enum.cpython-311.pyc new file mode 100644 index 0000000..1dbd388 Binary files /dev/null and b/venv/Lib/site-packages/tables/misc/__pycache__/enum.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/misc/__pycache__/proxydict.cpython-311.pyc b/venv/Lib/site-packages/tables/misc/__pycache__/proxydict.cpython-311.pyc new file mode 100644 index 0000000..22cbb73 Binary files /dev/null and b/venv/Lib/site-packages/tables/misc/__pycache__/proxydict.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/misc/enum.py b/venv/Lib/site-packages/tables/misc/enum.py new file mode 100644 index 0000000..acfec7f --- /dev/null +++ b/venv/Lib/site-packages/tables/misc/enum.py @@ -0,0 +1,437 @@ +"""Implementation of enumerated types. + +This module provides the `Enum` class, which can be used to construct +enumerated types. Those types are defined by providing an *exhaustive +set or list* of possible, named values for a variable of that type. +Enumerated variables of the same type are usually compared between them +for equality and sometimes for order, but are not usually operated upon. + +Enumerated values have an associated *name* and *concrete value*. Every +name is unique and so are concrete values. An enumerated variable +always takes the concrete value, not its name. Usually, the concrete +value is not used directly, and frequently it is entirely irrelevant. +For the same reason, an enumerated variable is not usually compared with +concrete values out of its enumerated type. 
class Enum:
    """Enumerated type.

    Each instance of this class represents an enumerated type. The
    values of the type must be declared *exhaustively* and named with
    *strings*, and they might be given explicit concrete values, though
    this is not compulsory. Once the type is defined, it can not be
    modified.

    There are three ways of defining an enumerated type. Each one
    of them corresponds to the type of the only argument in the
    constructor of Enum:

    - *Sequence of names*: each enumerated
      value is named using a string, and its order is determined by
      its position in the sequence; the concrete value is assigned
      automatically::

          >>> boolEnum = Enum(['True', 'False'])

    - *Mapping of names*: each enumerated
      value is named by a string and given an explicit concrete value.
      All of the concrete values must be different, or a
      ValueError will be raised::

          >>> priority = Enum({'red': 20, 'orange': 10, 'green': 0})
          >>> colors = Enum({'red': 1, 'blue': 1})
          Traceback (most recent call last):
          ...
          ValueError: enumerated values contain duplicate concrete values: 1

    - *Enumerated type*: in that case, a copy
      of the original enumerated type is created. Both enumerated
      types are considered equal::

          >>> prio2 = Enum(priority)
          >>> priority == prio2
          True

    Please note that names starting with _ are
    not allowed, since they are reserved for internal usage::

        >>> prio2 = Enum(['_xx'])
        Traceback (most recent call last):
        ...
        ValueError: name of enumerated value can not start with ``_``: '_xx'

    The concrete value of an enumerated value is obtained by
    getting its name as an attribute of the Enum
    instance (see __getattr__()) or as an item (see
    __getitem__()). This allows comparisons between
    enumerated values and assigning them to ordinary Python
    variables::

        >>> redv = priority.red
        >>> redv == priority['red']
        True
        >>> redv > priority.green
        True
        >>> priority.red == priority.orange
        False

    The name of the enumerated value corresponding to a concrete
    value can also be obtained by using the
    __call__() method of the enumerated type. In this
    way you get the symbolic name to use it later with
    __getitem__()::

        >>> priority(redv)
        'red'
        >>> priority.red == priority[priority(priority.red)]
        True

    (If you ask, the __getitem__() method is
    not used for this purpose to avoid ambiguity in the case of using
    strings as concrete values.)

    """

    def __init__(
        self, enum: "list[str] | tuple[str, ...] | dict[str, Any] | Enum"
    ) -> None:
        # Attribute assignment is forbidden (see ``__setattr__``), so the
        # internal tables are written straight into the instance dict.
        mydict = self.__dict__

        mydict["_names"] = {}
        mydict["_values"] = {}

        if isinstance(enum, (list, tuple)):
            for value, name in enumerate(enum):  # values become 0, 1, 2...
                self._check_and_set_pair(name, value)
        elif isinstance(enum, dict):
            for name, value in enum.items():
                self._check_and_set_pair(name, value)
        elif isinstance(enum, Enum):
            for name, value in enum._names.items():
                self._check_and_set_pair(name, value)
        else:
            raise TypeError(
                "enumerations can only be created from sequences, "
                "mappings and other enumerations"
            )

    def _check_and_set_pair(self, name: str, value: Any) -> None:
        """Check validity of enumerated value and insert it into type."""
        names = self._names
        values = self._values

        if not isinstance(name, str):
            raise TypeError(
                f"name of enumerated value is not a string: {name!r}"
            )
        if name.startswith("_"):
            raise ValueError(
                f"name of enumerated value can not start with ``_``: {name!r}"
            )
        # This check is only necessary with a sequence base object.
        if name in names:
            raise ValueError(
                f"enumerated values contain duplicate names: {name!r}"
            )
        # This check is only necessary with a mapping base object.
        if value in values:
            raise ValueError(
                f"enumerated values contain duplicate concrete values: "
                f"{value!r}"
            )

        names[name] = value
        values[value] = name
        self.__dict__[name] = value

    def __getitem__(self, name: str) -> Any:
        """Get the concrete value of the enumerated value with that name.

        The name of the enumerated value must be a string.  If there is no value
        with that name in the enumeration, a KeyError is raised.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> enum['T1']
        2
        >>> enum['foo']
        Traceback (most recent call last):
          ...
        KeyError: "no enumerated value with that name: 'foo'"

        """
        try:
            return self._names[name]
        except KeyError:
            raise KeyError(
                f"no enumerated value with that name: {name!r}"
            ) from None

    def __setitem__(self, name: Any, value: Any) -> NoReturn:
        """Forbidden operation."""
        raise IndexError("operation not allowed")

    def __delitem__(self, name: Any) -> NoReturn:
        """Forbidden operation."""
        raise IndexError("operation not allowed")

    def __getattr__(self, name: str) -> Any:
        """Get the concrete value of the enumerated value with that name.

        The name of the enumerated value must be a string.  If there is no value
        with that name in the enumeration, an AttributeError is raised.

        Names starting with ``_`` are never enumerated values, so they are
        rejected right away without consulting the table of names.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> enum.T1
        2
        >>> enum.foo
        Traceback (most recent call last):
          ...
        AttributeError: no enumerated value with that name: 'foo'

        """
        if name.startswith("_"):
            # Looking the name up would read ``self._names``; on an instance
            # whose ``__dict__`` is still empty (as created by ``copy`` and
            # ``pickle`` before restoring the state) that access lands here
            # again and recurses without end.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        try:
            return self[name]
        except KeyError as ke:
            raise AttributeError(*ke.args) from None

    def __setattr__(self, name: Any, value: Any) -> NoReturn:
        """Forbidden operation."""
        raise AttributeError("operation not allowed")

    def __delattr__(self, name: Any) -> NoReturn:
        """Forbidden operation."""
        raise AttributeError("operation not allowed")

    def __contains__(self, name: str) -> bool:
        """Return True if the Enum has a value with the specified name.

        If the enumerated type has an enumerated value with that name, True is
        returned.  Otherwise, False is returned. The name must be a string.

        This method does *not* check for concrete values matching a value in an
        enumerated type. For that, please use the :meth:`Enum.__call__` method.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> 'T1' in enum
        True
        >>> 'foo' in enum
        False
        >>> 0 in enum
        Traceback (most recent call last):
          ...
        TypeError: name of enumerated value is not a string: 0
        >>> enum.T1 in enum  # Be careful with this!
        Traceback (most recent call last):
          ...
        TypeError: name of enumerated value is not a string: 2

        """
        if not isinstance(name, str):
            raise TypeError(
                f"name of enumerated value is not a string: {name!r}"
            )
        return name in self._names

    def __call__(self, value: Any, *default: Any) -> Any:
        """Get the name of the enumerated value with that concrete value.

        If there is no value with that concrete value in the enumeration and a
        second argument is given as a default, this is returned. Else, a
        ValueError is raised.

        This method can be used for checking that a concrete value belongs to
        the set of concrete values in an enumerated type.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> enum(5)
        'T2'
        >>> enum(42, None) is None
        True
        >>> enum(42)
        Traceback (most recent call last):
          ...
        ValueError: no enumerated value with that concrete value: 42

        """
        try:
            return self._values[value]
        except KeyError:
            if len(default) > 0:
                return default[0]
            raise ValueError(
                f"no enumerated value with that concrete value: {value!r}"
            )

    def __len__(self) -> int:
        """Return the number of enumerated values in the enumerated type.

        Examples
        --------
        >>> len(Enum(['e%d' % i for i in range(10)]))
        10

        """
        return len(self._names)

    def __iter__(self) -> Generator[Any]:
        """Iterate over the enumerated values.

        Enumerated values are returned as (name, value) pairs *in no particular
        order*.

        Examples
        --------
        >>> enumvals = {'red': 4, 'green': 2, 'blue': 1}
        >>> enum = Enum(enumvals)
        >>> enumdict = dict([(name, value) for (name, value) in enum])
        >>> enumvals == enumdict
        True

        """
        yield from self._names.items()

    def __eq__(self, other: object) -> bool:
        """Return True if `other` equivalent to this enumerated type.

        Two enumerated types are equivalent if they have exactly the same
        enumerated values (i.e. with the same names and concrete values).

        Examples
        --------
        Let ``enum*`` be enumerated types defined as:

        >>> enum1 = Enum({'T0': 0, 'T1': 2})
        >>> enum2 = Enum(enum1)
        >>> enum3 = Enum({'T1': 2, 'T0': 0})
        >>> enum4 = Enum({'T0': 0, 'T1': 2, 'T2': 5})
        >>> enum5 = Enum({'T0': 0})
        >>> enum6 = Enum({'T0': 10, 'T1': 20})

        then:

        >>> enum1 == enum1
        True
        >>> enum1 == enum2 == enum3
        True
        >>> enum1 == enum4
        False
        >>> enum5 == enum1
        False
        >>> enum1 == enum6
        False

        Comparing enumerated types with other kinds of objects produces
        a false result:

        >>> enum1 == {'T0': 0, 'T1': 2}
        False
        >>> enum1 == ['T0', 'T1']
        False
        >>> enum1 == 2
        False

        """
        if not isinstance(other, Enum):
            return False
        return self._names == other._names

    def __ne__(self, other: object) -> bool:
        """Return True if `other` is different from this enumerated type.

        Two enumerated types are different if they don't have exactly
        the same enumerated values (i.e. with the same names and
        concrete values).

        Examples
        --------
        Let ``enum*`` be enumerated types defined as:

        >>> enum1 = Enum({'T0': 0, 'T1': 2})
        >>> enum2 = Enum(enum1)
        >>> enum3 = Enum({'T1': 2, 'T0': 0})
        >>> enum4 = Enum({'T0': 0, 'T1': 2, 'T2': 5})
        >>> enum5 = Enum({'T0': 0})
        >>> enum6 = Enum({'T0': 10, 'T1': 20})

        then:

        >>> enum1 != enum1
        False
        >>> enum1 != enum2 != enum3
        False
        >>> enum1 != enum4
        True
        >>> enum5 != enum1
        True
        >>> enum1 != enum6
        True

        """
        return not self.__eq__(other)

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #    return hash((self.__class__, tuple(self._names.items())))
    def __repr__(self) -> str:
        """Return the canonical string representation of the enumeration.

        The output of this method can be evaluated to give a new
        enumeration object that will compare equal to this one.

        Examples
        --------
        >>> repr(Enum({'name': 10}))
        "Enum({'name': 10})"

        """
        return f"Enum({self._names})"
class ProxyDict(dict):
    """A dictionary which uses a container object to store its values.

    Only the keys live in the underlying ``dict``; every stored value is
    the placeholder ``None``.  Real values are fetched on demand through
    ``self._get_value_from_container(container, key)``, which is not
    defined in this class and is expected to be supplied by a subclass.
    The container itself is only held through a weak reference.
    """

    def __init__(self, container):
        self.containerref = weakref.ref(container)
        """A weak reference to the container object.

        .. versionchanged:: 3.0
           The *containerRef* attribute has been renamed into
           *containerref*.

        """

    def __getitem__(self, key):
        """Return the value for `key`, looked up in the container.

        Raises ``KeyError`` if `key` has not been registered and
        ``ValueError`` if the container has already been collected.
        """
        if key not in self:
            raise KeyError(key)

        # Values are not actually stored to avoid extra references.
        return self._get_value_from_container(self._get_container(), key)

    def __setitem__(self, key, value):
        """Register `key`; `value` is deliberately discarded."""
        # Values are not actually stored to avoid extra references.
        super().__setitem__(key, None)

    def __repr__(self):
        """Return the plain ``object`` representation (no value lookups)."""
        return object.__repr__(self)

    def __str__(self):
        """Return a ``dict``-like rendering built from the proxied values."""
        # C implementation does not use `self.__getitem__()`.  :(
        # The pieces must be f-strings: a plain literal would emit the text
        # "{k!r}: {v!r}" verbatim for every item instead of its key/value.
        return "{" + ", ".join(f"{k!r}: {v!r}" for k, v in self.items()) + "}"

    def values(self):
        """Return the list of values."""
        # C implementation does not use `self.__getitem__()`.  :(
        return [self[key] for key in self.keys()]

    def itervalues(self):
        """Iterate on values."""
        # C implementation does not use `self.__getitem__()`.  :(
        for key in self.keys():
            yield self[key]

    def items(self):
        """Return items as a list."""
        # C implementation does not use `self.__getitem__()`.  :(
        return [(key, self[key]) for key in self.keys()]

    def iteritems(self):
        """Items iterator."""
        # C implementation does not use `self.__getitem__()`.  :(
        for key in self.keys():
            yield (key, self[key])

    def _get_container(self):
        """Dereference the weak reference to the container.

        Raises ``ValueError`` if the container no longer exists.
        """
        container = self.containerref()
        if container is None:
            raise ValueError("the container object does no longer exist")
        return container
+ + This metaclass ensures that their instance classes get registered + into several dictionaries (namely the `tables.utils.class_name_dict` + class name dictionary and the `tables.utils.class_id_dict` class + identifier dictionary). + + It also adds sanity checks to some methods: + + * Check that the node is open when calling string representation + and provide a default string if so. + + """ + + def __new__( + cls, name: str, bases: tuple, dict_: dict[str, Any] + ) -> MetaNode: + """Add default behavior for representing closed nodes.""" + for mname in ["__str__", "__repr__"]: + if mname in dict_: + dict_[mname] = _closedrepr(dict_[mname]) + + return type.__new__(cls, name, bases, dict_) + + def __init__(cls, name: str, bases: tuple, dict_: dict[str, Any]) -> None: + super().__init__(name, bases, dict_) + + # Always register into class name dictionary. + class_name_dict[cls.__name__] = cls + + # Register into class identifier dictionary only if the class + # has an identifier and it is different from its parents'. + cid = getattr(cls, "_c_classid", None) + if cid is not None: + for base in bases: + pcid = getattr(base, "_c_classid", None) + if pcid == cid: + break + else: + class_id_dict[cid] = cls + + +class Node(metaclass=MetaNode): + """Abstract base class for all PyTables nodes. + + This is the base class for *all* nodes in a PyTables hierarchy. It is an + abstract class, i.e. it may not be directly instantiated; however, every + node in the hierarchy is an instance of this class. + + A PyTables node is always hosted in a PyTables *file*, under a *parent + group*, at a certain *depth* in the node hierarchy. A node knows its own + *name* in the parent group and its own *path name* in the file. + + All the previous information is location-dependent, i.e. it may change when + moving or renaming a node in the hierarchy. A node also has + location-independent information, such as its *HDF5 object identifier* and + its *attribute set*. 
+ + This class gathers the operations and attributes (both location-dependent + and independent) which are common to all PyTables nodes, whatever their + type is. Nonetheless, due to natural naming restrictions, the names of all + of these members start with a reserved prefix (see the Group class + in :ref:`GroupClassDescr`). + + Sub-classes with no children (e.g. *leaf nodes*) may define new methods, + attributes and properties to avoid natural naming restrictions. For + instance, _v_attrs may be shortened to attrs and _f_rename to + rename. However, the original methods and attributes should still be + available. + + .. rubric:: Node attributes + + .. attribute:: _v_depth + + The depth of this node in the tree (n non-negative integer value). + + .. attribute:: _v_file + + The hosting File instance (see :ref:`FileClassDescr`). + + .. attribute:: _v_name + + The name of this node in its parent group (a string). + + .. attribute:: _v_pathname + + The path of this node in the tree (a string). + + .. attribute:: _v_objectid + + A node identifier (may change from run to run). + + .. versionchanged:: 3.0 + The *_v_objectID* attribute has been renamed into *_v_object_id*. + + """ + + # By default, attributes accept Undo/Redo. + _AttributeSet = AttributeSet + + # `_v_parent` is accessed via its file to avoid upwards references. + def _g_getparent(self) -> Group: + """Return the parent :class:`Group` instance.""" + parentpath, nodename = split_path(self._v_pathname) + return self._v_file._get_node(parentpath) + + _v_parent = property(_g_getparent) + + # '_v_attrs' is defined as a lazy read-only attribute. + # This saves 0.7s/3.8s. + @lazyattr + def _v_attrs(self) -> AttributeSet: + """`AttributeSet` instance associated to the `Node`. 
+ + See Also + -------- + tables.attributeset.AttributeSet : container for the HDF5 attributes + + """ + return self._AttributeSet(self) + + # '_v_title' is a direct read-write shorthand for the 'TITLE' attribute + # with the empty string as a default value. + def _g_gettitle(self) -> str: + """Return the description of the node. + + A shorthand for TITLE attribute. + """ + if hasattr(self._v_attrs, "TITLE"): + return self._v_attrs.TITLE + else: + return "" + + def _g_settitle(self, title: str) -> None: + self._v_attrs.TITLE = title + + _v_title = property(_g_gettitle, _g_settitle) + + # This may be looked up by ``__del__`` when ``__init__`` doesn't get + # to be called. See ticket #144 for more info. + _v_isopen = False + """Whether this node is open or not.""" + + # The ``_log`` argument is only meant to be used by ``_g_copy_as_child()`` + # to avoid logging the creation of children nodes of a copied sub-tree. + def __init__( + self, parentnode: Group | SoftLink, name: str, _log: bool = True + ) -> None: + # Remember to assign these values in the root group constructor + # as it does not use this method implementation! + + # if the parent node is a softlink, dereference it + if isinstance(parentnode, class_name_dict["SoftLink"]): + parentnode = parentnode.dereference() + + self._v_file = None + """The hosting File instance (see :ref:`FileClassDescr`).""" + + self._v_isopen = False + """Whether this node is open or not.""" + + self._v_pathname = None + """The path of this node in the tree (a string).""" + + self._v_name = None + """The name of this node in its parent group (a string).""" + + self._v_depth = None + """The depth of this node in the tree (an non-negative integer value). + """ + + self._v_maxtreedepth = parentnode._v_file.params["MAX_TREE_DEPTH"] + """Maximum tree depth before warning the user. + + .. versionchanged:: 3.0 + Renamed into *_v_maxtreedepth* from *_v_maxTreeDepth*. 
+ + """ + + self._v__deleting = False + """Is the node being deleted?""" + + self._v_objectid = None + """A node identifier (may change from run to run). + + .. versionchanged:: 3.0 + The *_v_objectID* attribute has been renamed into *_v_objectid*. + + """ + + validate = new = self._v_new # set by subclass constructor + + # Is the parent node a group? Is it open? + self._g_check_group(parentnode) + parentnode._g_check_open() + file_ = parentnode._v_file + + # Will the file be able to host a new node? + if new: + file_._check_writable() + + # Bind to the parent node and set location-dependent information. + if new: + # Only new nodes need to be referenced. + # Opened nodes are already known by their parent group. + parentnode._g_refnode(self, name, validate) + self._g_set_location(parentnode, name) + + try: + # hdf5extension operations: + # Update node attributes. + self._g_new(parentnode, name, init=True) + # Create or open the node and get its object ID. + if new: + self._v_objectid = self._g_create() + else: + self._v_objectid = self._g_open() + + # The node *has* been created, log that. + if new and _log and file_.is_undo_enabled(): + self._g_log_create() + + # This allows extra operations after creating the node. + self._g_post_init_hook() + except Exception: + # If anything happens, the node must be closed + # to undo every possible registration made so far. + # We do *not* rely on ``__del__()`` doing it later, + # since it might never be called anyway. + self._f_close() + raise + + def _g_log_create(self) -> None: + self._v_file._log("CREATE", self._v_pathname) + + def __del__(self) -> None: + # Closed `Node` instances can not be killed and revived. + # Instead, accessing a closed and deleted (from memory, not + # disk) one yields a *new*, open `Node` instance. This is + # because of two reasons: + # + # 1. Predictability. 
After closing a `Node` and deleting it, + # only one thing can happen when accessing it again: a new, + # open `Node` instance is returned. If closed nodes could be + # revived, one could get either a closed or an open `Node`. + # + # 2. Ease of use. If the user wants to access a closed node + # again, the only condition would be that no references to + # the `Node` instance were left. If closed nodes could be + # revived, the user would also need to force the closed + # `Node` out of memory, which is not a trivial task. + # + + if not self._v_isopen: + return # the node is already closed or not initialized + + self._v__deleting = True + + # If we get here, the `Node` is still open. + try: + node_manager = self._v_file._node_manager + node_manager.drop_node(self, check_unregistered=False) + finally: + # At this point the node can still be open if there is still some + # alive reference around (e.g. if the __del__ method is called + # explicitly by the user). + if self._v_isopen: + self._v__deleting = True + self._f_close() + + def _g_pre_kill_hook(self) -> None: + """Code to be called before killing the node.""" + pass + + def _g_create(self) -> int: + """Create a new HDF5 node and return its object identifier.""" + raise NotImplementedError + + def _g_open(self) -> int: + """Open an existing HDF5 node and return its object identifier.""" + raise NotImplementedError + + def _g_check_open(self) -> None: + """Check that the node is open. + + If the node is closed, a `ClosedNodeError` is raised. + + """ + if not self._v_isopen: + raise ClosedNodeError("the node object is closed") + assert self._v_file.isopen, "found an open node in a closed file" + + def _g_set_location(self, parentnode: Group, name: str) -> None: + """Set location-dependent attributes. + + Sets the location-dependent attributes of this node to reflect + that it is placed under the specified `parentnode`, with the + specified `name`. 
+ + This also triggers the insertion of file references to this + node. If the maximum recommended tree depth is exceeded, a + `PerformanceWarning` is issued. + + """ + file_ = parentnode._v_file + parentdepth = parentnode._v_depth + + self._v_file = file_ + self._v_isopen = True + + root_uep = file_.root_uep + if name.startswith(root_uep): + # This has been called from File._get_node() + assert parentdepth == 0 + if root_uep == "/": + self._v_pathname = name + else: + self._v_pathname = name[len(root_uep) :] + _, self._v_name = split_path(name) + self._v_depth = name.count("/") - root_uep.count("/") + 1 + else: + # If we enter here is because this has been called elsewhere + self._v_name = name + self._v_pathname = join_path(parentnode._v_pathname, name) + self._v_depth = parentdepth + 1 + + # Check if the node is too deep in the tree. + if parentdepth >= self._v_maxtreedepth: + warnings.warn( + """\ +node ``%s`` is exceeding the recommended maximum depth (%d);\ +be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" + % (self._v_pathname, self._v_maxtreedepth), + PerformanceWarning, + ) + + if self._v_pathname != "/": + file_._node_manager.cache_node(self, self._v_pathname) + + def _g_update_location(self, newparentpath: str) -> None: + """Update location-dependent attributes. + + Updates location data when an ancestor node has changed its + location in the hierarchy to `newparentpath`. In fact, this + method is expected to be called by an ancestor of this node. + + This also triggers the update of file references to this node. + If the maximum recommended node depth is exceeded, a + `PerformanceWarning` is issued. This warning is assured to be + unique. + + """ + oldpath = self._v_pathname + newpath = join_path(newparentpath, self._v_name) + newdepth = newpath.count("/") + + self._v_pathname = newpath + self._v_depth = newdepth + + # Check if the node is too deep in the tree. 
+ if newdepth > self._v_maxtreedepth: + warnings.warn( + """\ +moved descendent node is exceeding the recommended maximum depth (%d);\ +be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" + % (self._v_maxtreedepth,), + PerformanceWarning, + ) + + node_manager = self._v_file._node_manager + node_manager.rename_node(oldpath, newpath) + + # Tell dependent objects about the new location of this node. + self._g_update_dependent() + + def _g_del_location(self) -> None: + """Clear location-dependent attributes. + + This also triggers the removal of file references to this node. + + """ + node_manager = self._v_file._node_manager + pathname = self._v_pathname + + if not self._v__deleting: + node_manager.drop_from_cache(pathname) + # Note: node_manager.drop_node does not remove the node from the + # registry if it is still open + node_manager.registry.pop(pathname, None) + + self._v_file = None + self._v_isopen = False + self._v_pathname = None + self._v_name = None + self._v_depth = None + + def _g_post_init_hook(self) -> None: + """Code to be run after node creation and before creation logging.""" + pass + + def _g_update_dependent(self) -> None: + """Update dependent objects after a location change. + + All dependent objects (but not nodes!) referencing this node + must be updated here. + + """ + if "_v_attrs" in self.__dict__: + self._v_attrs._g_update_node_location(self) + + def _f_close(self) -> None: + """Close this node in the tree. + + This releases all resources held by the node, so it should not + be used again. On nodes with data, it may be flushed to disk. + + You should not need to close nodes manually because they are + automatically opened/closed when they are loaded/evicted from + the integrated LRU cache. + + """ + # After calling ``_f_close()``, two conditions are met: + # + # 1. The node object is detached from the tree. + # 2. *Every* attribute of the node is removed. 
+ # + # Thus, cleanup operations used in ``_f_close()`` in sub-classes + # must be run *before* calling the method in the superclass. + + if not self._v_isopen: + return # the node is already closed + + dict_ = self.__dict__ + + # Close the associated `AttributeSet` + # only if it has already been placed in the object's dictionary. + if "_v_attrs" in dict_: + self._v_attrs._g_close() + + # Detach the node from the tree if necessary. + self._g_del_location() + + # Finally, clear all remaining attributes from the object. + dict_.clear() + + # Just add a final flag to signal that the node is closed: + self._v_isopen = False + + def _g_remove(self, recursive: bool, force: bool) -> None: + """Remove this node from the hierarchy. + + If the node has children, recursive removal must be stated by + giving `recursive` a true value; otherwise, a `NodeError` will + be raised. + + If `force` is set to true, the node will be removed no matter it + has children or not (useful for deleting hard links). + + It does not log the change. + + """ + # Remove the node from the PyTables hierarchy. + parent = self._v_parent + parent._g_unrefnode(self._v_name) + # Close the node itself. + self._f_close() + # hdf5extension operations: + # Remove the node from the HDF5 hierarchy. + self._g_delete(parent) + + def _f_remove(self, recursive: bool = False, force: bool = False) -> None: + """Remove this node from the hierarchy. + + If the node has children, recursive removal must be stated by giving + recursive a true value; otherwise, a NodeError will be raised. + + If the node is a link to a Group object, and you are sure that you want + to delete it, you can do this by setting the force flag to true. 
def _g_move(self, newparent: Group, newname: str) -> None:
    """Move this node in the hierarchy.

    Moves the node into the given `newparent`, with the given
    `newname`.

    The statement order matters: the node is first referenced from the
    new parent, then unreferenced from the old one, its location data is
    cleared and re-set, and only then is the HDF5-level move requested.

    It does not log the change.

    """
    # Capture the old coordinates before any location data is rewritten.
    oldparent = self._v_parent
    oldname = self._v_name
    oldpathname = self._v_pathname  # to move the HDF5 node

    # Try to insert the node into the new parent.
    newparent._g_refnode(self, newname)
    # Remove the node from the old parent.
    oldparent._g_unrefnode(oldname)

    # Remove location information for this node.
    self._g_del_location()
    # Set new location information for this node.
    self._g_set_location(newparent, newname)

    # hdf5extension operations:
    #   Update node attributes.
    self._g_new(newparent, self._v_name, init=False)
    #   Move the node.
    # NOTE: ``self._v_parent`` is resolved from the current
    # ``_v_pathname``, which was just re-set above.
    # self._v_parent._g_move_node(oldpathname, self._v_pathname)
    self._v_parent._g_move_node(
        oldparent._v_objectid,
        oldname,
        newparent._v_objectid,
        newname,
        oldpathname,
        self._v_pathname,
    )

    # Tell dependent objects about the new location of this node.
    self._g_update_dependent()
+ + """ + self._f_move(newname=newname, overwrite=overwrite) + + def _f_move( + self, + newparent: Group | str | None = None, + newname: str | None = None, + overwrite: bool = False, + createparents: bool = False, + ) -> None: + """Move or rename this node. + + Moves a node into a new parent group, or changes the name of the + node. `newparent` can be a Group object (see :ref:`GroupClassDescr`) + or a pathname in string form. If it is not specified or `None`, the + current parent group is chosen as the new parent. newname must be + a string with a new name. + If it is not specified or None, the current name is chosen as the + new name. If `createparents` is true, the needed groups for the + given new parent group path to exist will be created. + + Moving a node across databases is not allowed, nor it is moving a node + *into* itself. These result in a NodeError. However, moving a node + *over* itself is allowed and simply does nothing. Moving over another + existing node is similarly not allowed, unless the optional overwrite + argument is true, in which case that node is recursively removed before + moving. + + Usually, only the first argument will be used, effectively moving the + node to a new location without changing its name. Using only the + second argument is equivalent to renaming the node in place. + + """ + self._g_check_open() + file_ = self._v_file + oldparent = self._v_parent + oldname = self._v_name + + # Set default arguments. + if newparent is None and newname is None: + raise NodeError( + "you should specify at least " + "a ``newparent`` or a ``newname`` parameter" + ) + if newparent is None: + newparent = oldparent + if newname is None: + newname = oldname + + # Get destination location. 
+ if hasattr(newparent, "_v_file"): # from node + newfile = newparent._v_file + newpath = newparent._v_pathname + elif hasattr(newparent, "startswith"): # from path + newfile = file_ + newpath = newparent + else: + raise TypeError( + f"new parent is not a node nor a path: {newparent!r}" + ) + + # Validity checks on arguments. + # Is it in the same file? + if newfile is not file_: + raise NodeError( + "nodes can not be moved across databases; " + "please make a copy of the node" + ) + + # The movement always fails if the hosting file can not be modified. + file_._check_writable() + + # Moving over itself? + oldpath = oldparent._v_pathname + if newpath == oldpath and newname == oldname: + # This is equivalent to renaming the node to its current name, + # and it does not change the referenced object, + # so it is an allowed no-op. + return + + # Moving into itself? + self._g_check_not_contains(newpath) + + # Note that the previous checks allow us to go ahead and create + # the parent groups if `createparents` is true. `newparent` is + # used instead of `newpath` to avoid accepting `Node` objects + # when `createparents` is true. + newparent = file_._get_or_create_path(newparent, createparents) + self._g_check_group(newparent) # Is it a group? + + # Moving over an existing node? + self._g_maybe_remove(newparent, newname, overwrite) + + # Move the node. + oldpathname = self._v_pathname + self._g_move(newparent, newname) + + # Log the change. + if file_.is_undo_enabled(): + self._g_log_move(oldpathname) + + def _g_log_move(self, oldpathname: str) -> None: + self._v_file._log("MOVE", oldpathname, self._v_pathname) + + def _g_copy( + self, + newparent: Group, + newname: str, + recursive: bool, + _log: bool = True, + **kwargs, + ) -> Node: + """Copy this node and return the new one. + + Creates and returns a copy of the node in the given `newparent`, + with the given `newname`. If `recursive` copy is stated, all + descendents are copied as well. 
Additional keyword arguments may + affect the way that the copy is made. Unknown arguments must be + ignored. On recursive copies, all keyword arguments must be + passed on to the children invocation of this method. + + If `_log` is false, the change is not logged. This is *only* + intended to be used by ``_g_copy_as_child()`` as a means of + optimising sub-tree copies. + + """ + raise NotImplementedError + + def _g_copy_as_child(self, newparent: Group, **kwargs) -> Node: + """Copy this node as a child of another group. + + Copies just this node into `newparent`, not recursing children + nor overwriting nodes nor logging the copy. This is intended to + be used when copying whole sub-trees. + + """ + return self._g_copy( + newparent, self._v_name, recursive=False, _log=False, **kwargs + ) + + def _f_copy( + self, + newparent: Group | str | None = None, + newname: str | None = None, + overwrite: bool = False, + recursive: bool = False, + createparents: bool = False, + **kwargs, + ) -> Node: + """Copy this node and return the new node. + + Creates and returns a copy of the node, maybe in a different place in + the hierarchy. newparent can be a Group object (see + :ref:`GroupClassDescr`) or a pathname in string form. If it is not + specified or None, the current parent group is chosen as the new + parent. newname must be a string with a new name. If it is not + specified or None, the current name is chosen as the new name. If + recursive copy is stated, all descendants are copied as well. If + createparents is true, the needed groups for the given new parent group + path to exist will be created. + + Copying a node across databases is supported but can not be + undone. Copying a node over itself is not allowed, nor it is + recursively copying a node into itself. These result in a + NodeError. Copying over another existing node is similarly not allowed, + unless the optional overwrite argument is true, in which case that node + is recursively removed before copying. 
+ + Additional keyword arguments may be passed to customize the copying + process. For instance, title and filters may be changed, user + attributes may be or may not be copied, data may be sub-sampled, stats + may be collected, etc. See the documentation for the particular node + type. + + Using only the first argument is equivalent to copying the node to a + new location without changing its name. Using only the second argument + is equivalent to making a copy of the node in the same group. + + """ + self._g_check_open() + srcfile = self._v_file + srcparent = self._v_parent + srcname = self._v_name + + dstparent = newparent + dstname = newname + + # Set default arguments. + if dstparent is None and dstname is None: + raise NodeError( + "you should specify at least " + "a ``newparent`` or a ``newname`` parameter" + ) + if dstparent is None: + dstparent = srcparent + if dstname is None: + dstname = srcname + + # Get destination location. + if hasattr(dstparent, "_v_file"): # from node + dstfile = dstparent._v_file + dstpath = dstparent._v_pathname + elif hasattr(dstparent, "startswith"): # from path + dstfile = srcfile + dstpath = dstparent + else: + raise TypeError( + f"new parent is not a node nor a path: {dstparent!r}" + ) + + # Validity checks on arguments. + if dstfile is srcfile: + # Copying over itself? + srcpath = srcparent._v_pathname + if dstpath == srcpath and dstname == srcname: + raise NodeError( + "source and destination nodes are the same node: ``%s``" + % self._v_pathname + ) + + # Recursively copying into itself? + if recursive: + self._g_check_not_contains(dstpath) + + # Note that the previous checks allow us to go ahead and create + # the parent groups if `createparents` is true. `dstParent` is + # used instead of `dstPath` because it may be in other file, and + # to avoid accepting `Node` objects when `createparents` is + # true. + dstparent = srcfile._get_or_create_path(dstparent, createparents) + self._g_check_group(dstparent) # Is it a group? 
def _g_check_group(self, node: Group) -> None:
    """Check that `node` is a registered node and that it is a group.

    Raises
    ------
    TypeError
        If `node` is not an instance of the class registered as ``Node``,
        or is not an instance of the class registered as ``Group``.

    """
    # Node must be defined in order to define a Group.
    # However, we need to know Group here.
    # Using class_name_dict avoids a circular import.

    def _label(obj: Any) -> str:
        # Objects that are not nodes, and closed nodes whose attributes
        # were cleared, have no usable ``_v_pathname``.  Fall back to
        # ``repr`` so that the intended TypeError is raised instead of an
        # unrelated AttributeError.
        pathname = getattr(obj, "_v_pathname", None)
        return repr(obj) if pathname is None else pathname

    if not isinstance(node, class_name_dict["Node"]):
        raise TypeError(
            "new parent is not a registered node: %s" % _label(node)
        )
    if not isinstance(node, class_name_dict["Group"]):
        raise TypeError(
            "new parent node ``%s`` is not a group" % _label(node)
        )
+ + """ + if name.startswith("_i_"): + # This is reserved for table index groups. + raise ValueError( + "node name starts with reserved prefix ``_i_``: %s" % name + ) + + def _f_getattr(self, name: str) -> Any: + """Get a PyTables attribute from this node. + + If the named attribute does not exist, an AttributeError is + raised. + + """ + return getattr(self._v_attrs, name) + + def _f_setattr(self, name: str, value: Any) -> None: + """Set a PyTables attribute for this node. + + If the node already has a large number of attributes, a + PerformanceWarning is issued. + + """ + setattr(self._v_attrs, name, value) + + def _f_delattr(self, name: str) -> None: + """Delete a PyTables attribute from this node. + + If the named attribute does not exist, an AttributeError is + raised. + + """ + delattr(self._v_attrs, name) + + +class NotLoggedMixin: + """Mixin class suppressing logging in a node tree.""" + + # Include this class in your inheritance tree + # to avoid changes to instances of your class from being logged. + + _AttributeSet = NotLoggedAttributeSet + + def _g_log_create(self) -> None: + pass + + def _g_log_move(self, oldpathname: str) -> None: + pass + + def _g_remove_and_log(self, recursive: bool, force: bool) -> None: + self._g_remove(recursive, force) diff --git a/venv/Lib/site-packages/tables/nodes/__init__.py b/venv/Lib/site-packages/tables/nodes/__init__.py new file mode 100644 index 0000000..aee8983 --- /dev/null +++ b/venv/Lib/site-packages/tables/nodes/__init__.py @@ -0,0 +1,14 @@ +"""Special node behaviours for PyTables. + +This package contains several modules that give specific behaviours +to PyTables nodes. For instance, the filenode module provides +a file interface to a PyTables node. + + +Package modules: + filenode -- A file interface to nodes for PyTables databases. + +""" + +# The list of names to be exported to the importing module. 
+__all__ = ["filenode"] diff --git a/venv/Lib/site-packages/tables/nodes/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/tables/nodes/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..0d2e2db Binary files /dev/null and b/venv/Lib/site-packages/tables/nodes/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/nodes/__pycache__/filenode.cpython-311.pyc b/venv/Lib/site-packages/tables/nodes/__pycache__/filenode.cpython-311.pyc new file mode 100644 index 0000000..947982f Binary files /dev/null and b/venv/Lib/site-packages/tables/nodes/__pycache__/filenode.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/nodes/filenode.py b/venv/Lib/site-packages/tables/nodes/filenode.py new file mode 100644 index 0000000..c979a8f --- /dev/null +++ b/venv/Lib/site-packages/tables/nodes/filenode.py @@ -0,0 +1,852 @@ +"""A file interface to nodes for PyTables databases. + +The FileNode module provides a file interface for using inside of +PyTables database files. Use the new_node() function to create a brand +new file node which can be read and written as any ordinary Python +file. Use the open_node() function to open an existing (i.e. created +with new_node()) node for read-only or read-write access. Read access +is always available. Write access (enabled on new files and files +opened with mode 'a+') only allows appending data to a file node. + +Currently only binary I/O is supported. + +See :ref:`filenode_usersguide` for instructions on use. + +.. versionchanged:: 3.0 + In version 3.0 the module as been completely rewritten to be fully + compliant with the interfaces defined in the :mod:`io` module. 
+ +""" + +import io +import os +import re +import warnings +from pathlib import Path + +import numpy as np + +import tables as tb + +NodeType = "file" +"""Value for NODE_TYPE node system attribute.""" + +NodeTypeVersions = [1, 2] +"""Supported values for NODE_TYPE_VERSION node system attribute.""" + + +class RawPyTablesIO(io.RawIOBase): + """Base class for raw binary I/O on HDF5 files using PyTables.""" + + # A lambda to turn a size into a shape, for each version. + _size_to_shape = [ + None, + lambda n: (n, 1), + lambda m: (m,), + ] + + def __init__(self, node, mode=None): + super().__init__() + + self._check_node(node) + self._check_attributes(node) + + if mode is None: + mode = node._v_file.mode + else: + self._check_mode(mode) + self._cross_check_mode(mode, node._v_file.mode) + + self._node = node + self._mode = mode + self._pos = 0 + self._version = int(node.attrs.NODE_TYPE_VERSION) + self._vshape = self._size_to_shape[self._version] + self._vtype = node.atom.dtype.base.type + + # read only attribute + @property + def mode(self): + """File mode.""" + return self._mode + + # def tell(self) -> int: + def tell(self): + """Return current stream position.""" + self._check_closed() + return self._pos + + # def seek(self, pos: int, whence: int = 0) -> int: + def seek(self, pos, whence=0): + """Change stream position. + + Change the stream position to byte offset offset. offset is + interpreted relative to the position indicated by whence. Values + for whence are: + + * 0 -- start of stream (the default); offset should be zero or positive + * 1 -- current stream position; offset may be negative + * 2 -- end of stream; offset is usually negative + + Return the new absolute position. 
+ + """ + self._check_closed() + try: + pos = pos.__index__() + # except AttributeError as err: + # raise TypeError("an integer is required") from err + except AttributeError: + raise TypeError("an integer is required") + if whence == 0: + if pos < 0: + raise ValueError(f"negative seek position {pos!r}") + self._pos = pos + elif whence == 1: + self._pos = max(0, self._pos + pos) + elif whence == 2: + self._pos = max(0, self._node.nrows + pos) + else: + raise ValueError("invalid whence value") + return self._pos + + # def seekable(self) -> bool: + def seekable(self): + """Return whether object supports random access. + + If False, seek(), tell() and truncate() will raise IOError. This + method may need to do a test seek(). + + """ + return True + + # def fileno(self) -> int: + def fileno(self): + """Return underlying file descriptor if one exists. + + An IOError is raised if the IO object does not use a file + descriptor. + + """ + self._check_closed() + return self._node._v_file.fileno() + + # def close(self) -> None: + def close(self): + """Flush and close the IO object. + + This method has no effect if the file is already closed. + + """ + if not self.closed: + if getattr(self._node, "_v_file", None) is None: + warnings.warn("host PyTables file is already closed!") + + try: + super().close() + finally: + # Release node object to allow closing the file. + self._node = None + + def flush(self): + """Flush write buffers, if applicable. + + This is not implemented for read-only and non-blocking streams. + + """ + self._check_closed() + self._node.flush() + + # def truncate(self, pos: int = None) -> int: + def truncate(self, pos=None): + """Truncate file to size bytes. + + Size defaults to the current IO position as reported by tell(). + Return the new size. + + Currently, this method only makes sense to grow the file node, + since data can not be rewritten nor deleted. 
+ + """ + self._check_closed() + self._checkWritable() + + if pos is None: + pos = self._pos + elif pos < 0: + raise ValueError(f"negative truncate position {pos!r}") + + if pos < self._node.nrows: + raise OSError("truncating is only allowed for growing a file") + self._append_zeros(pos - self._node.nrows) + + return self.seek(pos) + + # def readable(self) -> bool: + def readable(self): + """Return whether object was opened for reading. + + If False, read() will raise IOError. + + """ + mode = self._mode + return "r" in mode or "+" in mode + + # def writable(self) -> bool: + def writable(self): + """Return whether object was opened for writing. + + If False, write() and truncate() will raise IOError. + + """ + mode = self._mode + return "w" in mode or "a" in mode or "+" in mode + + # def readinto(self, b: bytearray) -> int: + def readinto(self, b): + """Read up to len(b) bytes into b. + + Returns number of bytes read (0 for EOF), or None if the object + is set not to block as has no data to read. + + """ + self._check_closed() + self._checkReadable() + + if self._pos >= self._node.nrows: + return 0 + + n = len(b) + start = self._pos + stop = self._pos + n + + # XXX optimized path + # if stop <= self._node.nrows and isinstance(b, np.ndarray): + # self._node.read(start, stop, out=b) + # self._pos += n + # return n + + if stop > self._node.nrows: + stop = self._node.nrows + n = stop - start + + # XXX This ought to work with anything that supports the buffer API + b[:n] = self._node.read(start, stop).tobytes() + + self._pos += n + + return n + + # def readline(self, limit: int = -1) -> bytes: + def readline(self, limit=-1): + r"""Read and return a line from the stream. + + If limit is specified, at most limit bytes will be read. + + The line terminator is always ``\\n`` for binary files; for text + files, the newlines argument to open can be used to select the line + terminator(s) recognized. 
+ + """ + self._check_closed() + self._checkReadable() + + chunksize = self._node.chunkshape[0] if self._node.chunkshape else -1 + + # XXX: check + lsep = b"\n" + lseplen = len(lsep) + + # Set the remaining bytes to read to the specified size. + remsize = limit + + partial = [] + finished = False + + while not finished: + # Read a string limited by the remaining number of bytes. + if limit <= 0: + ibuff = self.read(chunksize) + else: + ibuff = self.read(min(remsize, chunksize)) + ibufflen = len(ibuff) + remsize -= ibufflen + + if ibufflen >= lseplen: + # Separator fits, look for EOL string. + eolindex = ibuff.find(lsep) + elif ibufflen == 0: + # EOF was immediately reached. + finished = True + continue + else: # ibufflen < lseplen + # EOF was hit and separator does not fit. ;) + partial.append(ibuff) + finished = True + continue + + if eolindex >= 0: + # Found an EOL. If there are trailing characters, + # cut the input buffer and seek back; + # else add the whole input buffer. + trailing = ibufflen - lseplen - eolindex # Bytes beyond EOL. + if trailing > 0: + obuff = ibuff[:-trailing] + self.seek(-trailing, 1) + remsize += trailing + else: + obuff = ibuff + finished = True + elif lseplen > 1 and (limit <= 0 or remsize > 0): + # Seek back a little since the end of the read string + # may have fallen in the middle of the line separator. + obuff = ibuff[: -lseplen + 1] + self.seek(-lseplen + 1, 1) + remsize += lseplen - 1 + else: # eolindex<0 and (lseplen<=1 or (limit>0 and remsize<=0)) + # Did not find an EOL, add the whole input buffer. + obuff = ibuff + + # Append (maybe cut) buffer. + partial.append(obuff) + + # If a limit has been specified and the remaining count + # reaches zero, the reading is finished. + if limit > 0 and remsize <= 0: + finished = True + + return b"".join(partial) + + # def write(self, b: bytes) -> int: + def write(self, b): + """Write the given buffer to the IO stream. + + Returns the number of bytes written, which may be less than + len(b). 
+ + """ + self._check_closed() + self._checkWritable() + + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + + n = len(b) + if n == 0: + return 0 + + pos = self._pos + + # Is the pointer beyond the real end of data? + end2off = pos - self._node.nrows + if end2off > 0: + # Zero-fill the gap between the end of data and the pointer. + self._append_zeros(end2off) + + # Append data. + self._node.append( + np.ndarray(buffer=b, dtype=self._vtype, shape=self._vshape(n)) + ) + + self._pos += n + + return n + + def _check_closed(self): + """Check if file node is open. + + Checks whether the file node is open or has been closed. In the + second case, a ValueError is raised. If the host PyTables has + been closed, ValueError is also raised. + + """ + # super()._check_closed() + if getattr(self._node, "_v_file", None) is None: + raise ValueError("host PyTables file is already closed!") + + def _check_node(self, node): + if not isinstance(node, tb.EArray): + raise TypeError('the "node" parameter should be a tables.EArray') + if not isinstance(node.atom, tb.UInt8Atom): + raise TypeError('only nodes with atom "UInt8Atom" are allowed') + + def _check_mode(self, mode): + if not isinstance(mode, str): + raise TypeError("invalid mode: %r" % mode) + + modes = set(mode) + if modes - set("arwb+tU") or len(mode) > len(modes): + raise ValueError("invalid mode: %r" % mode) + + reading = "r" in modes + writing = "w" in modes + appending = "a" in modes + # updating = "+" in modes + text = "t" in modes + binary = "b" in modes + + if "U" in modes: + if writing or appending: + raise ValueError("can't use U and writing mode at once") + reading = True + + if text and binary: + raise ValueError("can't have text and binary mode at once") + + if reading + writing + appending > 1: + raise ValueError("can't have read/write/append mode at once") + + if not (reading or writing or appending): + raise ValueError("must have exactly one of read/write/append mode") + + def 
_cross_check_mode(self, mode, h5filemode): + # XXX: check + # readable = bool('r' in mode or '+' in mode) + # h5readable = bool('r' in h5filemode or '+' in h5filemode) + # + # if readable and not h5readable: + # raise ValueError("RawPyTablesIO can't be open in read mode if " + # "the underlying hdf5 file is not readable") + + writable = bool("w" in mode or "a" in mode or "+" in mode) + h5writable = bool( + "w" in h5filemode or "a" in h5filemode or "+" in h5filemode + ) + + if writable and not h5writable: + raise ValueError( + "RawPyTablesIO can't be open in write mode if " + "the underlying hdf5 file is not writable" + ) + + def _check_attributes(self, node): + """Check file node-specific attributes. + + Checks for the presence and validity + of the system attributes 'NODE_TYPE' and 'NODE_TYPE_VERSION' + in the specified PyTables node (leaf). + ValueError is raised if an attribute is missing or incorrect. + + """ + attrs = node.attrs + ltype = getattr(attrs, "NODE_TYPE", None) + ltypever = getattr(attrs, "NODE_TYPE_VERSION", None) + + if ltype != NodeType: + raise ValueError(f"invalid type of node object: {ltype}") + if ltypever not in NodeTypeVersions: + raise ValueError( + f"unsupported type version of node object: {ltypever}" + ) + + def _append_zeros(self, size): + """Append a string of zeros. + + Appends a string of 'size' zeros to the array, + without moving the file pointer. + + """ + # Appending an empty array would raise an error. + if size == 0: + return + + # XXX This may be redone to avoid a potentially large in-memory array. + self._node.append( + np.zeros(dtype=self._vtype, shape=self._vshape(size)) + ) + + +class FileNodeMixin: + """Mixin class for FileNode objects. + + It provides access to the attribute set of the node that becomes + available via the attrs property. You can add attributes there, but + try to avoid attribute names in all caps or starting with '_', since + they may clash with internal attributes. 
+ + """ + + # The attribute set property methods. + @property + def attrs(self): + """Return the attribute set of the file node.""" + # self._check_closed() + return self._node.attrs + + @attrs.setter + def _set_attrs(self, value): + """Raise ValueError.""" + raise ValueError("changing the whole attribute set is not allowed") + + @attrs.deleter + def _del_attrs(self): + """Raise ValueError.""" + raise ValueError("deleting the whole attribute set is not allowed") + + +class ROFileNode(FileNodeMixin, RawPyTablesIO): + r"""Creates a new read-only file node. + + Creates a new read-only file node associated with the specified + PyTables node, providing a standard Python file interface to it. + The node has to have been created on a previous occasion + using the new_node() function. + + The node used as storage is also made available via the read-only + attribute node. Please do not tamper with this object if it's + avoidable, since you may break the operation of the file node object. + + The constructor is not intended to be used directly. + Use the open_node() function in read-only mode ('r') instead. + + :Version 1: + implements the file storage as a UInt8 uni-dimensional EArray. + :Version 2: + uses an UInt8 N vector EArray. + + .. versionchanged:: 3.0 + The offset attribute is no more available, please use seek/tell + methods instead. + + .. versionchanged:: 3.0 + The line_separator property is no more available. + The only line separator used for binary I/O is ``\\n``. + + """ + + def __init__(self, node): + RawPyTablesIO.__init__(self, node, "r") + self._checkReadable() + + @property + def node(self): + """Node property.""" + return self._node + + +class RAFileNode(FileNodeMixin, RawPyTablesIO): + r"""Creates a new read-write file node. + + The first syntax opens the specified PyTables node, while the + second one creates a new node in the specified PyTables file. 
+ In the second case, additional named arguments 'where' and 'name' + must be passed to specify where the file node is to be created. + Other named arguments such as 'title' and 'filters' may also be + passed. The special named argument 'expectedsize', indicating an + estimate of the file size in bytes, may also be passed. + + Write access means reading as well as appending data is allowed. + + The node used as storage is also made available via the read-only + attribute node. Please do not tamper with this object if it's + avoidable, since you may break the operation of the file node object. + + The constructor is not intended to be used directly. + Use the new_node() or open_node() functions instead. + + :Version 1: + implements the file storage as a UInt8 uni-dimensional EArray. + :Version 2: + uses an UInt8 N vector EArray. + + .. versionchanged:: 3.0 + The offset attribute is no more available, please use seek/tell + methods instead. + + .. versionchanged:: 3.0 + The line_separator property is no more available. + The only line separator used for binary I/O is ``\\n``. + + """ + + # The atom representing a byte in the array, for each version. + _byte_shape = [ + None, + (0, 1), + (0,), + ] + + __allowed_init_kwargs = [ + "where", + "name", + "title", + "filters", + "expectedsize", + ] + + def __init__(self, node, h5file, **kwargs): + if node is not None: + # Open an existing node and get its version. + self._check_attributes(node) + self._version = node.attrs.NODE_TYPE_VERSION + elif h5file is not None: + # Check for allowed keyword arguments, + # to avoid unwanted arguments falling through to array constructor. + for kwarg in kwargs: + if kwarg not in self.__allowed_init_kwargs: + raise TypeError( + "%s keyword argument is not allowed" % repr(kwarg) + ) + + # Turn 'expectedsize' into 'expectedrows'. + if "expectedsize" in kwargs: + # These match since one byte is stored per row. 
+ expectedrows = kwargs["expectedsize"] + kwargs = kwargs.copy() + del kwargs["expectedsize"] + kwargs["expectedrows"] = expectedrows + + # Create a new array in the specified PyTables file. + self._version = NodeTypeVersions[-1] + shape = self._byte_shape[self._version] + node = h5file.create_earray( + atom=tb.UInt8Atom(), shape=shape, **kwargs + ) + + # Set the node attributes, else remove the array itself. + try: + self._set_attributes(node) + except RuntimeError: + h5file.remove_node(kwargs["where"], kwargs["name"]) + raise + + RawPyTablesIO.__init__(self, node, "a+") + self._checkReadable() + self._checkWritable() + + @property + def node(self): + """Node property.""" + return self._node + + def _set_attributes(self, node) -> None: + """Add file node-specific attributes. + + Sets the system attributes 'NODE_TYPE' and 'NODE_TYPE_VERSION' + in the specified PyTables node (leaf). + + """ + attrs = node.attrs + attrs.NODE_TYPE = NodeType + attrs.NODE_TYPE_VERSION = NodeTypeVersions[-1] + + +def new_node(h5file, **kwargs): + """Create a new file node object in the specified PyTables file object. + + Additional named arguments where and name must be passed to specify where + the file node is to be created. Other named arguments such as title and + filters may also be passed. + + The special named argument expectedsize, indicating an estimate of the + file size in bytes, may also be passed. It returns the file node object. + + """ + return RAFileNode(None, h5file, **kwargs) + + +def open_node(node, mode="r"): + """Open an existing file node. + + Returns a file node object from the existing specified PyTables + node. If mode is not specified or it is 'r', the file can only be + read, and the pointer is positioned at the beginning of the file. If + mode is 'a+', the file can be read and appended, and the pointer is + positioned at the end of the file. 
+ + """ + if mode == "r": + return ROFileNode(node) + elif mode == "a+": + return RAFileNode(node, None) + else: + raise OSError(f"invalid mode: {mode}") + + +def save_to_filenode( + h5file, filename, where, name=None, overwrite=False, title="", filters=None +): + """Save a file's contents to a filenode inside a PyTables file. + + .. versionadded:: 3.2 + + Parameters + ---------- + h5file + The PyTables file to be written to; can be either a string + giving the file's location or a :class:`File` object. If a file + with name *h5file* already exists, it will be opened in + mode ``a``. + + filename + Path of the file which shall be stored within the PyTables file. + + where, name + Location of the filenode where the data shall be stored. If + *name* is not given, and *where* is either a :class:`Group` + object or a string ending on ``/``, the leaf name will be set to + the file name of *filename*. The *name* will be modified to + adhere to Python's natural naming convention; the original + filename will be preserved in the filenode's *_filename* + attribute. + + overwrite + Whether or not a possibly existing filenode of the specified + name shall be overwritten. + + title + A description for this node (it sets the ``TITLE`` HDF5 + attribute on disk). + + filters + An instance of the :class:`Filters` class that provides + information about the desired I/O filters to be applied + during the life of this object. 
+ + """ + path = Path(filename).resolve() + + # sanity checks + if not os.access(path, os.R_OK): + raise OSError(f"The file '{path}' could not be read") + if isinstance(h5file, tb.file.File) and h5file.mode == "r": + raise OSError(f"The file '{h5file.filename}' is opened read-only") + + # guess filenode's name if necessary + if name is None: + if isinstance(where, tb.group.Group): + name = os.path.split(filename)[1] + if isinstance(where, str): + if where.endswith("/"): + name = os.path.split(filename)[1] + else: + nodepath = where.split("/") + where = "/" + "/".join(nodepath[:-1]) + name = nodepath[-1] + + # sanitize name if necessary + if not tb.path._python_id_re.match(name): + name = re.sub( + "(?![a-zA-Z0-9_]).", "_", re.sub("^(?![a-zA-Z_]).", "_", name) + ) + + new_h5file = not isinstance(h5file, tb.file.File) + f = tb.File(h5file, "a") if new_h5file else h5file + + # check for already existing filenode + try: + f.get_node(where=where, name=name) + if not overwrite: + if new_h5file: + f.close() + raise OSError( + f"Specified node already exists in file '{f.filename}'" + ) + except tb.NoSuchNodeError: + pass + + # read data from disk + data = path.read_bytes() + + # remove existing filenode if present + try: + f.remove_node(where=where, name=name) + except tb.NoSuchNodeError: + pass + + # write file's contents to filenode + fnode = new_node(f, where=where, name=name, title=title, filters=filters) + fnode.write(data) + fnode.attrs._filename = path.name + fnode.close() + + # cleanup + if new_h5file: + f.close() + + +def read_from_filenode( + h5file, filename, where, name=None, overwrite=False, create_target=False +): + r"""Read a filenode from a PyTables file and write its contents to a file. + + .. versionadded:: 3.2 + + Parameters + ---------- + h5file + The PyTables file to be read from; can be either a string + giving the file's location or a :class:`File` object. + + filename + Path of the file where the contents of the filenode shall be + written to. 
If *filename* points to a directory or ends with + ``/`` (``\`` on Windows), the filename will be set to the + *_filename* (if present; otherwise the *name*) attribute of the + read filenode. + + where, name + Location of the filenode where the data shall be read from. If + no node *name* can be found at *where*, the first node at + *where* whose *_filename* attribute matches *name* will be read. + + overwrite + Whether or not a possibly existing file of the specified + *filename* shall be overwritten. + + create_target + Whether or not the folder hierarchy needed to accomodate the + given target ``filename`` will be created. + + """ + path = Path(filename).resolve() + + new_h5file = not isinstance(h5file, tb.file.File) + f = tb.File(h5file, "r") if new_h5file else h5file + try: + fnode = open_node(f.get_node(where=where, name=name)) + except tb.NoSuchNodeError: + fnode = None + for n in f.walk_nodes(where=where, classname="EArray"): + if n.attrs._filename == name: + fnode = open_node(n) + break + if fnode is None: + f.close() + raise tb.NoSuchNodeError( + "A filenode '%s' cannot be found at " "'%s'" % (name, where) + ) + + # guess output filename if necessary + # TODO: pathlib.Path strips trailing slash automatically :-( + if path.is_dir() or filename.endswith(os.path.sep): + try: + path = path / fnode.node.attrs._filename + except Exception: + path = path / fnode.node.name + + if os.access(path, os.R_OK) and not overwrite: + if new_h5file: + f.close() + raise OSError(f"The file '{path}' already exists") + + # create folder hierarchy if necessary + if create_target: + path.parent.mkdir(parents=True, exist_ok=True) + + if not os.access(path.parent, os.W_OK): + if new_h5file: + f.close() + raise OSError("The file '%s' cannot be written to" % filename) + + # read data from filenode + data = fnode.read() + fnode.close() + + # store data to file + path.write_bytes(data) + + # cleanup + del data + if new_h5file: + f.close() diff --git 
a/venv/Lib/site-packages/tables/nodes/tests/__init__.py b/venv/Lib/site-packages/tables/nodes/tests/__init__.py new file mode 100644 index 0000000..072ade1 --- /dev/null +++ b/venv/Lib/site-packages/tables/nodes/tests/__init__.py @@ -0,0 +1 @@ +"""Unit tests for special node behaviours.""" diff --git a/venv/Lib/site-packages/tables/nodes/tests/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/tables/nodes/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..752debd Binary files /dev/null and b/venv/Lib/site-packages/tables/nodes/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/nodes/tests/__pycache__/test_filenode.cpython-311.pyc b/venv/Lib/site-packages/tables/nodes/tests/__pycache__/test_filenode.cpython-311.pyc new file mode 100644 index 0000000..58e07ac Binary files /dev/null and b/venv/Lib/site-packages/tables/nodes/tests/__pycache__/test_filenode.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/nodes/tests/test_filenode.dat b/venv/Lib/site-packages/tables/nodes/tests/test_filenode.dat new file mode 100644 index 0000000..5471bc7 --- /dev/null +++ b/venv/Lib/site-packages/tables/nodes/tests/test_filenode.dat @@ -0,0 +1,46 @@ +#define test_width 64 +#define test_height 64 +static char test_bits[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xF1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xC0, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xC4, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x3F, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x1E, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xB8, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC7, 0xF8, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE1, 0xF1, 0xFE, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xF3, 0x1F, 0xCF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x1F, 0xC7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0xC7, 0xFF, + 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0xE3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xE3, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0xF8, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x38, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x3D, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x03, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x87, 0x01, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xEF, 0x70, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE1, 0xFC, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x84, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x18, 0x7C, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x19, 0x7C, + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x31, 0x3C, 0xFC, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x63, 0x0E, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x87, 0x87, + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xCF, 0xC7, 0x9E, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x61, 0xCF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x31, + 0xC3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x38, 0xE1, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xBC, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x9F, + 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0xBC, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0F, 0x07, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, + 0x03, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x9F, 0xF1, 0xF8, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xF1, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xF9, 0xF1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0xF9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xF1, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x63, 0x18, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x06, 0x9E, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x42, 0x84, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xF1, 0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0xE1, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF8, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x79, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x71, 0xFC, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x1E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x07, 0x03, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8F, 0x61, 0xFC, + 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0xC1, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xC1, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x19, 0xF9, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x19, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x71, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x63, 0xFC, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, }; diff --git a/venv/Lib/site-packages/tables/nodes/tests/test_filenode.py b/venv/Lib/site-packages/tables/nodes/tests/test_filenode.py new file mode 100644 index 0000000..cda0ef0 --- /dev/null +++ b/venv/Lib/site-packages/tables/nodes/tests/test_filenode.py @@ -0,0 +1,1147 @@ +"""Unit test for the filenode module.""" + +import os +import shutil +import tempfile +import warnings +from pathlib import Path + +from ... import open_file, file, NoSuchNodeError +from ...nodes import filenode +from ...tests.common import ( + unittest, + TempFileMixin, + parse_argv, + print_versions, + make_suite, +) +from ...tests.common import PyTablesTestCase as TestCase + + +def test_file(name): + from importlib import resources + + return resources.files("tables.nodes.tests") / name + + +class NewFileTestCase(TempFileMixin, TestCase): + """Tests creating a new file node with the new_node() function.""" + + def test00_NewFile(self): + """Creation of a brand new file node.""" + + try: + fnode = filenode.new_node(self.h5file, where="/", name="test") + node = self.h5file.get_node("/test") + except LookupError: + self.fail("filenode.new_node() failed to create a new node.") + else: + self.assertEqual( + fnode.node, + node, + "filenode.new_node() created a node in the wrong place.", + ) + + def test01_NewFileTooFewArgs(self): + """Creation of a new file node without arguments for node creation.""" + + self.assertRaises(TypeError, filenode.new_node, self.h5file) + + def test02_NewFileWithExpectedSize(self): + 
"""Creation of a new file node with 'expectedsize' argument.""" + + try: + filenode.new_node( + self.h5file, where="/", name="test", expectedsize=100_000 + ) + except TypeError: + self.fail( + "filenode.new_node() failed to accept 'expectedsize' argument." + ) + + def test03_NewFileWithExpectedRows(self): + """Creation of a new file node with illegal 'expectedrows' argument.""" + + self.assertRaises( + TypeError, + filenode.new_node, + self.h5file, + where="/", + name="test", + expectedrows=100_000, + ) + + +class ClosedFileTestCase(TempFileMixin, TestCase): + """Tests calling several methods on a closed file.""" + + def setUp(self): + """setUp() -> None + + This method sets the following instance attributes: + * 'h5fname', the name of the temporary HDF5 file + * 'h5file', the writable, temporary HDF5 file with a '/test' node + * 'fnode', the closed file node in '/test' + + """ + + super().setUp() + self.fnode = filenode.new_node(self.h5file, where="/", name="test") + self.fnode.close() + + def tearDown(self): + """tearDown() -> None + + Closes 'h5file'; removes 'h5fname'. + + """ + + self.fnode = None + super().tearDown() + + # All these tests mey seem odd, but Python (2.3) files + # do test whether the file is not closed regardless of their mode. 
+ def test00_Close(self): + """Closing a closed file.""" + + try: + self.fnode.close() + except ValueError: + self.fail("Could not close an already closed file.") + + def test01_Flush(self): + """Flushing a closed file.""" + + self.assertRaises(ValueError, self.fnode.flush) + + def test02_Next(self): + """Getting the next line of a closed file.""" + + self.assertRaises(ValueError, next, self.fnode) + + def test03_Read(self): + """Reading a closed file.""" + + self.assertRaises(ValueError, self.fnode.read) + + def test04_Readline(self): + """Reading a line from a closed file.""" + + self.assertRaises(ValueError, self.fnode.readline) + + def test05_Readlines(self): + """Reading lines from a closed file.""" + + self.assertRaises(ValueError, self.fnode.readlines) + + def test06_Seek(self): + """Seeking a closed file.""" + + self.assertRaises(ValueError, self.fnode.seek, 0) + + def test07_Tell(self): + """Getting the pointer position in a closed file.""" + + self.assertRaises(ValueError, self.fnode.tell) + + def test08_Truncate(self): + """Truncating a closed file.""" + + self.assertRaises(ValueError, self.fnode.truncate) + + def test09_Write(self): + """Writing a closed file.""" + + self.assertRaises(ValueError, self.fnode.write, b"foo") + + def test10_Writelines(self): + """Writing lines to a closed file.""" + + self.assertRaises(ValueError, self.fnode.writelines, [b"foo\n"]) + + +def copyFileToFile(srcfile, dstfile, blocksize=4096): + """copyFileToFile(srcfile, dstfile[, blocksize]) -> None + + Copies a readable opened file 'srcfile' to a writable opened file + 'destfile' in blocks of 'blocksize' bytes (4 KiB by default). 
+ + """ + + data = srcfile.read(blocksize) + while len(data) > 0: + dstfile.write(data) + data = srcfile.read(blocksize) + + +class WriteFileTestCase(TempFileMixin, TestCase): + """Tests writing, seeking and truncating a new file node.""" + + datafname = "test_filenode.dat" + + def setUp(self): + """setUp() -> None + + This method sets the following instance attributes: + * 'h5fname', the name of the temporary HDF5 file + * 'h5file', the writable, temporary HDF5 file with a '/test' node + * 'fnode', the writable file node in '/test' + + """ + + super().setUp() + self.fnode = filenode.new_node(self.h5file, where="/", name="test") + self.datafname = test_file(self.datafname) + + def tearDown(self): + """tearDown() -> None + + Closes 'fnode' and 'h5file'; removes 'h5fname'. + + """ + + self.fnode.close() + self.fnode = None + super().tearDown() + + def test00_WriteFile(self): + """Writing a whole file node.""" + + datafile = open(self.datafname, "rb") + try: + copyFileToFile(datafile, self.fnode) + finally: + datafile.close() + + def test01_SeekFile(self): + """Seeking and writing file node.""" + + self.fnode.write(b"0123") + self.fnode.seek(8) + self.fnode.write(b"4567") + self.fnode.seek(3) + data = self.fnode.read(6) + self.assertEqual( + data, + b"3\0\0\0\0" b"4", + "Gap caused by forward seek was not properly filled.", + ) + + self.fnode.seek(0) + self.fnode.write(b"test") + + self.fnode.seek(0) + data = self.fnode.read(4) + self.assertNotEqual( + data, b"test", "Data was overwritten instead of appended." 
+ ) + + self.fnode.seek(-4, 2) + data = self.fnode.read(4) + self.assertEqual(data, b"test", "Written data was not appended.") + + self.fnode.seek(0, 2) + oldendoff = self.fnode.tell() + self.fnode.seek(-2, 2) + self.fnode.write(b"test") + newendoff = self.fnode.tell() + self.assertEqual( + newendoff, + oldendoff - 2 + 4, + "Pointer was not correctly moved on append.", + ) + + def test02_TruncateFile(self): + """Truncating a file node.""" + + self.fnode.write(b"test") + + self.fnode.seek(2) + self.assertRaises(IOError, self.fnode.truncate) + + self.fnode.seek(6) + self.fnode.truncate() + self.fnode.seek(0) + data = self.fnode.read() + self.assertEqual( + data, b"test\0\0", "File was not grown to the current offset." + ) + + self.fnode.truncate(8) + self.fnode.seek(0) + data = self.fnode.read() + self.assertEqual( + data, b"test\0\0\0\0", "File was not grown to an absolute size." + ) + + +class OpenFileTestCase(TempFileMixin, TestCase): + """Tests opening an existing file node for reading and writing.""" + + def setUp(self): + """setUp() -> None + + This method sets the following instance attributes: + * 'h5fname', the name of the temporary HDF5 file + * 'h5file', the writable, temporary HDF5 file with a '/test' node + + """ + + super().setUp() + fnode = filenode.new_node(self.h5file, where="/", name="test") + fnode.close() + + def test00_OpenFileRead(self): + """Opening an existing file node for reading.""" + + node = self.h5file.get_node("/test") + fnode = filenode.open_node(node) + self.assertEqual( + fnode.node, node, "filenode.open_node() opened the wrong node." 
+ ) + self.assertEqual( + fnode.mode, + "r", + f"File was opened with an invalid mode {fnode.mode!r}.", + ) + self.assertEqual( + fnode.tell(), + 0, + "Pointer is not positioned at the beginning of the file.", + ) + fnode.close() + + def test01_OpenFileReadAppend(self): + """Opening an existing file node for reading and appending.""" + + node = self.h5file.get_node("/test") + fnode = filenode.open_node(node, "a+") + self.assertEqual( + fnode.node, node, "filenode.open_node() opened the wrong node." + ) + self.assertEqual( + fnode.mode, + "a+", + f"File was opened with an invalid mode {fnode.mode!r}.", + ) + + self.assertEqual( + fnode.tell(), + 0, + "Pointer is not positioned at the beginning of the file.", + ) + fnode.close() + + def test02_OpenFileInvalidMode(self): + """Opening an existing file node with an invalid mode.""" + + self.assertRaises( + IOError, filenode.open_node, self.h5file.get_node("/test"), "w" + ) + + # This no longer works since type and type version attributes + # are now system attributes. ivb(2004-12-29) + # def test03_OpenFileNoAttrs(self): + # "Opening a node with no type attributes." + # + # node = self.h5file.get_node('/test') + # self.h5file.del_node_attr('/test', '_type') + # # Another way to get the same result is changing the value. 
+ # ##self.h5file.set_node_attr('/test', '_type', 'foobar') + # self.assertRaises(ValueError, filenode.open_node, node) + + +class ReadFileTestCase(TempFileMixin, TestCase): + """Tests reading from an existing file node.""" + + datafname = "test_filenode.xbm" + + def setUp(self): + """setUp() -> None + + This method sets the following instance attributes: + * 'datafile', the opened data file + * 'h5fname', the name of the temporary HDF5 file + * 'h5file', the writable, temporary HDF5 file with a '/test' node + * 'fnode', the readable file node in '/test', with data in it + + """ + + self.datafname = test_file(self.datafname) + self.datafile = open(self.datafname, "rb") + + super().setUp() + + fnode = filenode.new_node(self.h5file, where="/", name="test") + copyFileToFile(self.datafile, fnode) + fnode.close() + + self.datafile.seek(0) + self.fnode = filenode.open_node(self.h5file.get_node("/test")) + + def tearDown(self): + """tearDown() -> None + + Closes 'fnode', 'h5file' and 'datafile'; removes 'h5fname'. + + """ + + self.fnode.close() + self.fnode = None + + self.datafile.close() + self.datafile = None + + super().tearDown() + + def test00_CompareFile(self): + """Reading and comparing a whole file node.""" + + import hashlib + + dfiledigest = hashlib.md5(self.datafile.read()).digest() + fnodedigest = hashlib.md5(self.fnode.read()).digest() + + self.assertEqual( + dfiledigest, + fnodedigest, + "Data read from file node differs from that in the file on disk.", + ) + + def test01_Write(self): + """Writing on a read-only file.""" + + self.assertRaises(IOError, self.fnode.write, "no way") + + def test02_UseAsImageFile(self): + """Using a file node with Python Imaging Library.""" + + try: + from PIL import Image + + Image.open(self.fnode) + except ImportError: + # PIL not available, nothing to do. + pass + except OSError: + self.fail( + "PIL was not able to create an image from the file node." 
+ ) + + def test_fileno(self): + self.assertIsNot(self.fnode.fileno(), None) + + +class ReadlineTestCase(TempFileMixin, TestCase): + """Base class for text line-reading test cases. + + It provides a set of tests independent of the line separator string. + Sub-classes must provide the 'line_separator' attribute. + + """ + + def setUp(self): + """This method sets the following instance attributes: + + * ``h5fname``: the name of the temporary HDF5 file. + * ``h5file``: the writable, temporary HDF5 file with a ``/test`` node. + * ``fnode``: the readable file node in ``/test``, with text in it. + + """ + + super().setUp() + + linesep = self.line_separator + + # Fill the node file with some text. + fnode = filenode.new_node(self.h5file, where="/", name="test") + # fnode.line_separator = linesep + fnode.write(linesep) + data = "short line%sshort line%s%s" % ((linesep.decode("ascii"),) * 3) + data = data.encode("ascii") + fnode.write(data) + fnode.write(b"long line " * 20 + linesep) + fnode.write(b"unterminated") + fnode.close() + + # Re-open it for reading. + self.fnode = filenode.open_node(self.h5file.get_node("/test")) + # self.fnode.line_separator = linesep + + def tearDown(self): + """tearDown() -> None + + Closes 'fnode' and 'h5file'; removes 'h5fname'. 
+ + """ + + self.fnode.close() + self.fnode = None + super().tearDown() + + def test00_Readline(self): + """Reading individual lines.""" + + linesep = self.line_separator + + line = self.fnode.readline() + self.assertEqual(line, linesep) + + line = self.fnode.readline() # 'short line' + linesep + line = self.fnode.readline() + self.assertEqual(line, b"short line" + linesep) + line = self.fnode.readline() + self.assertEqual(line, linesep) + + line = self.fnode.readline() + self.assertEqual(line, b"long line " * 20 + linesep) + + line = self.fnode.readline() + self.assertEqual(line, b"unterminated") + + line = self.fnode.readline() + self.assertEqual(line, b"") + + line = self.fnode.readline() + self.assertEqual(line, b"") + + def test01_ReadlineSeek(self): + """Reading individual lines and seeking back and forth.""" + + linesep = self.line_separator + lseplen = len(linesep) + + self.fnode.readline() # linesep + self.fnode.readline() # 'short line' + linesep + + self.fnode.seek(-(lseplen + 4), 1) + line = self.fnode.readline() + self.assertEqual( + line, b"line" + linesep, "Seeking back yielded different data." + ) + + self.fnode.seek(lseplen + 20, 1) # Into the long line. + line = self.fnode.readline() + self.assertEqual( + line[-(lseplen + 10) :], + b"long line " + linesep, + "Seeking forth yielded unexpected data.", + ) + + def test02_Iterate(self): + """Iterating over the lines.""" + + linesep = self.line_separator + + # Iterate to the end. 
+ for line in self.fnode: + pass + + self.assertRaises(StopIteration, next, self.fnode) + + self.fnode.seek(0) + + line = next(self.fnode) + self.assertEqual(line, linesep) + + line = next(self.fnode) + self.assertEqual(line, b"short line" + linesep) + + def test03_Readlines(self): + """Reading a list of lines.""" + + linesep = self.line_separator + + lines = self.fnode.readlines() + self.assertEqual( + lines, + [ + linesep, + b"short line" + linesep, + b"short line" + linesep, + linesep, + b"long line " * 20 + linesep, + b"unterminated", + ], + ) + + def test04_ReadlineSize(self): + """Reading individual lines of limited size.""" + + linesep = self.line_separator + lseplen = len(linesep) + + line = self.fnode.readline() # linesep + + line = self.fnode.readline(lseplen + 20) + self.assertEqual(line, b"short line" + linesep) + + line = self.fnode.readline(5) + self.assertEqual(line, b"short") + + line = self.fnode.readline(lseplen + 20) + self.assertEqual(line, b" line" + linesep) + + line = self.fnode.readline(lseplen) + self.assertEqual(line, linesep) + + self.fnode.seek(-4, 2) + line = self.fnode.readline(4) + self.assertEqual(line, b"ated") + + self.fnode.seek(-4, 2) + line = self.fnode.readline(20) + self.assertEqual(line, b"ated") + + def test05_ReadlinesSize(self): + """Reading a list of lines with a limited size.""" + + linesep = self.line_separator + + data = "%sshort line%sshort" % ((linesep.decode("ascii"),) * 2) + data = data.encode("ascii") + lines = self.fnode.readlines(len(data)) + # self.assertEqual(lines, [linesep, b'short line' + linesep, b'short']) + # + # line = self.fnode.readline() + # self.assertEqual(line, b' line' + linesep) + + # NOTE: the test is relaxed because the *hint* parameter of + # io.BaseIO.readlines controls the amout of read data in a coarse way + self.assertEqual(len(lines), len(data.split(b"\n"))) + self.assertEqual(lines[:-1], [linesep, b"short line" + linesep]) + self.assertTrue(lines[-1].startswith(b"short")) + + +class 
MonoReadlineTestCase(ReadlineTestCase): + """Tests reading one-byte-separated text lines from an existing + file node.""" + + line_separator = b"\n" + + +# class MultiReadlineTestCase(ReadlineTestCase): +# "Tests reading multibyte-separated text lines from an existing file node." +# +# line_separator = b'
' + + +# class LineSeparatorTestCase(TempFileMixin, TestCase): +# "Tests text line separator manipulation in a file node." +# +# def setUp(self): +# """setUp() -> None +# +# This method sets the following instance attributes: +# * 'h5fname', the name of the temporary HDF5 file +# * 'h5file', the writable, temporary HDF5 file with a '/test' node +# * 'fnode', the writable file node in '/test' +# """ +# super().setUp() +# self.fnode = filenode.new_node(self.h5file, where='/', name='test') +# +# def tearDown(self): +# """tearDown() -> None +# +# Closes 'fnode' and 'h5file'; removes 'h5fname'. +# """ +# self.fnode.close() +# self.fnode = None +# super().tearDown() +# +# def test00_DefaultLineSeparator(self): +# "Default line separator." +# +# self.assertEqual( +# self.fnode.line_separator, os.linesep.encode('ascii'), +# "Default line separator does not match that in os.linesep.") +# +# def test01_SetLineSeparator(self): +# "Setting a valid line separator." +# +# try: +# self.fnode.line_separator = b'SEPARATOR' +# except ValueError: +# self.fail("Valid line separator was not accepted.") +# else: +# self.assertEqual( +# self.fnode.line_separator, b'SEPARATOR', +# "Line separator was not correctly set.") +# +# def test02_SetInvalidLineSeparator(self): +# "Setting an invalid line separator." 
+# +# self.assertRaises( +# ValueError, setattr, self.fnode, 'line_separator', b'') +# self.assertRaises( +# ValueError, setattr, self.fnode, 'line_separator', b'x' * 1024) +# self.assertRaises( +# TypeError, setattr, self.fnode, 'line_separator', 'x') + + +class AttrsTestCase(TempFileMixin, TestCase): + """Tests setting and getting file node attributes.""" + + def setUp(self): + """setUp() -> None + + This method sets the following instance attributes: + * 'h5fname', the name of the temporary HDF5 file + * 'h5file', the writable, temporary HDF5 file with a '/test' node + * 'fnode', the writable file node in '/test' + + """ + + super().setUp() + self.fnode = filenode.new_node(self.h5file, where="/", name="test") + + def tearDown(self): + """tearDown() -> None + + Closes 'fnode' and 'h5file'; removes 'h5fname'. + + """ + + self.fnode.close() + self.fnode = None + super().tearDown() + + # This no longer works since type and type version attributes + # are now system attributes. ivb(2004-12-29) + # def test00_GetTypeAttr(self): + # "Getting the type attribute of a file node." + # + # self.assertEqual( + # getattr(self.fnode.attrs, '_type', None), filenode.NodeType, + # "File node has no '_type' attribute.") + def test00_MangleTypeAttrs(self): + """Mangling the type attributes on a file node.""" + + nodeType = getattr(self.fnode.attrs, "NODE_TYPE", None) + self.assertEqual( + nodeType, + filenode.NodeType, + "File node does not have a valid 'NODE_TYPE' attribute.", + ) + + nodeTypeVersion = getattr(self.fnode.attrs, "NODE_TYPE_VERSION", None) + self.assertTrue( + nodeTypeVersion in filenode.NodeTypeVersions, + "File node does not have a valid 'NODE_TYPE_VERSION' attribute.", + ) + + # System attributes are now writable. 
ivb(2004-12-30) + # self.assertRaises( + # AttributeError, + # setattr, self.fnode.attrs, 'NODE_TYPE', 'foobar') + # self.assertRaises( + # AttributeError, + # setattr, self.fnode.attrs, 'NODE_TYPE_VERSION', 'foobar') + + # System attributes are now removables. F. Alted (2007-03-06) + + # self.assertRaises( + # AttributeError, + # delattr, self.fnode.attrs, 'NODE_TYPE') + # self.assertRaises( + # AttributeError, + # delattr, self.fnode.attrs, 'NODE_TYPE_VERSION') + + # System attributes are now writable. ivb(2004-12-30) + # def test01_SetSystemAttr(self): + # "Setting a system attribute on a file node." + # + # self.assertRaises( + # AttributeError, setattr, self.fnode.attrs, 'CLASS', 'foobar') + def test02_SetGetDelUserAttr(self): + """Setting a user attribute on a file node.""" + + self.assertEqual( + getattr(self.fnode.attrs, "userAttr", None), + None, + "Inexistent attribute has a value that is not 'None'.", + ) + + self.fnode.attrs.userAttr = "foobar" + self.assertEqual( + getattr(self.fnode.attrs, "userAttr", None), + "foobar", + "User attribute was not correctly set.", + ) + + self.fnode.attrs.userAttr = "bazquux" + self.assertEqual( + getattr(self.fnode.attrs, "userAttr", None), + "bazquux", + "User attribute was not correctly changed.", + ) + + del self.fnode.attrs.userAttr + self.assertEqual( + getattr(self.fnode.attrs, "userAttr", None), + None, + "User attribute was not deleted.", + ) + # Another way is looking up the attribute in the attribute list. 
+ # if 'userAttr' in self.fnode.attrs._f_list(): + # self.fail("User attribute was not deleted.") + + def test03_AttrsOnClosedFile(self): + """Accessing attributes on a closed file node.""" + + self.fnode.close() + self.assertRaises(AttributeError, getattr, self.fnode, "attrs") + + +class ClosedH5FileTestCase(TempFileMixin, TestCase): + """Tests accessing a file node in a closed PyTables file.""" + + def setUp(self): + """setUp() -> None + + This method sets the following instance attributes: + * 'h5fname', the name of the temporary HDF5 file + * 'h5file', the closed HDF5 file with a '/test' node + * 'fnode', the writable file node in '/test' + + """ + + super().setUp() + self.fnode = filenode.new_node(self.h5file, where="/", name="test") + self.h5file.close() + + def tearDown(self): + """tearDown() -> None + + Closes 'fnode'; removes 'h5fname'. + + """ + + # ivilata: We know that a UserWarning will be raised + # because the PyTables file has already been closed. + # However, we don't want it to pollute the test output. + warnings.filterwarnings("ignore", category=UserWarning) + try: + self.fnode.close() + except ValueError: + pass + finally: + warnings.filterwarnings("default", category=UserWarning) + + self.fnode = None + super().tearDown() + + def test00_Write(self): + """Writing to a file node in a closed PyTables file.""" + + self.assertRaises(ValueError, self.fnode.write, "data") + + def test01_Attrs(self): + """Accessing the attributes of a file node in a closed + PyTables file.""" + + self.assertRaises(ValueError, getattr, self.fnode, "attrs") + + +class OldVersionTestCase(TestCase): + """Base class for old version compatibility test cases. + + It provides some basic tests for file operations and attribute handling. + Sub-classes must provide the 'oldversion' attribute + and the 'oldh5fname' attribute. + + """ + + def setUp(self): + """This method sets the following instance attributes: + + * ``h5fname``: the name of the temporary HDF5 file. 
+ * ``h5file``: the writable, temporary HDF5 file with a ``/test`` node. + * ``fnode``: the readable file node in ``/test``. + + """ + + super().setUp() + self.h5fname = tempfile.mktemp(suffix=".h5") + + self.oldh5fname = test_file(self.oldh5fname) + oldh5f = open_file(self.oldh5fname) + oldh5f.copy_file(self.h5fname) + oldh5f.close() + + self.h5file = open_file( + self.h5fname, + "r+", + title="Test for file node old version compatibility", + ) + self.fnode = filenode.open_node(self.h5file.root.test, "a+") + + def tearDown(self): + """Closes ``fnode`` and ``h5file``; removes ``h5fname``.""" + + self.fnode.close() + self.fnode = None + self.h5file.close() + self.h5file = None + Path(self.h5fname).unlink() + super().tearDown() + + def test00_Read(self): + """Reading an old version file node.""" + + # self.fnode.line_separator = '\n' + + line = self.fnode.readline() + self.assertEqual(line, "This is only\n") + + line = self.fnode.readline() + self.assertEqual(line, "a test file\n") + + line = self.fnode.readline() + self.assertEqual(line, "for FileNode version %d\n" % self.oldversion) + + line = self.fnode.readline() + self.assertEqual(line, "") + + self.fnode.seek(0) + line = self.fnode.readline() + self.assertEqual(line, "This is only\n") + + def test01_Write(self): + """Writing an old version file node.""" + + # self.fnode.line_separator = '\n' + + self.fnode.write("foobar\n") + self.fnode.seek(-7, 2) + line = self.fnode.readline() + self.assertEqual(line, "foobar\n") + + def test02_Attributes(self): + """Accessing attributes in an old version file node.""" + + self.fnode.attrs.userAttr = "foobar" + self.assertEqual( + getattr(self.fnode.attrs, "userAttr", None), + "foobar", + "User attribute was not correctly set.", + ) + + self.fnode.attrs.userAttr = "bazquux" + self.assertEqual( + getattr(self.fnode.attrs, "userAttr", None), + "bazquux", + "User attribute was not correctly changed.", + ) + + del self.fnode.attrs.userAttr + self.assertEqual( + 
getattr(self.fnode.attrs, "userAttr", None), + None, + "User attribute was not deleted.", + ) + + +class Version1TestCase(OldVersionTestCase): + """Basic test for version 1 format compatibility.""" + + oldversion = 1 + oldh5fname = "test_filenode_v1.h5" + + +class DirectReadWriteTestCase(TempFileMixin, TestCase): + + datafname = "test_filenode.dat" + + def setUp(self): + """This method sets the following instance attributes: + + * ``h5fname``: the name of the temporary HDF5 file. + * ``h5file``, the writable, temporary HDF5 file with a '/test' node + * ``datafname``: the name of the data file to be stored in the + temporary HDF5 file. + * ``data``: the contents of the file ``datafname`` + * ``testfname``: the name of a temporary file to be written to. + + """ + + super().setUp() + self.datafname = test_file(self.datafname) + self.testfname = tempfile.mktemp() + self.testh5fname = tempfile.mktemp(suffix=".h5") + self.data = Path(self.datafname).read_bytes() + self.testdir = tempfile.mkdtemp() + + def tearDown(self): + """tearDown() -> None + + Closes 'fnode' and 'h5file'; removes 'h5fname'. + + """ + if os.access(self.testfname, os.R_OK): + Path(self.testfname).unlink() + if os.access(self.testh5fname, os.R_OK): + Path(self.testh5fname).unlink() + shutil.rmtree(self.testdir) + super().tearDown() + + def test01_WriteToPathlibPath(self): + testh5fname = Path(self.testh5fname) + datafname = Path(self.datafname) + filenode.save_to_filenode(testh5fname, datafname, "/test1") + + def test01_WriteToFilename(self): + # write contents of datafname to h5 testfile + filenode.save_to_filenode(self.testh5fname, self.datafname, "/test1") + # make sure writing to an existing node doesn't work ... + self.assertRaises( + IOError, + filenode.save_to_filenode, + self.testh5fname, + self.datafname, + "/test1", + ) + # ... 
except if overwrite is True + filenode.save_to_filenode( + self.testh5fname, self.datafname, "/test1", overwrite=True + ) + # write again, this time specifying a name + filenode.save_to_filenode( + self.testh5fname, self.datafname, "/", name="test2" + ) + # read from test h5file + filenode.read_from_filenode(self.testh5fname, self.testfname, "/test1") + # and compare result to what it should be + self.assertEqual(Path(self.testfname).read_bytes(), self.data) + # make sure extracting to an existing file doesn't work ... + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + self.assertRaises( + IOError, + filenode.read_from_filenode, + self.testh5fname, + self.testfname, + "/test1", + ) + # except overwrite is True. And try reading with a name + filenode.read_from_filenode( + self.testh5fname, self.testfname, "/", name="test2", overwrite=True + ) + # and compare to what it should be + self.assertEqual(Path(self.testfname).read_bytes(), self.data) + # cleanup + Path(self.testfname).unlink() + Path(self.testh5fname).unlink() + + def test02_WriteToHDF5File(self): + # write contents of datafname to h5 testfile + filenode.save_to_filenode(self.h5file, self.datafname, "/test1") + # make sure writing to an existing node doesn't work ... + self.assertRaises( + IOError, + filenode.save_to_filenode, + self.h5file, + self.datafname, + "/test1", + ) + # ... except if overwrite is True + filenode.save_to_filenode( + self.h5file, self.datafname, "/test1", overwrite=True + ) + # read from test h5file + filenode.read_from_filenode(self.h5file, self.testfname, "/test1") + # and compare result to what it should be + self.assertEqual(Path(self.testfname).read_bytes(), self.data) + # make sure extracting to an existing file doesn't work ... 
+ self.assertRaises( + IOError, + filenode.read_from_filenode, + self.h5file, + self.testfname, + "/test1", + ) + # make sure the original h5file is still alive and kicking + self.assertEqual(isinstance(self.h5file, file.File), True) + self.assertEqual(self.h5file.mode, "w") + + def test03_AutomaticNameGuessing(self): + # write using the filename as node name + filenode.save_to_filenode(self.testh5fname, self.datafname, "/") + # and read again + datafname = Path(self.datafname).name + filenode.read_from_filenode( + self.testh5fname, + self.testdir, + "/", + name=datafname.replace(".", "_"), + ) + # test if the output file really has the expected name + self.assertEqual( + os.access(Path(self.testdir) / datafname, os.R_OK), True + ) + # and compare result to what it should be + self.assertEqual( + (Path(self.testdir) / datafname).read_bytes(), self.data + ) + + def test04_AutomaticNameGuessingWithFilenameAttribute(self): + # write using the filename as node name + filenode.save_to_filenode(self.testh5fname, self.datafname, "/") + # and read again + datafname = Path(self.datafname).name + filenode.read_from_filenode( + self.testh5fname, self.testdir, "/", name=datafname + ) + # test if the output file really has the expected name + self.assertEqual( + os.access(Path(self.testdir) / datafname, os.R_OK), True + ) + # and compare result to what it should be + self.assertEqual( + (Path(self.testdir) / datafname).read_bytes(), self.data + ) + + def test05_ReadFromNonexistingNodeRaises(self): + # write using the filename as node name + filenode.save_to_filenode(self.testh5fname, self.datafname, "/") + # and read again + self.assertRaises( + NoSuchNodeError, + filenode.read_from_filenode, + self.testh5fname, + self.testdir, + "/", + name="THISNODEDOESNOTEXIST", + ) + + +def suite(): + """suite() -> test suite + + Returns a test suite consisting of all the test cases in the module. 
+ + """ + + theSuite = unittest.TestSuite() + + theSuite.addTest(make_suite(NewFileTestCase)) + theSuite.addTest(make_suite(ClosedFileTestCase)) + theSuite.addTest(make_suite(WriteFileTestCase)) + theSuite.addTest(make_suite(OpenFileTestCase)) + theSuite.addTest(make_suite(ReadFileTestCase)) + theSuite.addTest(make_suite(MonoReadlineTestCase)) + # theSuite.addTest(make_suite(MultiReadlineTestCase)) + # theSuite.addTest(make_suite(LineSeparatorTestCase)) + theSuite.addTest(make_suite(AttrsTestCase)) + theSuite.addTest(make_suite(ClosedH5FileTestCase)) + theSuite.addTest(make_suite(DirectReadWriteTestCase)) + + return theSuite + + +if __name__ == "__main__": + import sys + + parse_argv(sys.argv) + print_versions() + unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/nodes/tests/test_filenode.xbm b/venv/Lib/site-packages/tables/nodes/tests/test_filenode.xbm new file mode 100644 index 0000000..5471bc7 --- /dev/null +++ b/venv/Lib/site-packages/tables/nodes/tests/test_filenode.xbm @@ -0,0 +1,46 @@ +#define test_width 64 +#define test_height 64 +static char test_bits[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xF1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xC0, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xC4, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x3F, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0x1E, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0xB8, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC7, 0xF8, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE1, 0xF1, 0xFE, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xF3, 0x1F, 0xCF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x1F, 0xC7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0xC7, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xC7, 0xE3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xE3, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0xF8, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x38, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+ 0xFF, 0x3D, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x03, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x87, 0x01, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xEF, 0x70, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE1, 0xFC, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x84, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x18, 0x7C, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x19, 0x7C, + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x31, 0x3C, 0xFC, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0x63, 0x0E, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x87, 0x87, + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xCF, 0xC7, 0x9E, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x61, 0xCF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x31, + 0xC3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x38, 0xE1, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xBC, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x9F, + 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, 0xBC, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x0F, 0x07, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, + 0x03, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x9F, 0xF1, 0xF8, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xF1, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xF9, 0xF1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0xF9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xF1, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x63, 0x18, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, 0x06, 0x9E, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x42, 0x84, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xF1, 0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0xE1, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF8, 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x79, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x71, 0xFC, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, 0x1E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x07, 0x03, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8F, 0x61, 0xFC, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xDF, 0xC1, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xC1, 0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x19, 0xF9, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x19, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, + 
"""Parameters for PyTables.

Every public name in this module is a default value for one tunable
setting.  Only the values defined here are described; how each one is
consumed is decided by the modules that read it.
"""

import os as _os

__docformat__ = "reStructuredText"
"""The format of documentation strings in this module."""

_KB = 1024
"""The size of a Kilobyte in bytes"""

_MB = 1024 * _KB
"""The size of a Megabyte in bytes"""

# Tunable parameters
# ==================
# Be careful when touching these!

# Parameters for different internal caches
# ----------------------------------------

BOUNDS_MAX_SIZE = 1 * _MB
"""The maximum size for bounds values cached during index lookups."""

BOUNDS_MAX_SLOTS = 4 * _KB
"""The maximum number of slots for the BOUNDS cache."""

ITERSEQ_MAX_ELEMENTS = 1 * _KB
"""The maximum number of iterator elements cached in data lookups."""

ITERSEQ_MAX_SIZE = 1 * _MB
"""The maximum space that the ITERSEQ cache will take (in bytes)."""

ITERSEQ_MAX_SLOTS = 128
"""The maximum number of slots in ITERSEQ cache."""

LIMBOUNDS_MAX_SIZE = 256 * _KB
"""The maximum size for the query limits (for example, ``(lim1, lim2)``
in conditions like ``lim1 <= col < lim2``) cached during index lookups
(in bytes)."""

LIMBOUNDS_MAX_SLOTS = 128
"""The maximum number of slots for LIMBOUNDS cache."""

TABLE_MAX_SIZE = 1 * _MB
"""The maximum size for table chunks cached during index queries."""

SORTED_MAX_SIZE = 1 * _MB
"""The maximum size for sorted values cached during index lookups."""

SORTEDLR_MAX_SIZE = 8 * _MB
"""The maximum size for chunks in last row cached in index lookups (in
bytes)."""

SORTEDLR_MAX_SLOTS = 1 * _KB
"""The maximum number of chunks for SORTEDLR cache."""


# Parameters for general cache behaviour
# --------------------------------------
#
# The next parameters will not be effective if passed to the
# `open_file()` function (so, they can only be changed in a *global*
# way).  You can change them in the file, but this is strongly
# discouraged unless you know well what you are doing.

DISABLE_EVERY_CYCLES = 10
"""The number of cycles in which a cache will be forced to be disabled
if the hit ratio is lower than the LOWEST_HIT_RATIO (see below).  This
value should provide time enough to check whether the cache is being
efficient or not."""

ENABLE_EVERY_CYCLES = 50
"""The number of cycles in which a cache will be forced to be
(re-)enabled, regardless of the hit ratio.  This will provide a chance
for checking if we are in a better scenario for doing caching again."""

LOWEST_HIT_RATIO = 0.6
"""The minimum acceptable hit ratio for a cache to avoid disabling (and
freeing) it."""


# Tunable parameters
# ==================
# Be careful when touching these!

# Recommended maximum values
# --------------------------

# Following are the recommended values for several limits.  However,
# these limits are somewhat arbitrary and can be increased if you have
# enough resources.

MAX_COLUMNS = 512
"""Maximum number of columns in :class:`tables.Table` objects before a
:exc:`tables.PerformanceWarning` is issued.  This limit is somewhat
arbitrary and can be increased.
"""

MAX_NODE_ATTRS = 4 * _KB
"""Maximum allowed number of attributes in a node."""

MAX_GROUP_WIDTH = 16 * _KB
"""Maximum allowed number of children hanging from a group."""

MAX_TREE_DEPTH = 2 * _KB
"""Maximum depth in object tree allowed."""

MAX_UNDO_PATH_LENGTH = 10 * _KB
"""Maximum length of paths allowed in undo/redo operations."""


# Cache limits
# ------------

COND_CACHE_SLOTS = 128
"""Maximum number of conditions for table queries to be kept in memory."""

CHUNK_CACHE_NELMTS = 521
"""Number of elements for HDF5 chunk cache."""

CHUNK_CACHE_PREEMPT = 0.0
"""Chunk preemption policy.  This value should be between 0 and 1
inclusive and indicates how much chunks that have been fully read are
favored for preemption.  A value of zero means fully read chunks are
treated no differently than other chunks (the preemption is strictly
LRU) while a value of one means fully read chunks are always preempted
before other chunks."""

CHUNK_CACHE_SIZE = 16 * _MB
"""Size (in bytes) for HDF5 chunk cache."""

# Size for new metadata cache system
METADATA_CACHE_SIZE = 1 * _MB  # 1 MB is the default for HDF5
"""Size (in bytes) of the HDF5 metadata cache."""


# NODE_CACHE_SLOTS tells the number of nodes that fit in the cache.
#
# There are several forces driving the election of this number:
# 1.- As more nodes, better chances to re-use nodes
#     --> better performance
# 2.- As more nodes, the re-ordering of the LRU cache takes more time
#     --> less performance
# 3.- As more nodes, the memory needs for PyTables grows, especially for
#     table writings (that could take double of memory than table reads!).
#
# The default value here is quite conservative.  If you have a system
# with tons of memory, and if you are touching regularly a very large
# number of leaves, try increasing this value and see if it fits better
# for you.  Please report back your feedback.
# Using a lower value than 64 provides a workaround for issue #977.
NODE_CACHE_SLOTS = 32
"""Maximum number of nodes to be kept in the metadata cache.

It is the number of nodes to be kept in the metadata cache.  Least recently
used nodes are unloaded from memory when this number of loaded nodes is
reached.  To load a node again, simply access it as usual.
Nodes referenced by user variables and, in general, all nodes that are still
open are registered in the node manager and can be quickly accessed even
if they are not in the cache.

Negative value means that all the touched nodes will be kept in an
internal dictionary.  This is the faster way to load/retrieve nodes.
However, and in order to avoid a large memory consumption, the user will
be warned when the number of loaded nodes will reach the
``-NODE_CACHE_SLOTS`` value.

Finally, a value of zero means that any cache mechanism is disabled.
"""


# Parameters for the I/O buffer in `Leaf` objects
# -----------------------------------------------

IO_BUFFER_SIZE = 16 * _MB
"""The PyTables internal buffer size for I/O purposes.  Should not
exceed the amount of highest level cache size in your CPU."""

BUFFER_TIMES = 100
"""The maximum buffersize/rowsize ratio before issuing a
:exc:`tables.PerformanceWarning`."""


# Miscellaneous
# -------------

EXPECTED_ROWS_EARRAY = 1000
"""Default expected number of rows for :class:`EArray` objects."""

EXPECTED_ROWS_VLARRAY = 1000
"""Default expected number of rows for :class:`VLArray` objects.

.. versionadded:: 3.0

"""

EXPECTED_ROWS_TABLE = 10_000
"""Default expected number of rows for :class:`Table` objects."""

PYTABLES_SYS_ATTRS = True
"""Set this to ``False`` if you don't want to create PyTables system
attributes in datasets.  Also, if set to ``False`` the possible existing
system attributes are not considered for guessing the class of the node
during its loading from disk (this work is delegated to the PyTables'
class discoverer function for general HDF5 files)."""

# The default is taken from the environment at import time; a value that
# ``int()`` cannot parse makes the import of this module fail.
MAX_NUMEXPR_THREADS = int(_os.environ.get("NUMEXPR_MAX_THREADS", 4))
"""The maximum number of threads that PyTables should use internally in
Numexpr.  The default is read from the ``NUMEXPR_MAX_THREADS``
environment variable when it is set, and is 4 otherwise.  If `None`, it
is automatically set to the number of cores in your machine.  In
general, it is a good idea to set this to the number of cores in your
machine or, when your machine has many of them (e.g. > 8), perhaps stay
at 8 at maximum.  In general, 4 threads is a good tradeoff."""

MAX_BLOSC_THREADS = 1  # 1 is safe for concurrency
"""The maximum number of threads that PyTables should use internally in
Blosc.  If `None`, it is automatically set to the number of cores in
your machine.  For applications that use several PyTables instances
concurrently and so as to avoid locking problems, the recommended value
is 1.  In other cases a value of 2 or 4 could make sense.

"""

USER_BLOCK_SIZE = 0
"""Sets the user block size of a file.

The default user block size is 0; it may be set to any power of 2 equal
to 512 or greater (512, 1024, 2048, etc.).

.. versionadded:: 3.0

"""

ALLOW_PADDING = True
"""Allow padding in compound data types.

Starting on version 3.5 padding is honored during copies, or when tables
are created from NumPy structured arrays with padding (e.g. `align=True`).
If you actually want to get rid of any possible padding in new
datasets/attributes (i.e. the previous behaviour), set this to `False`.

.. versionadded:: 3.5

"""


# HDF5 driver management
# ----------------------
DRIVER = None
"""The HDF5 driver that should be used for reading/writing to the file.

The following drivers are supported:

    * H5FD_SEC2: this driver uses POSIX file-system functions like read
      and write to perform I/O to a single, permanent file on local
      disk with no system buffering.
      This driver is POSIX-compliant and is the default file driver for
      all systems.

    * H5FD_DIRECT: this is the H5FD_SEC2 driver except data is written
      to or read from the file synchronously without being cached by
      the system.

    * H5FD_WINDOWS: this driver was modified in HDF5-1.8.8 to be a
      wrapper of the POSIX driver, H5FD_SEC2. This change should not
      affect user applications.

    * H5FD_STDIO: this driver uses functions from the standard C
      stdio.h to perform I/O to a single, permanent file on local disk
      with additional system buffering.

    * H5FD_CORE: with this driver, an application can work with a file
      in memory for faster reads and writes. File contents are kept in
      memory until the file is closed. At closing, the memory version
      of the file can be written back to disk or abandoned.

    * H5FD_SPLIT: this file driver splits a file into two parts.
      One part stores metadata, and the other part stores raw data.
      This splitting a file into two parts is a limited case of the
      Multi driver.

The following drivers are not currently supported:

    * H5FD_LOG: this is the H5FD_SEC2 driver with logging capabilities.

    * H5FD_FAMILY: with this driver, the HDF5 file’s address space is
      partitioned into pieces and sent to separate storage files using
      an underlying driver of the user’s choice.
      This driver is for systems that do not support files larger than
      2 gigabytes.

    * H5FD_MULTI: with this driver, data can be stored in multiple
      files according to the type of the data. I/O might work better if
      data is stored in separate files based on the type of data.
      The Split driver is a special case of this driver.

    * H5FD_MPIO: this is the standard HDF5 file driver for parallel
      file systems. This driver uses the MPI standard for both
      communication and file I/O.

    * H5FD_MPIPOSIX: this parallel file system driver uses MPI for
      communication and POSIX file-system calls for file I/O.

    * H5FD_STREAM: this driver is no longer available.

.. seealso:: the `Drivers section`_ of the `HDF5 User's Guide`_ for
   more information.

.. note::

    not all supported drivers are always available. For example the
    H5FD_WINDOWS driver is not available on non Windows platforms.

    If the user tries to use a driver that is not available on the
    target platform a :exc:`RuntimeError` is raised.

.. versionadded:: 3.0

.. _`Drivers section`:
    http://www.hdfgroup.org/HDF5/doc/UG/08_TheFile.html#Drivers
.. _`HDF5 User's Guide`: http://www.hdfgroup.org/HDF5/doc/UG/index.html

"""

DRIVER_DIRECT_ALIGNMENT = 0
"""Specifies the required alignment boundary in memory.

A value of 0 (zero) means to use HDF5 Library’s default value.

.. versionadded:: 3.0

"""

DRIVER_DIRECT_BLOCK_SIZE = 0
"""Specifies the file system block size.

A value of 0 (zero) means to use HDF5 Library’s default value of 4KB.

.. versionadded:: 3.0

"""

DRIVER_DIRECT_CBUF_SIZE = 0
"""Specifies the copy buffer size.

A value of 0 (zero) means to use HDF5 Library’s default value.

.. versionadded:: 3.0

"""

# NOTE: the H5FD_LOG driver is listed above as not supported, so its
# parameters are kept here only as disabled placeholders.
#
# DRIVER_LOG_FLAGS = 0x0001ffff
# """Flags specifying the types of logging activity.
#
# .. versionadded:: 3.0
#
# .. seealso::
#    http://www.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetFaplLog
#
# """
#
# DRIVER_LOG_BUF_SIZE = 4 * _KB
# """The size of the logging buffers, in bytes.
#
# One buffer of size DRIVER_LOG_BUF_SIZE will be created for each of
# H5FD_LOG_FILE_READ, H5FD_LOG_FILE_WRITE and H5FD_LOG_FLAVOR when those
# flags are set; these buffers will not grow as the file increases in
# size.
#
# .. versionadded:: 3.0
#
# """

DRIVER_CORE_INCREMENT = 64 * _KB
"""Core driver memory increment.

Specifies the increment by which allocated memory is to be increased
each time more memory is required.

.. versionadded:: 3.0

"""

DRIVER_CORE_BACKING_STORE = 1
"""Enables backing store for the core driver.

With the H5FD_CORE driver, if the DRIVER_CORE_BACKING_STORE is set
to 1 (True), the file contents are flushed to a file with the same name
as this core file when the file is closed or access to the file is
terminated in memory.

The application is allowed to open an existing file with H5FD_CORE
driver. In that case, if the DRIVER_CORE_BACKING_STORE is set to 1 and
the flags for :func:`tables.open_file` is set to H5F_ACC_RDWR, any change
to the file contents are saved to the file when the file is closed.
If backing_store is set to 0 and the flags for :func:`tables.open_file`
is set to H5F_ACC_RDWR, any change to the file contents will be lost
when the file is closed. If the flags for :func:`tables.open_file` is
set to H5F_ACC_RDONLY, no change to the file is allowed either in
memory or on file.

.. versionadded:: 3.0

"""

DRIVER_CORE_IMAGE = None
"""String containing an HDF5 file image.

If this option is passed to the :func:`tables.open_file` function then the
returned file object is set up using the specified image.

A file image can be retrieved from an existing (and opened) file object
using the :meth:`tables.File.get_file_image` method.

.. versionadded:: 3.0

"""

DRIVER_SPLIT_META_EXT = "-m.h5"
"""The extension for the metadata file used by the H5FD_SPLIT driver.

If this option is passed to the :func:`tables.open_file` function along
with driver='H5FD_SPLIT', the extension is appended to the name passed
as the first parameter to form the name of the metadata file. If the
string '%s' is used in the extension, the metadata file name is formed
by replacing '%s' with the name passed as the first parameter instead.

.. versionadded:: 3.1

"""

DRIVER_SPLIT_RAW_EXT = "-r.h5"
"""The extension for the raw data file used by the H5FD_SPLIT driver.

If this option is passed to the :func:`tables.open_file` function along
with driver='H5FD_SPLIT', the extension is appended to the name passed
as the first parameter to form the name of the raw data file. If the
string '%s' is used in the extension, the raw data file name is formed
by replacing '%s' with the name passed as the first parameter instead.

.. versionadded:: 3.1

"""
_hidden_name_re = re.compile("^_[pi]_")
"""Nodes with a name *matching* this expression are considered hidden.

For instance, ``name`` would be visible while ``_i_name`` would not.
"""

_hidden_path_re = re.compile("/_[pi]_")
"""Nodes with a path *containing* this expression are considered hidden.

For instance, a node with a pathname like ``/a/b/c`` would be visible
while nodes with pathnames like ``/a/c/_i_x`` or ``/a/_p_x/y`` would
not.
"""

_warn_info = (
    "you will not be able to use natural naming to access this object; "
    "using ``getattr()`` will still work, though"
)
"""Warning printed when a name will not be reachable through natural naming"""


def check_attribute_name(name: str) -> None:
    """Check the validity of the `name` of an attribute in AttributeSet.

    If the name is not valid, a ``ValueError`` is raised (``TypeError``
    when it is not a string at all).  If it is valid but it can not be
    used with natural naming, a `NaturalNameWarning` is issued and the
    remaining checks are skipped.

    >>> warnings.simplefilter("ignore")
    >>> check_attribute_name('a')
    >>> check_attribute_name('a_b')
    >>> check_attribute_name('a:b')         # NaturalNameWarning
    >>> check_attribute_name('/a/b')        # NaturalNameWarning
    >>> check_attribute_name('/')           # NaturalNameWarning
    >>> check_attribute_name('.')           # NaturalNameWarning
    >>> check_attribute_name('__members__')
    Traceback (most recent call last):
     ...
    ValueError: ``__members__`` is not allowed as an object name
    >>> check_attribute_name(1)
    Traceback (most recent call last):
     ...
    TypeError: object name is not a string: 1
    >>> check_attribute_name('')
    Traceback (most recent call last):
     ...
    ValueError: the empty string is not allowed as an object name
    """
    if not isinstance(name, str):
        raise TypeError(f"object name is not a string: {name!r}")

    if name == "":
        raise ValueError("the empty string is not allowed as an object name")

    # A name that is not a Python identifier is accepted, but it can only
    # be reached through ``getattr()``; warn and stop checking.
    if not _python_id_re.match(name):
        warnings.warn(
            f"object name is not a valid Python identifier: {name!r}; "
            f"it does not match the pattern ``{_python_id_re.pattern}``; "
            f"{_warn_info}",
            NaturalNameWarning,
            stacklevel=2,
        )
        return

    # Keywords have the same form as identifiers, yet ``obj.<keyword>``
    # is a syntax error, so they get the same warning treatment.
    if keyword.iskeyword(name):
        warnings.warn(
            f"object name is a Python keyword: {name!r}; {_warn_info}",
            NaturalNameWarning,
            stacklevel=2,
        )
        return

    # From here on `name` is a plain identifier: reject the reserved
    # prefixes described by ``_reserved_id_re``.
    if _reserved_id_re.match(name):
        raise ValueError(
            f"object name starts with a reserved prefix: {name!r}; "
            f"it matches the pattern ``{_reserved_id_re.pattern}``"
        )

    # ``__members__`` carries no reserved prefix, but it is rejected too.
    if name == "__members__":
        raise ValueError("``__members__`` is not allowed as an object name")


def check_name_validity(name: str) -> None:
    """Check the validity of the `name` of a Node object.

    Validity of Node names is more limited than attribute names: every
    check of :func:`check_attribute_name` applies, and in addition the
    name must be neither ``.`` nor contain a ``/``.

    If the name is not valid, a ``ValueError`` is raised.  If it is
    valid but it can not be used with natural naming, a
    `NaturalNameWarning` is issued.

    >>> warnings.simplefilter("ignore")
    >>> check_name_validity('a')
    >>> check_name_validity('a_b')
    >>> check_name_validity('a:b')          # NaturalNameWarning
    >>> check_name_validity('/a/b')
    Traceback (most recent call last):
     ...
    ValueError: the ``/`` character is not allowed in object names: '/a/b'
    >>> check_name_validity('.')
    Traceback (most recent call last):
     ...
    ValueError: ``.`` is not allowed as an object name
    >>> check_name_validity('')
    Traceback (most recent call last):
     ...
    ValueError: the empty string is not allowed as an object name

    """
    check_attribute_name(name)

    # Check whether `name` is a valid HDF5 name.
    # http://hdfgroup.org/HDF5/doc/UG/03_Model.html#Structure
    if name == ".":
        raise ValueError("``.`` is not allowed as an object name")
    if "/" in name:
        raise ValueError(
            f"the ``/`` character is not allowed in object names: {name!r}"
        )


def join_path(parentpath: str, name: str) -> str:
    """Join a *canonical* `parentpath` with a *non-empty* `name`.

    A leading ``./`` in `name` is dropped, exactly one ``/`` is placed
    between both parts, and a trailing ``/`` is removed from the result.

    .. versionchanged:: 3.0
       The *parentPath* parameter has been renamed into *parentpath*.

    >>> join_path('/', 'foo')
    '/foo'
    >>> join_path('/foo', 'bar')
    '/foo/bar'
    >>> join_path('/foo', '/foo2/bar')
    '/foo/foo2/bar'
    >>> join_path('/foo', '/')
    '/foo'

    """
    if name.startswith("./"):  # Support relative paths (mainly for links)
        name = name[2:]

    name_is_rooted = name.startswith("/")
    if parentpath == "/" and name_is_rooted:
        # Both sides bring a slash: keep only the one from `name`.
        pstr = name
    elif parentpath == "/" or name_is_rooted:
        # Exactly one side brings the separating slash.
        pstr = f"{parentpath}{name}"
    else:
        pstr = f"{parentpath}/{name}"

    if pstr.endswith("/"):
        pstr = pstr[:-1]
    return pstr


def split_path(path: str) -> tuple[str, str]:
    """Split a *canonical* `path` into a parent path and a node name.

    The result is returned as a ``(parentpath, name)`` tuple.  The
    parent path does not include a trailing slash, except when it is
    the root path ``/`` itself.

    >>> split_path('/')
    ('/', '')
    >>> split_path('/foo/bar')
    ('/foo', 'bar')

    """
    lastslash = path.rfind("/")
    ppath = path[:lastslash]
    name = path[lastslash + 1 :]

    # Children of the root have an empty prefix; report it as ``/``.
    if ppath == "":
        ppath = "/"

    return (ppath, name)


def isvisiblename(name: str) -> bool:
    """Return `True` if `name` makes the named node visible.

    A name is hidden when it starts with ``_p_`` or ``_i_``.
    """
    return _hidden_name_re.match(name) is None


def isvisiblepath(path: str) -> bool:
    """Return `True` if `path` makes the named node visible.

    A path is hidden when any of its components after a ``/`` starts
    with ``_p_`` or ``_i_``.
    """
    return _hidden_path_re.search(path) is None


def _test() -> None:
    """Run ``doctest`` on this module."""
    import doctest

    doctest.testmod()


if __name__ == "__main__":
    _test()
def get_class_by_name(classname: str | None) -> type:
    """Get the node class matching the `classname`.

    The class is looked up in ``class_name_dict``.  If the name is not
    registered there, a ``TypeError`` is raised.  The empty string and
    ``None`` are also accepted, and mean the ``Node`` class.

    .. versionadded:: 3.0

    """
    # The empty string is accepted for compatibility
    # with old default arguments.
    if classname is None or classname == "":
        classname = "Node"

    # Single lookup; a missing key is reported as the ``TypeError``
    # callers expect, without exposing the internal ``KeyError``.
    try:
        return class_name_dict[classname]
    except KeyError:
        raise TypeError(
            f"there is no registered node class named ``{classname}``"
        ) from None
# **********************************************************************
# Runtime requirements, keep versions in sync with
# "Prerequisites" in the User's Guide.
# **********************************************************************

# Minimum recommended versions for mandatory packages.
# Each value is a ``packaging.version.Version`` instance built from a
# dotted version string.
min_numpy_version = Version("1.20.0")
min_numexpr_version = Version("2.6.2")
# These are library versions, not the python modules
min_hdf5_version = Version("1.10.5")
min_blosc_version = Version("1.11.1")
min_blosc2_version = Version("2.11.0")
b/venv/Lib/site-packages/tables/scripts/__pycache__/ptrepack.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/scripts/__pycache__/pttree.cpython-311.pyc b/venv/Lib/site-packages/tables/scripts/__pycache__/pttree.cpython-311.pyc new file mode 100644 index 0000000..9898d93 Binary files /dev/null and b/venv/Lib/site-packages/tables/scripts/__pycache__/pttree.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/scripts/pt2to3.py b/venv/Lib/site-packages/tables/scripts/pt2to3.py new file mode 100644 index 0000000..30bca01 --- /dev/null +++ b/venv/Lib/site-packages/tables/scripts/pt2to3.py @@ -0,0 +1,541 @@ +"""Utility to helps the migration from PyTables 2.x APIs to 3.x APIs. + +The new API is PEP 8 compliant. + +""" + +import re +import sys +import argparse +from pathlib import Path + +old2newnames = dict( # noqa: C406 + [ + # from __init__.py + ("hdf5Version", "hdf5_version"), # data + # from array.py + ("parentNode", "parentnode"), # kwarg + ("getEnum", "get_enum"), + ("_initLoop", "_init_loop"), + ("_fancySelection", "_fancy_selection"), + ("_checkShape", "_check_shape"), + ("_readSlice", "_read_slice"), + ("_readCoords", "_read_coords"), + ("_readSelection", "_read_selection"), + ("_writeSlice", "_write_slice"), + ("_writeCoords", "_write_coords"), + ("_writeSelection", "_write_selection"), + ("_g_copyWithStats", "_g_copy_with_stats"), + ("_c_classId", "_c_classid"), # attr + # from atom.py + ("_checkBase", "_checkbase"), + # from attributeset.py + ("newSet", "newset"), # kwarg + ("copyClass", "copyclass"), # kwarg + ("_g_updateNodeLocation", "_g_update_node_location"), + ("_g_logAdd", "_g_log_add"), + ("_g_delAndLog", "_g_del_and_log"), + ("_v__nodeFile", "_v__nodefile"), # attr (private) + ("_v__nodePath", "_v__nodepath"), # attr (private) + # from carray.py + # ('parentNode', 'parentnode'), # kwarg + # from description.py + ("_g_setNestedNamesDescr", "_g_set_nested_names_descr"), + ("_g_setPathNames", "_g_set_path_names"), + 
("_v_colObjects", "_v_colobjects"), # attr + ("_v_nestedFormats", "_v_nested_formats"), # attr + ("_v_nestedNames", "_v_nested_names"), # attr + ("_v_nestedDescr", "_v_nested_descr"), # attr + ("getColsInOrder", "get_cols_in_order"), + ("joinPaths", "join_paths"), + ("metaIsDescription", "MetaIsDescription"), + # from earray.py + # ('parentNode', 'parentnode'), # kwarg + ("_checkShapeAppend", "_check_shape_append"), + # from expression.py + ("_exprvarsCache", "_exprvars_cache"), # attr (private) + ("_requiredExprVars", "_required_expr_vars"), + ("setInputsRange", "set_inputs_range"), + ("setOutput", "set_output"), + ("setOutputRange", "set_output_range"), + # from file.py + ("_opToCode", "_op_to_code"), # data (private) + ("_codeToOp", "_code_to_op"), # data (private) + ("_transVersion", "_trans_version"), # data (private) + ("_transGroupParent", "_trans_group_parent"), # data (private) + ("_transGroupName", "_trans_group_name"), # data (private) + ("_transGroupPath", "_trans_group_path"), # data (private) + ("_actionLogParent", "_action_log_parent"), # data (private) + ("_actionLogName", "_action_log_name"), # data (private) + ("_actionLogPath", "_action_log_path"), # data (private) + ("_transParent", "_trans_parent"), # data (private) + ("_transName", "_trans_name"), # data (private) + ("_transPath", "_trans_path"), # data (private) + ("_shadowParent", "_shadow_parent"), # data (private) + ("_shadowName", "_shadow_name"), # data (private) + ("_shadowPath", "_shadow_path"), # data (private) + ("copyFile", "copy_file"), + ("openFile", "open_file"), + ("_getValueFromContainer", "_get_value_from_container"), + ("__getRootGroup", "__get_root_group"), + ("rootUEP", "root_uep"), # attr + ("_getOrCreatePath", "_get_or_create_path"), + ("_createPath", "_create_path"), + ("createGroup", "create_group"), + ("createTable", "create_table"), + ("createArray", "create_array"), + ("createCArray", "create_carray"), + ("createEArray", "create_earray"), + ("createVLArray", 
"create_vlarray"), + ("createHardLink", "create_hard_link"), + ("createSoftLink", "create_soft_link"), + ("createExternalLink", "create_external_link"), + ("_getNode", "_get_node"), + ("getNode", "get_node"), + ("isVisibleNode", "is_visible_node"), + ("renameNode", "rename_node"), + ("moveNode", "move_node"), + ("copyNode", "copy_node"), + ("removeNode", "remove_node"), + ("getNodeAttr", "get_node_attr"), + ("setNodeAttr", "set_node_attr"), + ("delNodeAttr", "del_node_attr"), + ("copyNodeAttrs", "copy_node_attrs"), + ("copyChildren", "copy_children"), + ("listNodes", "list_nodes"), + ("iterNodes", "iter_nodes"), + ("walkNodes", "walk_nodes"), + ("walkGroups", "walk_groups"), + ("_checkOpen", "_check_open"), + ("_isWritable", "_iswritable"), + ("_checkWritable", "_check_writable"), + ("_checkGroup", "_check_group"), + ("isUndoEnabled", "is_undo_enabled"), + ("_checkUndoEnabled", "_check_undo_enabled"), + ("_createTransactionGroup", "_create_transaction_group"), + ("_createTransaction", "_create_transaction"), + ("_createMark", "_create_mark"), + ("enableUndo", "enable_undo"), + ("disableUndo", "disable_undo"), + ("_getMarkID", "_get_mark_id"), + ("_getFinalAction", "_get_final_action"), + ("getCurrentMark", "get_current_mark"), + ("_updateNodeLocations", "_update_node_locations"), + # from group.py + # ('parentNode', 'parentnode'), # kwarg + # ('ptFile', 'ptfile'), # kwarg + ("_getValueFromContainer", "_get_value_from_container"), + ("_g_postInitHook", "_g_post_init_hook"), + ("_g_getChildGroupClass", "_g_get_child_group_class"), + ("_g_getChildLeafClass", "_g_get_child_leaf_class"), + ("_g_addChildrenNames", "_g_add_children_names"), + ("_g_checkHasChild", "_g_check_has_child"), + ("_f_walkNodes", "_f_walknodes"), + ("_g_widthWarning", "_g_width_warning"), + ("_g_refNode", "_g_refnode"), + ("_g_unrefNode", "_g_unrefnode"), + ("_g_copyChildren", "_g_copy_children"), + ("_f_getChild", "_f_get_child"), + ("_f_listNodes", "_f_list_nodes"), + ("_f_iterNodes", 
"_f_iter_nodes"), + ("_f_walkGroups", "_f_walk_groups"), + ("_g_closeDescendents", "_g_close_descendents"), + ("_f_copyChildren", "_f_copy_children"), + ("_v_maxGroupWidth", "_v_max_group_width"), # attr + ("_v_objectID", "_v_objectid"), # attr + ("_g_loadChild", "_g_load_child"), + ("childName", "childname"), # ??? + ("_c_shadowNameRE", "_c_shadow_name_re"), # attr (private) + # from hdf5extension.p{yx,xd} + ("hdf5Extension", "hdf5extension"), + ("_getFileId", "_get_file_id"), + ("_flushFile", "_flush_file"), + ("_closeFile", "_close_file"), + ("_g_listAttr", "_g_list_attr"), + ("_g_setAttr", "_g_setattr"), + ("_g_getAttr", "_g_getattr"), + ("_g_listGroup", "_g_list_group"), + ("_g_getGChildAttr", "_g_get_gchild_attr"), + ("_g_getLChildAttr", "_g_get_lchild_attr"), + ("_g_flushGroup", "_g_flush_group"), + ("_g_closeGroup", "_g_close_group"), + ("_g_moveNode", "_g_move_node"), + ("_convertTime64", "_convert_time64"), + ("_createArray", "_create_array"), + ("_createCArray", "_create_carray"), + ("_openArray", "_open_array"), + ("_readArray", "_read_array"), + ("_g_readSlice", "_g_read_slice"), + ("_g_readCoords", "_g_read_coords"), + ("_g_readSelection", "_g_read_selection"), + ("_g_writeSlice", "_g_write_slice"), + ("_g_writeCoords", "_g_write_coords"), + ("_g_writeSelection", "_g_write_selection"), + # from idxutils.py + ("calcChunksize", "calc_chunksize"), + ("infinityF", "infinityf"), # data + ("infinityMap", "infinitymap"), # data + ("infType", "inftype"), + ("StringNextAfter", "string_next_after"), + ("IntTypeNextAfter", "int_type_next_after"), + ("BoolTypeNextAfter", "bool_type_next_after"), + # from index.py + # ('parentNode', 'parentnode'), # kwarg + ("defaultAutoIndex", "default_auto_index"), # data + ("defaultIndexFilters", "default_index_filters"), # data + ("_tableColumnPathnameOfIndex", "_table_column_pathname_of_index"), + ("_is_CSI", "_is_csi"), + ("is_CSI", "is_csi"), # property + ("appendLastRow", "append_last_row"), + ("read_sliceLR", 
"read_slice_lr"), + ("readSorted", "read_sorted"), + ("readIndices", "read_indices"), + ("_processRange", "_process_range"), + ("searchLastRow", "search_last_row"), + ("getLookupRange", "get_lookup_range"), + ("_g_checkName", "_g_check_name"), + # from indexes.py + # ('parentNode', 'parentnode'), # kwarg + ("_searchBin", "_search_bin"), + # from indexesextension + ("indexesExtension", "indexesextension"), + ("initRead", "initread"), + ("readSlice", "read_slice"), + ("_readIndexSlice", "_read_index_slice"), + ("_initSortedSlice", "_init_sorted_slice"), + ("_g_readSortedSlice", "_g_read_sorted_slice"), + ("_readSortedSlice", "_read_sorted_slice"), + ("getLRUbounds", "get_lru_bounds"), + ("getLRUsorted", "get_lru_sorted"), + ("_searchBinNA_b", "_search_bin_na_b"), + ("_searchBinNA_ub", "_search_bin_na_ub"), + ("_searchBinNA_s", "_search_bin_na_s"), + ("_searchBinNA_us", "_search_bin_na_us"), + ("_searchBinNA_i", "_search_bin_na_i"), + ("_searchBinNA_ui", "_search_bin_na_ui"), + ("_searchBinNA_ll", "_search_bin_na_ll"), + ("_searchBinNA_ull", "_search_bin_na_ull"), + ("_searchBinNA_e", "_search_bin_na_e"), + ("_searchBinNA_f", "_search_bin_na_f"), + ("_searchBinNA_d", "_search_bin_na_d"), + ("_searchBinNA_g", "_search_bin_na_g"), + # from leaf.py + # ('parentNode', 'parentnode'), # kwarg + ("objectID", "object_id"), # property + ("_processRangeRead", "_process_range_read"), + ("_pointSelection", "_point_selection"), + ("isVisible", "isvisible"), + ("getAttr", "get_attr"), + ("setAttr", "set_attr"), + ("delAttr", "del_attr"), + # from link.py + # ('parentNode', 'parentnode'), # kwarg + ("_g_getLinkClass", "_g_get_link_class"), + # from linkextension + ("linkExtension", "linkextension"), + ("_getLinkClass", "_get_link_class"), + ("_g_createHardLink", "_g_create_hard_link"), + # from lrucacheextension + ("lrucacheExtension", "lrucacheextension"), + # from misc/enum.py + ("_checkAndSetPair", "_check_and_set_pair"), + ("_getContainer", "_get_container"), + # from 
misc/proxydict.py + ("containerRef", "containerref"), # attr + # from node.py + # ('parentNode', 'parentnode'), # kwarg + ("_g_logCreate", "_g_log_create"), + ("_g_preKillHook", "_g_pre_kill_hook"), + ("_g_checkOpen", "_g_check_open"), + ("_g_setLocation", "_g_set_location"), + ("_g_updateLocation", "_g_update_location"), + ("_g_delLocation", "_g_del_location"), + ("_g_updateDependent", "_g_update_dependent"), + ("_g_removeAndLog", "_g_remove_and_log"), + ("_g_logMove", "_g_log_move"), + ("oldPathname", "oldpathname"), # ?? + ("_g_copyAsChild", "_g_copy_as_child"), + ("_f_isVisible", "_f_isvisible"), + ("_g_checkGroup", "_g_check_group"), + ("_g_checkNotContains", "_g_check_not_contains"), + ("_g_maybeRemove", "_g_maybe_remove"), + ("_f_getAttr", "_f_getattr"), + ("_f_setAttr", "_f_setattr"), + ("_f_delAttr", "_f_delattr"), + ("_v_maxTreeDepth", "_v_maxtreedepth"), # attr + # from nodes/filenode.py + ("newNode", "new_node"), + ("openNode", "open_node"), + ("_lineChunkSize", "_line_chunksize"), # attr (private) + ("_lineSeparator", "_line_separator"), # attr (private) + # ('getLineSeparator', 'get_line_separator'), # dropped + # ('setLineSeparator', 'set_line_separator'), # dropped + # ('delLineSeparator', 'del_line_separator'), # dropped + # ('lineSeparator', 'line_separator'), # property -- dropped + ("_notReadableError", "_not_readable_error"), + ("_appendZeros", "_append_zeros"), + ("getAttrs", "_get_attrs"), + ("setAttrs", "_set_attrs"), + ("delAttrs", "_del_attrs"), + ("_setAttributes", "_set_attributes"), + ("_checkAttributes", "_check_attributes"), + ("_checkNotClosed", "_check_not_closed"), + ("__allowedInitKwArgs", "__allowed_init_kwargs"), # attr (private) + ("_byteShape", "_byte_shape"), # attr (private) + ("_sizeToShape", "_size_to_shape"), # attr (private) + ("_vType", "_vtype"), # attr (private) + ("_vShape", "_vshape"), # attr (private) + # from path.py + ("parentPath", "parentpath"), # kwarg + ("_pythonIdRE", "_python_id_re"), # attr (private) + 
("_reservedIdRE", "_reserved_id_re"), # attr (private) + ("_hiddenNameRE", "_hidden_name_re"), # attr (private) + ("_hiddenPathRE", "_hidden_path_re"), # attr (private) + ("checkNameValidity", "check_name_validity"), + ("joinPath", "join_path"), + ("splitPath", "split_path"), + ("isVisibleName", "isvisiblename"), + ("isVisiblePath", "isvisiblepath"), + # from registry.py + ("className", "classname"), # kwarg + ("classNameDict", "class_name_dict"), # data + ("classIdDict", "class_id_dict"), # data + ("getClassByName", "get_class_by_name"), + # from scripts/ptdump.py + ("dumpLeaf", "dump_leaf"), + ("dumpGroup", "dump_group"), + # from scripts/ptrepack.py + ("newdstGroup", "newdst_group"), + ("recreateIndexes", "recreate_indexes"), + ("copyLeaf", "copy_leaf"), + # from table.py + # ('parentNode', 'parentnode'), # kwarg + ("_nxTypeFromNPType", "_nxtype_from_nptype"), # data (private) + ("_npSizeType", "_npsizetype"), # data (private) + ("_indexNameOf", "_index_name_of"), + ("_indexPathnameOf", "_index_pathname_of"), + ("_indexPathnameOfColumn", "_index_pathname_of_column"), + ("_indexNameOf_", "_index_name_of_"), + ("_indexPathnameOf_", "_index_pathname_of_"), + ("_indexPathnameOfColumn_", "_index_pathname_of_column_"), + ("_table__setautoIndex", "_table__setautoindex"), + ("_table__getautoIndex", "_table__getautoindex"), + ("_table__autoIndex", "_table__autoindex"), # data (private) + ("_table__whereIndexed", "_table__where_indexed"), + ("createIndexesTable", "create_indexes_table"), + ("createIndexesDescr", "create_indexes_descr"), + ("_column__createIndex", "_column__create_index"), + ("_autoIndex", "_autoindex"), # attr + ("autoIndex", "autoindex"), # attr + ("_useIndex", "_use_index"), + ("_whereCondition", "_where_condition"), # attr (private) + ("_conditionCache", "_condition_cache"), # attr (private) + # ('_exprvarsCache', '_exprvars_cache'), + ( + "_enabledIndexingInQueries", + "_enabled_indexing_in_queries", + ), # attr (private) + ("_emptyArrayCache", 
"_empty_array_cache"), # attr (private) + ("_getTypeColNames", "_get_type_col_names"), + ("_getEnumMap", "_get_enum_map"), + ("_cacheDescriptionData", "_cache_description_data"), + ("_getColumnInstance", "_get_column_instance"), + ("_checkColumn", "_check_column"), + ("_disableIndexingInQueries", "_disable_indexing_in_queries"), + ("_enableIndexingInQueries", "_enable_indexing_in_queries"), + # ('_requiredExprVars', '_required_expr_vars'), + ("_getConditionKey", "_get_condition_key"), + ("_compileCondition", "_compile_condition"), + ("willQueryUseIndexing", "will_query_use_indexing"), + ("readWhere", "read_where"), + ("whereAppend", "append_where"), + ("getWhereList", "get_where_list"), + ("_check_sortby_CSI", "_check_sortby_csi"), + ("_readCoordinates", "_read_coordinates"), + ("readCoordinates", "read_coordinates"), + ("_saveBufferedRows", "_save_buffered_rows"), + ("modifyCoordinates", "modify_coordinates"), + ("modifyRows", "modify_rows"), + ("modifyColumn", "modify_column"), + ("modifyColumns", "modify_columns"), + ("flushRowsToIndex", "flush_rows_to_index"), + ("_addRowsToIndex", "_add_rows_to_index"), + ("removeRows", "remove_rows"), + ("_setColumnIndexing", "_set_column_indexing"), + ("_markColumnsAsDirty", "_mark_columns_as_dirty"), + ("_reIndex", "_reindex"), + ("_doReIndex", "_do_reindex"), + ("reIndex", "reindex"), + ("reIndexDirty", "reindex_dirty"), + ("_g_copyRows", "_g_copy_rows"), + ("_g_copyRows_optim", "_g_copy_rows_optim"), + ("_g_propIndexes", "_g_prop_indexes"), + ("_g_updateTableLocation", "_g_update_table_location"), + ("_tableFile", "_table_file"), # attr (private) + ("_tablePath", "_table_path"), # attr (private) + ("createIndex", "create_index"), + ("createCSIndex", "create_csindex"), + ("removeIndex", "remove_index"), + # from tableextension + ("tableExtension", "tableextension"), + ("getNestedFieldCache", "get_nested_field_cache"), + ("getNestedType", "get_nested_type"), + ("_createTable", "_create_table"), + ("_getInfo", "_get_info"), 
+ ("indexChunk", "indexchunk"), # attr + ("indexValid", "indexvalid"), # attr + ("indexValues", "indexvalues"), # attr + ("bufcoordsData", "bufcoords_data"), # attr + ("indexValuesData", "index_values_data"), # attr + ("chunkmapData", "chunkmap_data"), # attr + ("indexValidData", "index_valid_data"), # attr + ("whereCond", "wherecond"), # attr + ("iterseqMaxElements", "iterseq_max_elements"), # attr + ("IObuf", "iobuf"), # attr + ("IObufcpy", "iobufcpy"), # attr + ("_convertTime64_", "_convert_time64_"), + ("_convertTypes", "_convert_types"), + ("_newBuffer", "_new_buffer"), + ("__next__inKernel", "__next__inkernel"), + ("_fillCol", "_fill_col"), + ("_flushBufferedRows", "_flush_buffered_rows"), + ("_getUnsavedNrows", "_get_unsaved_nrows"), + ("_flushModRows", "_flush_mod_rows"), + # from undoredo.py + ("moveToShadow", "move_to_shadow"), + ("moveFromShadow", "move_from_shadow"), + ("undoCreate", "undo_create"), + ("redoCreate", "redo_create"), + ("undoRemove", "undo_remove"), + ("redoRemove", "redo_remove"), + ("undoMove", "undo_move"), + ("redoMove", "redo_move"), + ("attrToShadow", "attr_to_shadow"), + ("attrFromShadow", "attr_from_shadow"), + ("undoAddAttr", "undo_add_attr"), + ("redoAddAttr", "redo_add_attr"), + ("undoDelAttr", "undo_del_attr"), + ("redoDelAttr", "redo_del_attr"), + # from utils.py + ("convertToNPAtom", "convert_to_np_atom"), + ("convertToNPAtom2", "convert_to_np_atom2"), + ("checkFileAccess", "check_file_access"), + ("logInstanceCreation", "log_instance_creation"), + ("fetchLoggedInstances", "fetch_logged_instances"), + ("countLoggedInstances", "count_logged_instances"), + ("listLoggedInstances", "list_logged_instances"), + ("dumpLoggedInstances", "dump_logged_instances"), + ("detectNumberOfCores", "detect_number_of_cores"), + # from utilsextension + ("utilsExtension", "utilsextension"), + ("PTTypeToHDF5", "pttype_to_hdf5"), # data + ("PTSpecialKinds", "pt_special_kinds"), # data + ("NPExtPrefixesToPTKinds", "npext_prefixes_to_ptkinds"), # 
data + ("HDF5ClassToString", "hdf5_class_to_string"), # data + ("setBloscMaxThreads", "set_blosc_max_threads"), + ("silenceHDF5Messages", "silence_hdf5_messages"), + ("isHDF5File", "is_hdf5_file"), + ("isPyTablesFile", "is_pytables_file"), + ("getHDF5Version", "get_hdf5_version"), + ("getPyTablesVersion", "get_pytables_version"), + ("whichLibVersion", "which_lib_version"), + ("whichClass", "which_class"), + ("getNestedField", "get_nested_field"), + ("getFilters", "get_filters"), + ("getTypeEnum", "get_type_enum"), + ("enumFromHDF5", "enum_from_hdf5"), + ("enumToHDF5", "enum_to_hdf5"), + ("AtomToHDF5Type", "atom_to_hdf5_type"), + ("loadEnum", "load_enum"), + ("HDF5ToNPNestedType", "hdf5_to_np_nested_type"), + ("HDF5ToNPExtType", "hdf5_to_np_ext_type"), + ("AtomFromHDF5Type", "atom_from_hdf5_type"), + ("createNestedType", "create_nested_type"), + # from unimlemented.py + ("_openUnImplemented", "_open_unimplemented"), + # from vlarray.py + # ('parentNode', 'parentnode'), # kwarg + # ('expectedsizeinMB', 'expected_mb'), # --> expectedrows + # ('_v_expectedsizeinMB', '_v_expected_mb'), # --> expectedrows + ] +) + +new2oldnames = {v: k for k, v in old2newnames.items()} + +# Note that it is tempting to use the ast module here, but then this +# breaks transforming cython files. So instead we are going to do the +# dumb thing with replace. + + +def make_subs(ns): + """Make stubs.""" + names = new2oldnames if ns.reverse else old2newnames + s = r"(?<=\W)({})(?=\W)".format("|".join(list(names))) + if ns.ignore_previous: + s += r"(?!\s*?=\s*?previous_api(_property)?\()" + s += r"(?!\* to \*\w+\*)" + s += r"(?!\* parameter has been renamed into \*\w+\*\.)" + s += r"(?! 
is pending deprecation, import \w+ instead\.)" + subs = re.compile(s, flags=re.MULTILINE) + + def repl(m): + return names.get(m.group(1), m.group(0)) + + return subs, repl + + +def main(): + """Implement the main CLI interface.""" + desc = ( + "PyTables 2.x -> 3.x API transition tool\n\n" + "This tool displays to standard out, so it is \n" + "common to pipe this to another file:\n\n" + "$ pt2to3 oldfile.py > newfile.py" + ) + parser = argparse.ArgumentParser(description=desc) + parser.add_argument( + "-r", + "--reverse", + action="store_true", + default=False, + dest="reverse", + help="reverts changes, going from 3.x -> 2.x.", + ) + parser.add_argument( + "-p", + "--no-ignore-previous", + action="store_false", + default=True, + dest="ignore_previous", + help="ignores previous_api() calls.", + ) + parser.add_argument( + "-o", default=None, dest="output", help="output file to write to." + ) + parser.add_argument( + "-i", + "--inplace", + action="store_true", + default=False, + dest="inplace", + help="overwrites the file in-place.", + ) + parser.add_argument("filename", help="path to input file.") + ns = parser.parse_args() + + if not Path(ns.filename).is_file(): + sys.exit(f"file {ns.filename!r} not found") + src = Path(ns.filename).read_text() + + subs, repl = make_subs(ns) + targ = subs.sub(repl, src) + + ns.output = ns.filename if ns.inplace else ns.output + if ns.output is None: + sys.stdout.write(targ) + else: + Path(ns.output).write_text(targ) + + +if __name__ == "__main__": + main() diff --git a/venv/Lib/site-packages/tables/scripts/ptdump.py b/venv/Lib/site-packages/tables/scripts/ptdump.py new file mode 100644 index 0000000..6acfd40 --- /dev/null +++ b/venv/Lib/site-packages/tables/scripts/ptdump.py @@ -0,0 +1,197 @@ +"""This utility lets you look into the data and metadata of your data files. + +Pass the flag -h to this for help on usage. 
+ +""" + +import argparse +import operator + +import tables as tb + +# default options +options = argparse.Namespace( + rng=slice(None), + showattrs=0, + verbose=0, + dump=0, + colinfo=0, + idxinfo=0, +) + + +def dump_leaf(leaf): + """Dump an HDF5 leaf node.""" + if options.verbose: + print(repr(leaf)) + else: + print(str(leaf)) + if options.showattrs: + print(f" {leaf.attrs!r}") + if options.dump and not isinstance(leaf, tb.unimplemented.UnImplemented): + print(" Data dump:") + # print((leaf.read(options.rng.start, options.rng.stop, + # options.rng.step)) + # This is better for large objects + if options.rng.start is None: + start = 0 + else: + start = options.rng.start + if options.rng.stop is None: + if leaf.shape != (): + stop = leaf.shape[0] + else: + stop = options.rng.stop + if options.rng.step is None: + step = 1 + else: + step = options.rng.step + if leaf.shape == (): + print("[SCALAR] %s" % (leaf[()])) + else: + for i in range(start, stop, step): + print(f"[{i}] {leaf[i]}") + + if isinstance(leaf, tb.table.Table) and options.colinfo: + # Show info of columns + for colname in leaf.colnames: + print(repr(leaf.cols._f_col(colname))) + + if isinstance(leaf, tb.table.Table) and options.idxinfo: + # Show info of indexes + for colname in leaf.colnames: + col = leaf.cols._f_col(colname) + if isinstance(col, tb.table.Column) and col.index is not None: + idx = col.index + print(repr(idx)) + + +def dump_group(pgroup, sort=False): + """Dump an HDF5 group.""" + node_kinds = pgroup._v_file._node_kinds[1:] + what = pgroup._f_walk_groups() + if sort: + what = sorted(what, key=operator.attrgetter("_v_pathname")) + for group in what: + print(str(group)) + if options.showattrs: + print(f" {group._v_attrs!r}") + for kind in node_kinds: + for node in group._f_list_nodes(kind): + if options.verbose or options.dump: + dump_leaf(node) + else: + print(str(node)) + + +def _get_parser(): + parser = argparse.ArgumentParser( + description=( + "The ptdump utility allows you look into 
the contents of your " + "PyTables files. It lets you see not only the data but also " + "the metadata (that is, the *structure* and additional " + "information in the form of *attributes*)." + ) + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="dump more metainformation on nodes", + ) + parser.add_argument( + "-d", + "--dump", + action="store_true", + help="dump data information on leaves", + ) + parser.add_argument( + "-a", + "--showattrs", + action="store_true", + help="show attributes in nodes (only useful when -v or -d are active)", + ) + parser.add_argument( + "-s", + "--sort", + action="store_true", + help="sort output by node name", + ) + parser.add_argument( + "-c", + "--colinfo", + action="store_true", + help="""show info of columns in tables (only useful when -v or -d + are active)""", + ) + parser.add_argument( + "-i", + "--idxinfo", + action="store_true", + help="""show info of indexed columns (only useful when -v or -d are + active)""", + ) + parser.add_argument( + "-R", + "--range", + dest="rng", + metavar="RANGE", + help=( + "select a RANGE of rows (in the form 'start,stop,step') " + "during the copy of *all* the leaves. " + "Default values are 'None, None, 1', which means a copy of all " + "the rows." 
+ ), + ) + parser.add_argument( + "src", + metavar="filename[:nodepath]", + help="name of the HDF5 file to dump", + ) + + return parser + + +def main(): + """Implement the main CLI interface.""" + parser = _get_parser() + + args = parser.parse_args(namespace=options) + + # Get the options + if isinstance(args.rng, str): + try: + options.rng = eval("slice(" + args.rng + ")") + except Exception: + parser.error("Error when getting the range parameter.") + else: + args.dump = 1 + + # Catch the files passed as the last arguments + src = args.src.rsplit(":", 1) + if len(src) == 1: + filename, nodename = src[0], "/" + else: + filename, nodename = src + if nodename == "": + # case where filename == "filename:" instead of "filename:/" + nodename = "/" + + try: + h5file = tb.open_file(filename, "r") + except Exception as e: + return f"Cannot open input file: {e}" + + with h5file: + # Check whether the specified node is a group or a leaf + nodeobject = h5file.get_node(nodename) + if isinstance(nodeobject, tb.group.Group): + # Close the file again and reopen using the root_uep + dump_group(nodeobject, args.sort) + elif isinstance(nodeobject, tb.leaf.Leaf): + # If it is not a Group, it must be a Leaf + dump_leaf(nodeobject) + else: + # This should never happen + print("Unrecognized object:", nodeobject) diff --git a/venv/Lib/site-packages/tables/scripts/ptrepack.py b/venv/Lib/site-packages/tables/scripts/ptrepack.py new file mode 100644 index 0000000..46c7028 --- /dev/null +++ b/venv/Lib/site-packages/tables/scripts/ptrepack.py @@ -0,0 +1,775 @@ +"""This utility lets you repack your data files in a flexible way. + +Pass the flag -h to this for help on usage. 
+ +""" + +import sys +import argparse +import warnings +from time import perf_counter as clock +from time import process_time as cpuclock +from pathlib import Path + +import tables as tb + +# Global variables +verbose = False +regoldindexes = True +createsysattrs = True + +numpy_aliases = [ + "numeric", + "Numeric", + "numarray", + "NumArray", + "CharArray", +] + + +def newdst_group(dstfileh, dstgroup, title, filters): + """Return a new destination group.""" + group = dstfileh.root + # Now, create the new group. This works even if dstgroup == '/' + for nodename in dstgroup.split("/"): + if nodename == "": + continue + # First try if possible intermediate groups does already exist. + try: + group2 = dstfileh.get_node(group, nodename) + except tb.exceptions.NoSuchNodeError: + # The group does not exist. Create it. + group2 = dstfileh.create_group( + group, nodename, title=title, filters=filters + ) + group = group2 + return group + + +def recreate_indexes(table, dstfileh, dsttable): + """Re-create indexes.""" + listoldindexes = table._listoldindexes + if listoldindexes != []: + if not regoldindexes: + if verbose: + print( + f"[I]Not regenerating indexes for table: " + f"'{dstfileh.filename}:{dsttable._v_pathname}'" + ) + return + # Now, recreate the indexed columns + if verbose: + print( + f"[I]Regenerating indexes for table: " + f"'{dstfileh.filename}:{dsttable._v_pathname}'" + ) + for colname in listoldindexes: + if verbose: + print("[I]Indexing column: '%s'. Please wait..." 
% colname) + colobj = dsttable.cols._f_col(colname) + # We don't specify the filters for the indexes + colobj.create_index(filters=None) + + +def copy_leaf( + srcfile, + dstfile, + srcnode, + dstnode, + title, + filters, + copyuserattrs, + overwritefile, + overwrtnodes, + stats, + start, + stop, + step, + chunkshape, + sortby, + check_CSI, # noqa: N803 + propindexes, + upgradeflavors, + allow_padding, +): + """Copy a leaf node.""" + # Open the source file + srcfileh = tb.open_file(srcfile, "r", allow_padding=allow_padding) + # Get the source node (that should exist) + srcnode = srcfileh.get_node(srcnode) + + # Get the destination node and its parent + last_slash = dstnode.rindex("/") + if last_slash == len(dstnode) - 1: + # print("Detected a trailing slash in destination node. " + # "Interpreting it as a destination group.") + dstgroup = dstnode[:-1] + elif last_slash > 0: + dstgroup = dstnode[:last_slash] + else: + dstgroup = "/" + dstleaf = dstnode[last_slash + 1 :] + if dstleaf == "": + dstleaf = srcnode.name + # Check whether the destination group exists or not + if Path(dstfile).is_file() and not overwritefile: + dstfileh = tb.open_file( + dstfile, + "a", + pytables_sys_attrs=createsysattrs, + allow_padding=allow_padding, + ) + try: + dstgroup = dstfileh.get_node(dstgroup) + except Exception: + # The dstgroup does not seem to exist. Try creating it. + dstgroup = newdst_group(dstfileh, dstgroup, title, filters) + else: + # The node exists, but it is really a group? + if not isinstance(dstgroup, tb.group.Group): + # No. Should we overwrite it? 
+ if overwrtnodes: + parent = dstgroup._v_parent + last_slash = dstgroup._v_pathname.rindex("/") + dstgroupname = dstgroup._v_pathname[last_slash + 1 :] + dstgroup.remove() + dstgroup = dstfileh.create_group( + parent, dstgroupname, title=title, filters=filters + ) + else: + raise RuntimeError( + "Please check that the node names are " + "not duplicated in destination, and " + "if so, add the --overwrite-nodes " + "flag if desired." + ) + else: + # The destination file does not exist or will be overwritten. + dstfileh = tb.open_file( + dstfile, + "w", + title=title, + filters=filters, + pytables_sys_attrs=createsysattrs, + allow_padding=allow_padding, + ) + dstgroup = newdst_group(dstfileh, dstgroup, title="", filters=filters) + + # Finally, copy srcnode to dstnode + try: + dstnode = srcnode.copy( + dstgroup, + dstleaf, + filters=filters, + copyuserattrs=copyuserattrs, + overwrite=overwrtnodes, + stats=stats, + start=start, + stop=stop, + step=step, + chunkshape=chunkshape, + sortby=sortby, + check_CSI=check_CSI, + propindexes=propindexes, + ) + except Exception: + type_, value, traceback = sys.exc_info() + print( + f"Problems doing the copy from '{srcfile}:{srcnode}' to " + f"'{dstfile}:{dstnode}'" + ) + print(f"The error was --> {type_}: {value}") + print("The destination file looks like:\n", dstfileh) + # Close all the open files: + srcfileh.close() + dstfileh.close() + raise RuntimeError( + "Please check that the node names are not " + "duplicated in destination, and if so, add " + "the --overwrite-nodes flag if desired." 
+ ) + + # Upgrade flavors in dstnode, if required + if upgradeflavors: + if srcfileh.format_version.startswith("1"): + # Remove original flavor in case the source file has 1.x format + dstnode.del_attr("FLAVOR") + elif srcfileh.format_version < "2.1": + if dstnode.get_attr("FLAVOR") in numpy_aliases: + dstnode.set_attr("FLAVOR", tb.flavor.internal_flavor) + + # Recreate possible old indexes in destination node + if srcnode._c_classid == "TABLE": + recreate_indexes(srcnode, dstfileh, dstnode) + + # Close all the open files: + srcfileh.close() + dstfileh.close() + + +def copy_children( + srcfile, + dstfile, + srcgroup, + dstgroup, + title, + recursive, + filters, + copyuserattrs, + overwritefile, + overwrtnodes, + stats, + start, + stop, + step, + chunkshape, + sortby, + check_CSI, # noqa: N803 + propindexes, + upgradeflavors, + allow_padding, + use_hardlinks=True, +): + """Copy the children from source group to destination group.""" + # Open the source file with srcgroup as root_uep + srcfileh = tb.open_file( + srcfile, "r", root_uep=srcgroup, allow_padding=allow_padding + ) + # Assign the root to srcgroup + srcgroup = srcfileh.root + + created_dstgroup = False + # Check whether the destination group exists or not + if Path(dstfile).is_file() and not overwritefile: + dstfileh = tb.open_file( + dstfile, + "a", + pytables_sys_attrs=createsysattrs, + allow_padding=allow_padding, + ) + try: + dstgroup = dstfileh.get_node(dstgroup) + except tb.exceptions.NoSuchNodeError: + # The dstgroup does not seem to exist. Try creating it. + dstgroup = newdst_group(dstfileh, dstgroup, title, filters) + created_dstgroup = True + else: + # The node exists, but it is really a group? + if not isinstance(dstgroup, tb.group.Group): + # No. Should we overwrite it? 
+ if overwrtnodes: + parent = dstgroup._v_parent + last_slash = dstgroup._v_pathname.rindex("/") + dstgroupname = dstgroup._v_pathname[last_slash + 1 :] + dstgroup.remove() + dstgroup = dstfileh.create_group( + parent, dstgroupname, title=title, filters=filters + ) + else: + raise RuntimeError( + "Please check that the node names are " + "not duplicated in destination, and " + "if so, add the --overwrite-nodes " + "flag if desired." + ) + else: + # The destination file does not exist or will be overwritten. + dstfileh = tb.open_file( + dstfile, + "w", + title=title, + filters=filters, + pytables_sys_attrs=createsysattrs, + allow_padding=allow_padding, + ) + dstgroup = newdst_group(dstfileh, dstgroup, title="", filters=filters) + created_dstgroup = True + + # Copy the attributes to dstgroup, if needed + if created_dstgroup and copyuserattrs: + srcgroup._v_attrs._f_copy(dstgroup) + + # Finally, copy srcgroup children to dstgroup + try: + srcgroup._f_copy_children( + dstgroup, + recursive=recursive, + filters=filters, + copyuserattrs=copyuserattrs, + overwrite=overwrtnodes, + stats=stats, + start=start, + stop=stop, + step=step, + chunkshape=chunkshape, + sortby=sortby, + check_CSI=check_CSI, + propindexes=propindexes, + use_hardlinks=use_hardlinks, + ) + except Exception: + type_, value, traceback = sys.exc_info() + print( + "Problems doing the copy from '%s:%s' to '%s:%s'" + % (srcfile, srcgroup, dstfile, dstgroup) + ) + print(f"The error was --> {type_}: {value}") + print("The destination file looks like:\n", dstfileh) + # Close all the open files: + srcfileh.close() + dstfileh.close() + raise RuntimeError( + "Please check that the node names are not " + "duplicated in destination, and if so, add the " + "--overwrite-nodes flag if desired. In " + "particular, pay attention that root_uep is not " + "fooling you." 
+ ) + + # Upgrade flavors in dstnode, if required + if upgradeflavors: + for dstnode in dstgroup._f_walknodes("Leaf"): + if srcfileh.format_version.startswith("1"): + # Remove original flavor in case the source file has 1.x format + dstnode.del_attr("FLAVOR") + elif srcfileh.format_version < "2.1": + if dstnode.get_attr("FLAVOR") in numpy_aliases: + dstnode.set_attr("FLAVOR", tb.flavor.internal_flavor) + + # Convert the remaining tables with old indexes (if any) + for table in srcgroup._f_walknodes("Table"): + dsttable = dstfileh.get_node(dstgroup, table._v_pathname) + recreate_indexes(table, dstfileh, dsttable) + + # Close all the open files: + srcfileh.close() + dstfileh.close() + + +def _get_parser(): + parser = argparse.ArgumentParser( + description=( + "This utility is very powerful and lets you copy any " + "leaf, group or complete subtree into another file. " + "During the copy process you are allowed to change the filter " + "properties if you want so. Also, in the case of duplicated " + "pathnames, you can decide if you want to overwrite already " + "existing nodes on the destination file. Generally speaking, " + "ptrepack can be useful in may situations, like replicating a " + "subtree in another file, change the filters in objects and see " + "how affect this to the compression degree or I/O performance, " + "consolidating specific data in repositories or even *importing* " + "generic HDF5 files and create true PyTables counterparts." + ) + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="show verbose information", + ) + parser.add_argument( + "-o", + "--overwrite", + action="store_true", + dest="overwritefile", + help="overwrite destination file", + ) + parser.add_argument( + "-R", + "--range", + dest="rng", + metavar="RANGE", + help=( + "select a RANGE of rows (in the form 'start,stop,step') " + "during the copy of *all* the leaves. " + "Default values are 'None, None, 1', which means a copy of all " + "the rows." 
+ ), + ) + parser.add_argument( + "--non-recursive", + action="store_false", + default=True, + dest="recursive", + help="do not do a recursive copy. Default is to do it", + ) + parser.add_argument( + "--dest-title", + dest="title", + default="", + help="title for the new file (if not specified, the source is copied)", + ) + parser.add_argument( + "--dont-create-sysattrs", + action="store_false", + default=True, + dest="createsysattrs", + help="do not create sys attrs (default is to do it)", + ) + parser.add_argument( + "--dont-copy-userattrs", + action="store_false", + default=True, + dest="copyuserattrs", + help="do not copy the user attrs (default is to do it)", + ) + parser.add_argument( + "--overwrite-nodes", + action="store_true", + dest="overwrtnodes", + help=( + "overwrite destination nodes if they exist. " + "Default is to not overwrite them" + ), + ) + parser.add_argument( + "--complevel", + type=int, + default=0, + help=( + "set a compression level (0 for no compression, which is the " + "default)" + ), + ) + parser.add_argument( + "--complib", + choices=( + "zlib", + "lzo", + "bzip2", + "blosc", + "blosc:blosclz", + "blosc:lz4", + "blosc:lz4hc", + "blosc:zlib", + "blosc:zstd", + "blosc2", + "blosc2:blosclz", + "blosc2:lz4", + "blosc2:lz4hc", + "blosc2:zlib", + "blosc2:zstd", + ), + default="zlib", + help=( + "set the compression library to be used during the copy. 
" + "Defaults to %(default)s" + ), + ) + parser.add_argument( + "--shuffle", + type=int, + choices=(0, 1), + help=( + "activate or not the shuffle filter (default is active if " + "complevel > 0)" + ), + ) + parser.add_argument( + "--bitshuffle", + type=int, + choices=(0, 1), + help="activate or not the bitshuffle filter (not active by default)", + ) + parser.add_argument( + "--fletcher32", + type=int, + choices=(0, 1), + help=( + "whether to activate or not the fletcher32 filter (not active " + "by default)" + ), + ) + parser.add_argument( + "--keep-source-filters", + action="store_true", + dest="keepfilters", + help=( + "use the original filters in source files. " + "The default is not doing that if any of --complevel, --complib, " + "--shuffle --bitshuffle or --fletcher32 option is specified" + ), + ) + parser.add_argument( + "--chunkshape", + default="keep", + help=( + "set a chunkshape. " + "Possible options are: 'keep' | 'auto' | int | tuple. " + "A value of 'auto' computes a sensible value for the chunkshape " + "of the leaves copied. " + "The default is to 'keep' the original value" + ), + ) + parser.add_argument( + "--upgrade-flavors", + action="store_true", + dest="upgradeflavors", + help=( + "when repacking PyTables 1.x or PyTables 2.x files, the flavor " + "of leaves will be unset. With this, such a leaves will be " + "serialized as objects with the internal flavor " + "('numpy' for 3.x series)" + ), + ) + parser.add_argument( + "--dont-regenerate-old-indexes", + action="store_false", + default=True, + dest="regoldindexes", + help=( + "disable regenerating old indexes. " + "The default is to regenerate old indexes as they are found" + ), + ) + parser.add_argument( + "--sortby", + metavar="COLUMN", + help=( + "do a table copy sorted by the index in 'column'. " + "For reversing the order, use a negative value in the 'step' " + "part of 'RANGE' (see '-r' flag). 
def main():
    """Implement the main CLI interface.

    Parse the command line, build the optional ``Filters`` instance, copy
    the requested source node into the destination file (``copy_children``
    for a group, ``copy_leaf`` otherwise) and, in verbose mode, print
    timing and size statistics.

    The module globals ``verbose``, ``regoldindexes`` and ``createsysattrs``
    are set from the parsed options before copying starts.
    """
    global verbose
    global regoldindexes
    global createsysattrs

    parser = _get_parser()
    args = parser.parse_args()

    # check arguments
    if args.rng:
        try:
            # NOTE(review): eval() runs arbitrary code taken from the command
            # line.  Acceptable only while the argument comes from the user
            # running the tool; do not feed untrusted text through here.
            args.rng = eval("slice(" + args.rng + ")")
        except Exception:
            parser.error("Error when getting the range parameter.")

    if args.chunkshape.isdigit() or args.chunkshape.startswith("("):
        # NOTE(review): same eval() caveat as for the range above.
        args.chunkshape = eval(args.chunkshape)

    if args.complevel < 0 or args.complevel > 9:
        parser.error(
            'invalid "complevel" value, it should be in the range [0, 9]'
        )

    # Catch the files passed as the last arguments
    src = args.src.rsplit(":", 1)
    dst = args.dst.rsplit(":", 1)
    if len(src) == 1:
        srcfile, srcnode = src[0], "/"
    else:
        srcfile, srcnode = src
    if len(dst) == 1:
        dstfile, dstnode = dst[0], "/"
    else:
        dstfile, dstnode = dst

    if srcnode == "":
        # case where filename == "filename:" instead of "filename:/"
        srcnode = "/"

    if dstnode == "":
        # case where filename == "filename:" instead of "filename:/"
        dstnode = "/"

    # Ignore the warnings for tables that contains oldindexes
    # (these will be handled by the copying routines)
    warnings.filterwarnings("ignore", category=tb.exceptions.OldIndexWarning)

    # Ignore the flavors warnings during upgrading flavor operations
    if args.upgradeflavors:
        warnings.filterwarnings("ignore", category=tb.exceptions.FlavorWarning)

    # Build the Filters instance
    filter_params = (
        args.complevel,
        args.complib,
        args.shuffle,
        args.bitshuffle,
        args.fletcher32,
    )
    # The previous test compared this 5-tuple with a 4-tuple of None, which
    # can never be equal.  Test every element instead.  With an integer
    # --complevel (range-checked above) this is still never true, so
    # behaviour is unchanged: only --keep-source-filters yields None.
    if all(param is None for param in filter_params) or args.keepfilters:
        filters = None
    else:
        if args.complevel is None:
            args.complevel = 0
        if args.shuffle is None:
            # shuffle defaults to active only when compressing
            args.shuffle = args.complevel > 0
        if args.bitshuffle is None:
            args.bitshuffle = False
        if args.bitshuffle:
            # Shuffle and bitshuffle are mutually exclusive
            args.shuffle = False
        if args.complib is None:
            args.complib = "zlib"
        if args.fletcher32 is None:
            args.fletcher32 = False
        filters = tb.leaf.Filters(
            complevel=args.complevel,
            complib=args.complib,
            shuffle=args.shuffle,
            bitshuffle=args.bitshuffle,
            fletcher32=args.fletcher32,
        )

    # The start, stop and step params:
    start, stop, step = None, None, 1  # Defaults
    if args.rng:
        start, stop, step = args.rng.start, args.rng.stop, args.rng.step

    # Set globals
    verbose = args.verbose
    regoldindexes = args.regoldindexes
    createsysattrs = args.createsysattrs

    # Some timing
    t1 = clock()
    cpu1 = cpuclock()
    # Copy the file
    if verbose:
        print("+=+" * 20)
        print("Recursive copy:", args.recursive)
        print("Applying filters:", filters)
        if args.sortby is not None:
            print("Sorting table(s) by column:", args.sortby)
            print("Forcing a CSI creation:", args.checkCSI)
        if args.propindexes:
            print("Recreating indexes in copied table(s)")
        print(f"Start copying {srcfile}:{srcnode} to {dstfile}:{dstnode}")
        print("+=+" * 20)

    allow_padding = not args.dont_allow_padding
    # Check whether the specified source node is a group or a leaf.  The
    # ``with`` block closes the file again even when get_node() raises
    # (e.g. the node does not exist), which the explicit close() did not.
    with tb.open_file(srcfile, "r", allow_padding=allow_padding) as h5srcfile:
        src_is_group = isinstance(
            h5srcfile.get_node(srcnode), tb.group.Group
        )

    stats = {"groups": 0, "leaves": 0, "links": 0, "bytes": 0, "hardlinks": 0}
    if src_is_group:
        copy_children(
            srcfile,
            dstfile,
            srcnode,
            dstnode,
            title=args.title,
            recursive=args.recursive,
            filters=filters,
            copyuserattrs=args.copyuserattrs,
            overwritefile=args.overwritefile,
            overwrtnodes=args.overwrtnodes,
            stats=stats,
            start=start,
            stop=stop,
            step=step,
            chunkshape=args.chunkshape,
            sortby=args.sortby,
            check_CSI=args.checkCSI,
            propindexes=args.propindexes,
            upgradeflavors=args.upgradeflavors,
            allow_padding=allow_padding,
            use_hardlinks=True,
        )
    else:
        # If not a Group, it should be a Leaf
        copy_leaf(
            srcfile,
            dstfile,
            srcnode,
            dstnode,
            title=args.title,
            filters=filters,
            copyuserattrs=args.copyuserattrs,
            overwritefile=args.overwritefile,
            overwrtnodes=args.overwrtnodes,
            stats=stats,
            start=start,
            stop=stop,
            step=step,
            chunkshape=args.chunkshape,
            sortby=args.sortby,
            check_CSI=args.checkCSI,
            propindexes=args.propindexes,
            upgradeflavors=args.upgradeflavors,
            allow_padding=allow_padding,
        )

    # Gather some statistics
    t2 = clock()
    cpu2 = cpuclock()
    tcopy = t2 - t1
    cpucopy = cpu2 - cpu1
    if verbose:
        ngroups = stats["groups"]
        nleaves = stats["leaves"]
        nlinks = stats["links"]
        nhardlinks = stats["hardlinks"]
        nbytescopied = stats["bytes"]
        nnodes = ngroups + nleaves + nlinks + nhardlinks

        print(
            "Groups copied:",
            ngroups,
            ", Leaves copied:",
            nleaves,
            ", Links copied:",
            nlinks,
            ", Hard links copied:",
            nhardlinks,
        )
        if args.copyuserattrs:
            print("User attrs copied")
        else:
            print("User attrs not copied")
        print(f"KBytes copied: {nbytescopied / 1024:.3f}")
        print(
            f"Time copying: {tcopy:.3f} s (real) {cpucopy:.3f} s "
            f"(cpu) {cpucopy / tcopy:.0%}"
        )
        print(f"Copied nodes/sec: {nnodes / tcopy:.1f}")
        print(f"Copied KB/s : {nbytescopied / tcopy / 1024:.0f}")
nodes/sec: {nnodes / tcopy:.1f}") + print(f"Copied KB/s : {nbytescopied / tcopy / 1024:.0f}") diff --git a/venv/Lib/site-packages/tables/scripts/pttree.py b/venv/Lib/site-packages/tables/scripts/pttree.py new file mode 100644 index 0000000..458030a --- /dev/null +++ b/venv/Lib/site-packages/tables/scripts/pttree.py @@ -0,0 +1,505 @@ +"""This utility prints the contents of an HDF5 file as a tree. + +Pass the flag -h to this for help on usage. + +""" + +import sys +import argparse +import warnings +from pathlib import Path +from collections import defaultdict, deque + +import numpy as np + +import tables as tb + + +def _get_parser(): + parser = argparse.ArgumentParser( + description=( + "`pttree` is designed to give a quick overview of the contents " + "of a PyTables HDF5 file by printing a depth-indented list of " + "nodes, similar to the output of the Unix `tree` function. " + "It can also display the size, shape and compression states of " + "individual nodes, as well as summary information for the whole " + "file. " + "For a more verbose output (including metadata), see `ptdump`. 
def main():
    """Implement the main CLI interface.

    Parse the command line, split the positional ``filename[:nodepath]``
    argument on its last colon and print the tree built by ``get_tree_str``
    for that node.  Every remaining option is forwarded as a keyword
    argument.
    """
    options = vars(_get_parser().parse_args())

    # Catch the files passed as the last arguments
    target = options.pop("src")
    filename, colon, nodename = target.rpartition(":")
    if not colon:
        # no node path given at all
        filename, nodename = target, "/"
    elif not nodename:
        # case where filename == "filename:" instead of "filename:/"
        nodename = "/"

    with tb.open_file(filename, "r") as h5file:
        print(get_tree_str(h5file, nodename, **options))
def get_tree_str(
    f,
    where="/",
    max_depth=-1,
    print_class=True,
    print_size=True,
    print_percent=True,
    print_shape=False,
    print_compression=False,
    print_total=True,
    sort_by=None,
    use_si_units=False,
):
    """Return a string representing the tree structure, and the summary info.

    Parameters
    ----------
    f
        Open file object; ``get_node`` and ``filename`` are used.
    where
        Path of the node taken as the root of the printed tree.
    max_depth
        Deepest level (relative to `where`) to print; a negative value means
        no limit.
    print_class
        Append the node class name to each node name.
    print_size
        Add in-memory and on-disk sizes to the node labels.
    print_percent
        Add the share of the total on-disk size to the size labels.
    print_shape
        Add the shape of array/table nodes to their labels.
    print_compression
        Add the compression library and level of array/table nodes.
    print_total
        Append a summary block (leaf count, sizes, ratio, file size).
    sort_by
        ``"size"`` (descending), ``"name"``, or anything else for the order
        of the parent's ``_v_children``.
    use_si_units
        Passed to ``bytes2human`` to choose SI or binary units.

    Returns
    -------
    str
        The rendered tree, followed by the summary when `print_total` is
        true.
    """
    root = f.get_node(where)
    root._g_check_open()
    start_depth = root._v_depth
    if max_depth < 0:
        max_depth = sys.maxsize

    b2h = bytes2human(use_si_units)

    # we will pass over each node in the tree twice

    # on the first pass we'll start at the root node and recurse down the
    # branches, finding all of the leaf nodes and calculating the total size
    # over all tables and arrays
    total_in_mem = 0
    total_on_disk = 0
    total_items = 0

    # defaultdicts for holding the cumulative branch sizes at each node
    in_mem = defaultdict(int)
    on_disk = defaultdict(int)
    leaf_count = defaultdict(int)

    # keep track of node addresses within the HDF5 file so that we don't count
    # nodes with multiple references (i.e. hardlinks) multiple times
    ref_count = defaultdict(int)
    ref_idx = defaultdict(int)
    hl_addresses = defaultdict(lambda: None)
    hl_targets = defaultdict(str)

    stack = deque(root)
    leaves = deque()

    while stack:

        node = stack.pop()

        if isinstance(node, tb.link.Link):
            # we treat links like leaves, except we don't dereference them to
            # get their sizes or addresses
            leaves.append(node)
            continue

        path = node._v_pathname
        addr, rc = node._get_obj_info()
        ref_count[addr] += 1
        ref_idx[path] = ref_count[addr]
        hl_addresses[path] = addr

        if isinstance(node, tb.UnImplemented):
            leaves.append(node)

        elif isinstance(node, tb.Leaf):

            # only count the size of a hardlinked leaf the first time it is
            # visited
            if ref_count[addr] == 1:

                try:
                    m = node.size_in_memory
                    d = node.size_on_disk

                    # size of this node
                    in_mem[path] += m
                    on_disk[path] += d
                    leaf_count[path] += 1

                    # total over all nodes
                    total_in_mem += m
                    total_on_disk += d
                    total_items += 1

                    # arbitrarily treat this node as the 'target' for all other
                    # hardlinks that point to the same address
                    hl_targets[addr] = path

                except NotImplementedError as e:
                    # size_on_disk is not implemented for VLArrays
                    warnings.warn(str(e))

            # push leaf nodes onto the stack for the next pass
            leaves.append(node)

        elif isinstance(node, tb.Group):

            # don't recurse down the same hardlinked branch multiple times!
            if ref_count[addr] == 1:
                stack.extend(list(node._v_children.values()))
                hl_targets[addr] = path

            # if we've already visited this group's address, treat it as a leaf
            # instead
            else:
                leaves.append(node)

    # on the second pass we start at each leaf and work upwards towards the
    # root node, computing the cumulative size of each branch at each node, and
    # instantiating a PrettyTree object for each node to create an ASCII
    # representation of the tree structure

    # this will store the PrettyTree objects for every node we're printing
    pretty = {}

    stack = leaves

    while stack:

        node = stack.pop()
        path = node._v_pathname

        parent = node._v_parent
        parent_path = parent._v_pathname

        # cumulative size at parent node
        in_mem[parent_path] += in_mem[path]
        on_disk[parent_path] += on_disk[path]
        leaf_count[parent_path] += leaf_count[path]

        depth = node._v_depth - start_depth

        # if we're deeper than the max recursion depth, we print nothing
        if not depth > max_depth:

            # create a PrettyTree representation of this node
            name = node._v_name
            if print_class:
                name += " (%s)" % node.__class__.__name__

            labels = []
            # total_on_disk is 0 when no sized leaf was found (e.g. only
            # groups and links); report a zero share instead of raising
            # ZeroDivisionError
            if total_on_disk:
                ratio = on_disk[path] / total_on_disk
            else:
                ratio = 0.0

            # if the address of this object has a ref_count > 1, it has
            # multiple hardlinks
            if ref_count[hl_addresses[path]] > 1:
                name += ", addr=%i, ref=%i/%i" % (
                    hl_addresses[path],
                    ref_idx[path],
                    ref_count[hl_addresses[path]],
                )

            if isinstance(node, tb.link.Link):
                labels.append("softlink --> %s" % node.target)

            elif ref_idx[path] > 1:
                labels.append(
                    "hardlink --> %s" % hl_targets[hl_addresses[path]]
                )

            elif isinstance(node, (tb.Array, tb.Table)):

                if print_size:
                    sizestr = "mem={}, disk={}".format(
                        b2h(in_mem[path]), b2h(on_disk[path])
                    )
                    if print_percent:
                        sizestr += f" [{ratio:5.1%}]"
                    labels.append(sizestr)

                if print_shape:
                    labels.append("shape=%s" % repr(node.shape))

                if print_compression:
                    lib = node.filters.complib
                    level = node.filters.complevel
                    if level:
                        compstr = "%s(%i)" % (lib, level)
                    else:
                        compstr = "None"
                    labels.append("compression=%s" % compstr)

            # if we're at our max recursion depth, we'll print summary
            # information for this branch
            elif depth == max_depth:
                itemstr = "... %i leaves" % leaf_count[path]
                if print_size:
                    itemstr += ", mem={}, disk={}".format(
                        b2h(in_mem[path]), b2h(on_disk[path])
                    )
                if print_percent:
                    itemstr += f" [{ratio:5.1%}]"
                labels.append(itemstr)

            # create a PrettyTree for this node, if one doesn't exist already
            if path not in pretty:
                pretty.update({path: PrettyTree()})
            pretty[path].name = name
            pretty[path].labels = labels
            if sort_by == "size":
                # descending size order
                pretty[path].sort_by = -ratio
            elif sort_by == "name":
                pretty[path].sort_by = node._v_name
            else:
                # natural order
                if path == "/":
                    # root is not in root._v_children
                    pretty[path].sort_by = 0
                else:
                    pretty[path].sort_by = list(
                        parent._v_children.values()
                    ).index(node)

            # exclude root node or we'll get infinite recursions (since '/' is
            # the parent of '/')
            if path != "/":

                # create a PrettyTree for the parent of this node, if one
                # doesn't exist already
                if parent_path not in pretty:
                    pretty.update({parent_path: PrettyTree()})

                # make this PrettyTree a child of the parent PrettyTree
                pretty[parent_path].add_child(pretty[path])

        if node is not root and parent not in stack:
            # we append to the 'bottom' of the stack, so that we exhaust all of
            # the nodes at this level before going up a level in the tree
            stack.appendleft(parent)

    out_str = "\n" + "-" * 60 + "\n" * 2
    out_str += str(pretty[root._v_pathname]) + "\n" * 2

    if print_total:
        # the ratio is undefined when nothing was measured; show "nan"
        # rather than raising ZeroDivisionError
        if total_in_mem:
            avg_ratio = total_on_disk / total_in_mem
        else:
            avg_ratio = float("nan")
        fsize = Path(f.filename).stat().st_size

        out_str += "-" * 60 + "\n"
        out_str += "Total branch leaves: %i\n" % total_items
        out_str += "Total branch size: {} in memory, {} on disk\n".format(
            b2h(total_in_mem), b2h(total_on_disk)
        )
        out_str += "Mean compression ratio: %.2f\n" % avg_ratio
        out_str += "HDF5 file size: %s\n" % b2h(fsize)
        out_str += "-" * 60 + "\n"

    return out_str
class PrettyTree:
    """Pretty ASCII representation of a recursive tree structure.

    Each node can have multiple labels, given as a list of strings.

    Children are printed ordered by their ``sort_by`` attribute.  Children
    whose ``sort_by`` is None (the default) come first, in insertion order.

    Example:
    --------
        A = PrettyTree('A', labels=['wow'])
        B = PrettyTree('B', labels=['such tree'])
        C = PrettyTree('C', children=[A, B])
        D = PrettyTree('D', labels=['so recursive'])
        root = PrettyTree('root', labels=['many nodes'], children=[C, D])
        print(root)

    Credit to Andrew Cooke's blog.

    """

    def __init__(self, name=None, children=None, labels=None, sort_by=None):

        # NB: do NOT assign default list/dict arguments in the function
        # declaration itself - these objects are shared between ALL instances
        # of PrettyTree, and by assigning to them it's easy to get into
        # infinite recursions, e.g. when 'self in self.children == True'
        if children is None:
            children = []
        if labels is None:
            labels = []

        self.name = name
        self.children = children
        self.labels = labels
        self.sort_by = sort_by

    def add_child(self, child):
        """Add a child to the tree (no-op if it is already a child)."""
        # some basic checks to help to avoid infinite recursion
        assert child is not self
        assert self not in child.children
        if child not in self.children:
            self.children.append(child)

    @staticmethod
    def _sort_key(child):
        """Return an ordering key that tolerates ``sort_by is None``."""
        # None cannot be ordered against None (TypeError in Python 3).  Put
        # unset keys in a group of their own, where the stable sort keeps
        # insertion order; set keys compare exactly as before.
        if child.sort_by is None:
            return (False, 0)
        return (True, child.sort_by)

    def tree_lines(self):
        """Generate the lines of the string representation of the tree."""
        yield self.name
        for label in self.labels:
            yield " " + label
        children = sorted(self.children, key=self._sort_key)
        last = children[-1] if children else None
        for child in children:
            # the branch marker is used for the child's first line only;
            # its remaining lines get the continuation prefix
            prefix = "`--" if child is last else "+--"
            for line in child.tree_lines():
                yield prefix + line
                prefix = " " if child is last else "| "

    def __str__(self):
        return "\n".join(self.tree_lines())

    def __repr__(self):
        return f"<{self.__class__.__name__} at 0x{id(self):x}>"
def bytes2human(use_si_units=False):
    """Return a formatter turning a byte count into a string with units.

    With `use_si_units` the formatter scales by powers of 1000
    (``kB`` ... ``TB``), otherwise by powers of 1024 (``KiB`` ... ``TiB``).
    The returned callable picks the largest unit for which the scaled value
    is at least 1 and falls back to plain bytes.
    """
    if use_si_units:
        units = (("TB", 1e12), ("GB", 1e9), ("MB", 1e6), ("kB", 1e3), ("B", 1))
    else:
        units = (
            ("TiB", 2**40),
            ("GiB", 2**30),
            ("MiB", 2**20),
            ("KiB", 2**10),
            ("B", 1),
        )

    def b2h(nbytes):
        # walk from the largest unit down; when nothing reaches 1 the loop
        # ends on the last entry, i.e. plain bytes
        for suffix, factor in units:
            scaled = nbytes / factor
            if scaled >= 1:
                break

        return f"{scaled:.1f}{suffix}"

    return b2h


def make_test_file(prefix="/tmp"):
    """Create a small HDF5 test file and return it still open.

    The file ``test_pttree.hdf5`` is written under `prefix`.  It holds two
    sub-groups with bzip2-compressed arrays, plus a second group with a soft
    link and two hard links pointing into the first.
    """
    h5file = tb.open_file(str(Path(prefix) / "test_pttree.hdf5"), "w")

    group1 = h5file.create_group("/", "group1")
    group1a = h5file.create_group(group1, "group1a")
    group1b = h5file.create_group(group1, "group1b")

    bzip2_filters = tb.Filters(complevel=5, complib="bzip2")

    for subgroup in (group1a, group1b):
        zeros = np.zeros(32, dtype=np.float64)
        h5file.create_carray(
            subgroup, "zeros128b", obj=zeros, filters=bzip2_filters
        )
        noise = np.random.rand(32)
        h5file.create_carray(
            subgroup, "random128b", obj=noise, filters=bzip2_filters
        )

    group2 = h5file.create_group("/", "group2")

    zeros_path = "/group1/group1a/zeros128b"
    h5file.create_soft_link(group2, "softlink_g1_z128", zeros_path)
    h5file.create_hard_link(group2, "hardlink_g1a_z128", zeros_path)
    h5file.create_hard_link(group2, "hardlink_g1a", "/group1/group1a")

    return h5file
import tableextension +from .atom import Atom +from .leaf import Leaf +from .path import join_path, split_path +from .index import ( + OldIndex, + default_index_filters, + default_auto_index, + Index, + IndexesDescG, + IndexesTableG, +) +from .utils import is_idx, lazyattr, SizeType +from .utils import NailedDict as CacheDict +from .flavor import flavor_of, array_as_internal, internal_to_flavor +from .conditions import compile_condition +from .exceptions import ( + NodeError, + HDF5ExtError, + PerformanceWarning, + OldIndexWarning, + NoSuchNodeError, +) +from .description import IsDescription, Description, Col, descr_from_dtype +from .utilsextension import get_nested_field +from .lrucacheextension import ObjectCache, NumCache + +profile = False +# profile = True # Uncomment for profiling +if profile: + from .utils import show_stats + +if TYPE_CHECKING: + from .node import Node + from .group import Group + from .filters import Filters + from .misc.enum import Enum + from .conditions import CompiledCondition + +# 2.2: Added support for complex types. Introduced in version 0.9. +# 2.2.1: Added support for time types. +# 2.3: Changed the indexes naming schema. +# 2.4: Changed indexes naming schema (again). +# 2.5: Added the FIELD_%d_FILL attributes. +# 2.6: Added the FLAVOR attribute (optional). +# 2.7: Numeric and numarray flavors are gone. +obversion = "2.7" # The Table VERSION number + + +# Maps NumPy types to the types used by Numexpr. 
+_nxtype_from_nptype = { + np.bool_: bool, + np.int8: ne.necompiler.int_, + np.int16: ne.necompiler.int_, + np.int32: ne.necompiler.int_, + np.int64: ne.necompiler.long_, + np.uint8: ne.necompiler.int_, + np.uint16: ne.necompiler.int_, + np.uint32: ne.necompiler.long_, + np.uint64: ne.necompiler.long_, + np.float32: float, + np.float64: ne.necompiler.double, + np.complex64: complex, + np.complex128: complex, + np.bytes_: bytes, +} + +_nxtype_from_nptype[np.str_] = str + +if hasattr(np, "float16"): + _nxtype_from_nptype[np.float16] = float # XXX: check +if hasattr(np, "float96"): + _nxtype_from_nptype[np.float96] = ne.necompiler.double # XXX: check +if hasattr(np, "float128"): + _nxtype_from_nptype[np.float128] = ne.necompiler.double # XXX: check +if hasattr(np, "complex192"): + _nxtype_from_nptype[np.complex192] = complex # XXX: check +if hasattr(np, "complex256"): + _nxtype_from_nptype[np.complex256] = complex # XXX: check + + +# The NumPy scalar type corresponding to `SizeType`. +_npsizetype = np.array(SizeType(0)).dtype.type + + +def _index_name_of(node: Node) -> str: + return "_i_%s" % node._v_name + + +def _index_pathname_of(node: Node) -> str: + node_parent_path = split_path(node._v_pathname)[0] + return join_path(node_parent_path, _index_name_of(node)) + + +def _index_pathname_of_column(table: Table, colpathname: str) -> str: + return join_path(_index_pathname_of(table), colpathname) + + +# The next are versions that work with just paths (i.e. 
we don't need +# a node instance for using them, which can be critical in certain +# situations) + + +def _index_name_of_(nodeName: str) -> str: # noqa: N803 + return "_i_%s" % nodeName + + +def _index_pathname_of_(nodePath: str) -> str: # noqa: N803 + node_parent_path, node_name = split_path(nodePath) + return join_path(node_parent_path, _index_name_of_(node_name)) + + +def _index_pathname_of_column_(table_path: str, colpathname: str) -> str: + return join_path(_index_pathname_of_(table_path), colpathname) + + +def restorecache(self: Table) -> None: + """Restore the chunk cache.""" + # Define a cache for sparse table reads + params = self._v_file.params + chunksize = self._v_chunkshape[0] + nslots = params["TABLE_MAX_SIZE"] / (chunksize * self._v_dtype.itemsize) + self._chunkcache = NumCache( + (nslots, chunksize), self._v_dtype, "table chunk cache" + ) + self._seqcache = ObjectCache( + params["ITERSEQ_MAX_SLOTS"], + params["ITERSEQ_MAX_SIZE"], + "Iter sequence cache", + ) + self._dirtycache = False + + +def _table__where_indexed( + self: Table, + compiled: CompiledCondition, + condition: str, + condvars: dict[str, Column | np.ndarray], + start: int, + stop: int, + step: int, +) -> Iterator[tableextension.Row] | np.ndarray: + if profile: + tref = clock() + if profile: + show_stats("Entering table_whereIndexed", tref) + self._use_index = True + # Clean the table caches for indexed queries if needed + if self._dirtycache: + restorecache(self) + + # Get the values in expression that are not columns + values = [] + for key, value in condvars.items(): + if isinstance(value, np.ndarray): + values.append((key, value.item())) + # Build a key for the sequence cache + seqkey = (condition, tuple(values), (start, stop, step)) + # Do a lookup in sequential cache for this query + nslot = self._seqcache.getslot(seqkey) + if nslot >= 0: + # Get the row sequence from the cache + seq = self._seqcache.getitem(nslot) + if len(seq) == 0: + return iter([]) + # seq is a list. 
def _table__where_indexed(
    self: Table,
    compiled: CompiledCondition,
    condition: str,
    condvars: dict[str, Column | np.ndarray],
    start: int,
    stop: int,
    step: int,
) -> Iterator[tableextension.Row] | np.ndarray:
    """Resolve an indexed query through the sequence cache or the indexes.

    Returns an iterator over the matching rows when the answer is found in
    the sequence cache or is known to be empty.  Otherwise returns the
    boolean chunk map obtained by combining the chunk maps of every indexed
    sub-expression.
    """
    if profile:
        tref = clock()
        show_stats("Entering table_whereIndexed", tref)
    self._use_index = True
    # Clean the table caches for indexed queries if needed
    if self._dirtycache:
        restorecache(self)

    # Get the values in expression that are not columns
    values = [
        (key, value.item())
        for key, value in condvars.items()
        if isinstance(value, np.ndarray)
    ]
    # Build a key for the sequence cache
    seqkey = (condition, tuple(values), (start, stop, step))
    # Do a lookup in sequential cache for this query
    nslot = self._seqcache.getslot(seqkey)
    if nslot >= 0:
        # Get the row sequence from the cache
        cached = self._seqcache.getitem(nslot)
        if len(cached) == 0:
            return iter([])
        # the cached sequence is a list
        seq = np.array(cached, dtype="int64")
        # Correct the ranges in cached sequence
        if (start, stop, step) != (0, self.nrows, 1):
            in_range = (seq >= start) & (seq < stop)
            on_step = (seq - start) % step == 0
            seq = seq[in_range & on_step]
        return self.itersequence(seq)

    # No luck.  self._seqcache will be populated
    # in the iterator if possible.  (Row._finish_riterator)
    self._seqcache_key = seqkey

    # Compute the chunkmap for every index in indexed expression
    idxexprs = compiled.index_expressions
    strexpr = compiled.string_expression
    cmvars = {}
    tcoords = 0
    for i, (var, ops, lims) in enumerate(idxexprs):
        index = condvars[var].index
        assert index is not None, "the chosen column is not indexed"
        assert not index.dirty, "the chosen column has a dirty index"

        # Get the number of rows that the indexed condition yields.
        range_ = index.get_lookup_range(ops, lims)
        ncoords = index.search(range_)
        tcoords += ncoords
        if index.reduction == 1 and ncoords == 0:
            # No values from index condition, thus the chunkmap should be empty
            nrowsinchunk = self.chunkshape[0]
            nchunks = math.ceil(self.nrows / nrowsinchunk)
            chunkmap = np.zeros(shape=nchunks, dtype="bool")
        else:
            # Get the chunkmap from the index
            chunkmap = index.get_chunkmap()
        # Assign the chunkmap to the cmvars dictionary
        cmvars["e%d" % i] = chunkmap

    if index.reduction == 1 and tcoords == 0:
        # No candidates found in any indexed expression component, so leave now
        self._seqcache.setitem(seqkey, [], 1)
        return iter([])

    # Compute the final chunkmap
    chunkmap = ne.evaluate(strexpr, cmvars)
    if not chunkmap.any():
        # The chunkmap is all False, so the result is empty
        self._seqcache.setitem(seqkey, [], 1)
        return iter([])

    if profile:
        show_stats("Exiting table_whereIndexed", tref)
    return chunkmap
def create_indexes_table(table: Table) -> IndexesTableG:
    """Create indexes for a table.

    A new ``IndexesTableG`` group is created in the parent of `table`,
    named after the table through ``_index_name_of``.
    """
    title = "Indexes container for table " + table._v_pathname
    return IndexesTableG(
        table._v_parent, _index_name_of(table), title, new=True
    )


def create_indexes_descr(
    igroup: Group, dname: str, iname: str, filters: Filters | None
) -> IndexesDescG:
    """Create indexes descriptor.

    A new ``IndexesDescG`` group called `iname` is created under `igroup`;
    `dname` is only used in the title of the new group.
    """
    title = "Indexes container for sub-description " + dname
    return IndexesDescG(igroup, iname, title, filters=filters, new=True)
def _column__create_index(
    self: Column,
    optlevel: int,
    kind: str,
    filters: Filters | None,
    tmp_dir: str,
    blocksizes: tuple[int, int, int, int],
    verbose: bool,
) -> int:
    """Create, fill and optimize the index of this column.

    Returns the number of rows that were added to the new index.

    Raises
    ------
    ValueError
        If the column already has an index.
    NotImplementedError
        If the column holds 64-bit unsigned integers.
    TypeError
        If the column is complex or multidimensional.
    """
    name = self.name
    table = self.table
    dtype = self.dtype
    descr = self.descr
    index = self.index
    get_node = table._v_file._get_node

    # Refuse to overwrite an existing index
    if index:
        raise ValueError(
            "%s for column '%s' already exists. If you want to "
            "re-create it, please, try with reindex() method "
            "better" % (str(index), str(self.pathname))
        )

    # Check that the datatype is indexable.
    if dtype.str[1:] == "u8":
        raise NotImplementedError(
            "indexing 64-bit unsigned integer columns "
            "is not supported yet, sorry"
        )
    if dtype.kind == "c":
        raise TypeError("complex columns can not be indexed")
    if dtype.shape != ():
        raise TypeError("multidimensional columns can not be indexed")

    # Get the indexes group for table, and if not exists, create it
    try:
        itgroup = get_node(_index_pathname_of(table))
    except NoSuchNodeError:
        itgroup = create_indexes_table(table)

    # Create the necessary intermediate groups for descriptors, walking the
    # description path one component at a time
    idgroup = itgroup
    dname = ""
    pathname = descr._v_pathname
    if pathname != "":
        for iname in pathname.split("/"):
            dname = iname if dname == "" else dname + "/" + iname
            try:
                idgroup = get_node(f"{itgroup._v_pathname}/{dname}")
            except NoSuchNodeError:
                idgroup = create_indexes_descr(idgroup, dname, iname, filters)

    # Create the atom
    assert dtype.shape == ()
    atom = Atom.from_dtype(np.dtype((dtype, (0,))))

    # Protection on tables larger than the expected rows (perhaps the
    # user forgot to pass this parameter to the Table constructor?)
    expectedrows = table._v_expectedrows
    if table.nrows > expectedrows:
        expectedrows = table.nrows

    # Create the index itself
    index = Index(
        idgroup,
        name,
        atom=atom,
        title="Index for %s column" % name,
        kind=kind,
        optlevel=optlevel,
        filters=filters,
        tmp_dir=tmp_dir,
        expectedrows=expectedrows,
        byteorder=table.byteorder,
        blocksizes=blocksizes,
    )

    table._set_column_indexing(self.pathname, True)

    # Feed the index with the rows already present in the table, if any
    indexedrows = 0
    if table.nrows > 0:
        indexedrows = table._add_rows_to_index(
            self.pathname, 0, table.nrows, lastrow=True, update=False
        )
    index.dirty = False
    table._indexedrows = indexedrows
    table._unsaved_indexedrows = table.nrows - indexedrows

    # Optimize the index that has been already filled-up
    index.optimize(verbose=verbose)

    # We cannot do a flush here because when reindexing during a
    # flush, the indexes are created anew, and that creates a nested
    # call to flush().

    return indexedrows
+ expectedrows = table._v_expectedrows + if table.nrows > expectedrows: + expectedrows = table.nrows + + # Create the index itself + index = Index( + idgroup, + name, + atom=atom, + title="Index for %s column" % name, + kind=kind, + optlevel=optlevel, + filters=filters, + tmp_dir=tmp_dir, + expectedrows=expectedrows, + byteorder=table.byteorder, + blocksizes=blocksizes, + ) + + table._set_column_indexing(self.pathname, True) + + # Feed the index with values + + # Add rows to the index if necessary + if table.nrows > 0: + indexedrows = table._add_rows_to_index( + self.pathname, 0, table.nrows, lastrow=True, update=False + ) + else: + indexedrows = 0 + index.dirty = False + table._indexedrows = indexedrows + table._unsaved_indexedrows = table.nrows - indexedrows + + # Optimize the index that has been already filled-up + index.optimize(verbose=verbose) + + # We cannot do a flush here because when reindexing during a + # flush, the indexes are created anew, and that creates a nested + # call to flush(). + # table.flush() + + return indexedrows + + +class _ColIndexes(dict): + """Provide a nice representation of column indexes.""" + + def __repr__(self) -> str: + """Return a detailed Description column representation.""" + rep = [f' "{k}": {v}' for k, v in self.items()] + return "{\n %s}" % (",\n ".join(rep)) + + +class Table(tableextension.Table, Leaf): + """This class represents heterogeneous datasets in an HDF5 file. + + Tables are leaves (see the Leaf class in :ref:`LeafClassDescr`) whose data + consists of a unidimensional sequence of *rows*, where each row contains + one or more *fields*. Fields have an associated unique *name* and + *position*, with the first field having position 0. All rows have the same + fields, which are arranged in *columns*. + + Fields can have any type supported by the Col class (see + :ref:`ColClassDescr`) and its descendants, which support multidimensional + data. 
Moreover, a field can be *nested* (to an arbitrary depth), meaning + that it includes further fields inside. A field named x inside a nested + field a in a table can be accessed as the field a/x (its *path name*) from + the table. + + The structure of a table is declared by its description, which is made + available in the Table.description attribute (see :class:`Table`). + + This class provides new methods to read, write and search table data + efficiently. It also provides special Python methods to allow accessing + the table as a normal sequence or array (with extended slicing supported). + + PyTables supports *in-kernel* searches working simultaneously on several + columns using complex conditions. These are faster than selections using + Python expressions. See the :meth:`Table.where` method for more + information on in-kernel searches. + + Non-nested columns can be *indexed*. Searching an indexed column can be + several times faster than searching a non-nested one. Search methods + automatically take advantage of indexing where available. + + When iterating a table, an object from the Row (see :ref:`RowClassDescr`) + class is used. This object allows to read and write data one row at a + time, as well as to perform queries which are not supported by in-kernel + syntax (at a much lower speed, of course). + + Objects of this class support access to individual columns via *natural + naming* through the :attr:`Table.cols` accessor. Nested columns are + mapped to Cols instances, and non-nested ones to Column instances. + See the Column class in :ref:`ColumnClassDescr` for examples of this + feature. + + Parameters + ---------- + parentnode + The parent :class:`Group` object. + + .. versionchanged:: 3.0 + Renamed from *parentNode* to *parentnode*. + + name : str + The name of this node in its parent group. + description + An IsDescription subclass or a dictionary where the keys are the field + names, and the values the type definitions. 
In addition, a pure NumPy + dtype is accepted. If None, the table metadata is read from disk, + else, it's taken from previous parameters. + title + Sets a TITLE attribute on the HDF5 table entity. + filters : Filters + An instance of the Filters class that provides information about the + desired I/O filters to be applied during the life of this object. + expectedrows + A user estimate about the number of rows that will be on table. If not + provided, the default value is ``EXPECTED_ROWS_TABLE`` (see + ``tables/parameters.py``). If you plan to save bigger tables, try + providing a guess; this will optimize the HDF5 B-Tree creation and + management process time and memory used. + chunkshape + The shape of the data chunk to be read or written as a single HDF5 I/O + operation. The filters are applied to those chunks of data. Its rank + for tables has to be 1. If ``None``, a sensible value is calculated + based on the `expectedrows` parameter (which is recommended). + byteorder + The byteorder of the data *on-disk*, specified as 'little' or 'big'. If + this is not specified, the byteorder is that of the platform, unless + you passed a recarray as the `description`, in which case the recarray + byteorder will be chosen. + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, object + birth time); default True. Semantics of these times depend on their + implementation in the HDF5 library: refer to documentation of the + H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata + change time) is implemented. + + .. versionadded:: 3.4.3 + + Notes + ----- + The instance variables below are provided in addition to those in + Leaf (see :ref:`LeafClassDescr`). Please note that there are several + col* dictionaries to ease retrieving information about a column + directly by its path name, avoiding the need to walk through + Table.description or Table.cols. + + + .. 
rubric:: Table attributes + + .. attribute:: coldescrs + + Maps the name of a column to its Col description (see + :ref:`ColClassDescr`). + + .. attribute:: coldflts + + Maps the name of a column to its default value. + + .. attribute:: coldtypes + + Maps the name of a column to its NumPy data type. + + .. attribute:: colindexed + + Is the column which name is used as a key indexed? + + .. attribute:: colinstances + + Maps the name of a column to its Column (see + :ref:`ColumnClassDescr`) or Cols (see :ref:`ColsClassDescr`) + instance. + + .. attribute:: colnames + + A list containing the names of *top-level* columns in the table. + + .. attribute:: colpathnames + + A list containing the pathnames of *bottom-level* columns in + the table. + + These are the leaf columns obtained when walking the table + description left-to-right, bottom-first. Columns inside a + nested column have slashes (/) separating name components in + their pathname. + + .. attribute:: cols + + A Cols instance that provides *natural naming* access to + non-nested (Column, see :ref:`ColumnClassDescr`) and nested + (Cols, see :ref:`ColsClassDescr`) columns. + + .. attribute:: coltypes + + Maps the name of a column to its PyTables data type. + + .. attribute:: description + + A Description instance (see :ref:`DescriptionClassDescr`) + reflecting the structure of the table. + + .. attribute:: extdim + + The index of the enlargeable dimension (always 0 for tables). + + .. attribute:: indexed + + Does this table have any indexed columns? + + .. attribute:: nrows + + The current number of rows in the table. + + """ + + # Class identifier. 
+ _c_classid = "TABLE" + + @lazyattr + def row(self) -> tableextension.Row: + """Row instance (see :ref:`RowClassDescr`) associated to the Table.""" + return tableextension.Row(self) + + @lazyattr + def dtype(self) -> np.dtype: + """Numpy ``dtype`` that most closely matches this table.""" + return self.description._v_dtype + + @property + def shape(self) -> tuple[int]: + """Shape of this table.""" + return (self.nrows,) + + @property + def rowsize(self) -> int: + """Size in bytes of each row in the table.""" + return self.description._v_dtype.itemsize + + @property + def size_in_memory(self) -> int: + """Size of the table's data in bytes when fully loaded in memory. + + This may be used in combination with size_on_disk to calculate + the compression ratio of the data. + """ + return self.nrows * self.rowsize + + @lazyattr + def _v_iobuf(self) -> np.ndarray: + """Buffer for doing I/O.""" + return self._get_container(self.nrowsinbuf) + + @lazyattr + def _v_wdflts(self) -> np.ndarray | None: + """Contain the default values for writing in recarray format.""" + # First, do a check to see whether we need to set default values + # different from 0 or not. + for coldflt in self.coldflts.values(): + if isinstance(coldflt, np.ndarray) or coldflt: + break + else: + # No default different from 0 found. Returning None. + return None + wdflts = self._get_container(1) + for colname, coldflt in self.coldflts.items(): + ra = get_nested_field(wdflts, colname) + ra[:] = coldflt + return wdflts + + @lazyattr + def _colunaligned(self) -> frozenset: + """Pathnames of unaligned, *unidimensional* columns.""" + colunaligned, rarr = [], self._get_container(0) + for colpathname in self.colpathnames: + carr = get_nested_field(rarr, colpathname) + if not carr.flags.aligned and carr.ndim == 1: + colunaligned.append(colpathname) + return frozenset(colunaligned) + + # **************** WARNING! 
*********************** + # This function can be called during the destruction time of a table + # so measures have been taken so that it doesn't have to revive + # another node (which can fool the LRU cache). The solution devised + # has been to add a cache for autoindex (Table._autoindex), populate + # it in creation time of the cache (which is a safe period) and then + # update the cache whenever it changes. + # This solves the error when running test_indexes.py ManyNodesTestCase. + # F. Alted 2007-04-20 + # ************************************************** + + @property + def autoindex(self) -> bool: + """Is True if the `Table` automatically keep column indexes up to date. + + Setting this value states whether existing indexes should be + automatically updated after an append operation or recomputed + after an index-invalidating operation (i.e. removal and + modification of rows). The default is true. + + This value gets into effect whenever a column is altered. If you + don't have automatic indexing activated and you want to do an + immediate update use `Table.flush_rows_to_index()`; for an immediate + reindexing of invalidated indexes, use `Table.reindex_dirty()`. + + This value is persistent. + + .. versionchanged:: 3.0 + The *autoIndex* property has been renamed into *autoindex*. 
+ """ + if self._autoindex is None: + try: + indexgroup = self._v_file._get_node(_index_pathname_of(self)) + except NoSuchNodeError: + self._autoindex = default_auto_index # update cache + return self._autoindex + else: + self._autoindex = indexgroup.auto # update cache + return self._autoindex + else: + # The value is in cache, return it + return self._autoindex + + @autoindex.setter + def autoindex(self, auto: bool) -> None: + auto = bool(auto) + try: + indexgroup = self._v_file._get_node(_index_pathname_of(self)) + except NoSuchNodeError: + indexgroup = create_indexes_table(self) + indexgroup.auto = auto + # Update the cache in table instance as well + self._autoindex = auto + + @property + def indexedcolpathnames(self) -> list[str]: + """List of pathnames of indexed columns in the table.""" + return [ + _colpname + for _colpname in self.colpathnames + if self.colindexed[_colpname] + ] + + @property + def colindexes(self) -> _ColIndexes: + """Return a dictionary with the indexes of the indexed columns.""" + return _ColIndexes( + (_colpname, self.cols._f_col(_colpname).index) + for _colpname in self.colpathnames + if self.colindexed[_colpname] + ) + + @property + def _dirtyindexes(self) -> bool: + """Whether some index in table is dirty.""" + return self._condition_cache._nailcount > 0 + + def __init__( + self, + parentnode: Group, + name: str, + description: ( + dict | type[IsDescription] | Description | npt.DTypeLike | None + ) = None, + title: str = "", + filters: Filters | None = None, + expectedrows: int | None = None, + chunkshape: int | tuple[int] | None = None, + byteorder: Literal["little", "big", None] = None, + _log: bool = True, + track_times: bool = True, + ) -> None: + + self._v_new = new = description is not None + """Is this the first time the node has been created?""" + self._v_new_title = title + """New title for this node.""" + self._v_new_filters = filters + """New filter properties for this node.""" + self.extdim = 0 # Tables only have one 
dimension currently + """The index of the enlargeable dimension (always 0 for tables).""" + self._v_recarray = None + """A structured array to be stored in the table.""" + self._rabyteorder: str | None = None + """The computed byteorder of the self._v_recarray.""" + if expectedrows is None: + expectedrows = parentnode._v_file.params["EXPECTED_ROWS_TABLE"] + self._v_expectedrows: int = expectedrows + """The expected number of rows to be stored in the table.""" + self.nrows = SizeType(0) + """The current number of rows in the table.""" + self.description: Description | None = None + """A Description instance (see :ref:`DescriptionClassDescr`) + reflecting the structure of the table.""" + self._time64colnames: list[str] = [] + """The names of ``Time64`` columns.""" + self._strcolnames: list[str] = [] + """The names of ``String`` columns.""" + self._colenums: dict[str, Enum] = {} + """Maps the name of an enumerated column to its ``Enum`` instance.""" + self._v_chunkshape: tuple[int] | None = None + """Private storage for the `chunkshape` property of the leaf.""" + + self.indexed = False + """Does this table have any indexed columns?""" + self._indexedrows = 0 + """Number of rows indexed in disk.""" + self._unsaved_indexedrows = 0 + """Number of rows indexed in memory but still not in disk.""" + self._listoldindexes: list[str] = [] + """The list of columns with old indexes.""" + self._autoindex: bool | None = None + """Private variable that caches the value for autoindex.""" + + self.colnames: list[str] = [] + """A list containing the names of *top-level* columns in the table.""" + self.colpathnames: list[str] = [] + """A list containing the pathnames of *bottom-level* columns in the + table. + + These are the leaf columns obtained when walking the + table description left-to-right, bottom-first. Columns inside a + nested column have slashes (/) separating name components in + their pathname. 
+ """ + self.colinstances: dict[str, Column | Cols] = {} + """Maps the name of a column to its Column (see + :ref:`ColumnClassDescr`) or Cols (see :ref:`ColsClassDescr`) + instance.""" + self.coldescrs: dict[str, Col] = {} + """Maps the name of a column to its Col description (see + :ref:`ColClassDescr`).""" + self.coltypes: dict[str, str] = {} + """Maps the name of a column to its PyTables data type.""" + self.coldtypes: dict[str, np.dtype] = {} + """Maps the name of a column to its NumPy data type.""" + self.coldflts: dict[str, Any] = {} + """Maps the name of a column to its default value.""" + self.colindexed: dict[str, bool] = {} + """Is the column which name is used as a key indexed?""" + + self._use_index = False + """Whether an index can be used or not in a search. Boolean.""" + self._where_condition: tuple[Callable, Any, Any] | None = None + """Condition function and argument list for selection of values.""" + self._seqcache_key = None + """The key under which to save a query's results (list of row indexes) + or None to not save.""" + max_slots = parentnode._v_file.params["COND_CACHE_SLOTS"] + self._condition_cache = CacheDict(max_slots) + """Cache of already compiled conditions.""" + self._exprvars_cache: dict[str, list[str]] = {} + """Cache of variables participating in numexpr expressions.""" + self._enabled_indexing_in_queries = True + """Is indexing enabled in queries? *Use only for testing.*""" + self._empty_array_cache: dict[np.dtype, np.ndarray] = {} + """Cache of empty arrays.""" + + self._v_dtype: np.dtype | None = None + """The NumPy datatype fopr this table.""" + self.cols: Cols | None = None + """ + A Cols instance that provides *natural naming* access to non-nested + (Column, see :ref:`ColumnClassDescr`) and nested (Cols, see + :ref:`ColsClassDescr`) columns. + """ + self._dirtycache = True + """Whether the data caches are dirty or not. 
Initially set to yes.""" + self._descflavor: str | None = None + """Temporarily keeps the flavor of a description with data.""" + + # Initialize this object in case is a new Table + + # Try purely descriptive description objects. + if new and isinstance(description, dict): + # Dictionary case + self.description = Description( + description, ptparams=parentnode._v_file.params + ) + elif new and ( + type(description) is type(IsDescription) + and issubclass(description, IsDescription) + ): + # IsDescription subclass case + descr = description() + self.description = Description( + descr.columns, ptparams=parentnode._v_file.params + ) + elif new and isinstance(description, Description): + # It is a Description instance already + self.description = description + + # No description yet? + if new and self.description is None: + # Try NumPy dtype instances + if isinstance(description, np.dtype): + tup = descr_from_dtype( + description, ptparams=parentnode._v_file.params + ) + self.description, self._rabyteorder = tup + + # No description yet? + if new and self.description is None: + # Try structured array description objects. + try: + self._descflavor = flavor = flavor_of(description) + except TypeError: # probably not an array + pass + else: + if flavor == "python": + nparray = np.rec.array(description) + else: + nparray = array_as_internal(description, flavor) + self.nrows = nrows = SizeType(nparray.size) + # If `self._v_recarray` is set, it will be used as the + # initial buffer. + if nrows > 0: + self._v_recarray = nparray + tup = descr_from_dtype( + nparray.dtype, ptparams=parentnode._v_file.params + ) + self.description, self._rabyteorder = tup + + # No description yet? 
    def _g_post_init_hook(self) -> None:
        """Complete the table set-up after the base initialisation.

        Restores the flavor of the input data, creates the ``cols``
        accessor and fills ``colinstances``. For new tables every column
        is marked as not indexed. For opened tables the index group is
        inspected to fill ``colindexed``, ``indexed``, the indexed-row
        counters and the ``autoindex`` cache, and an ``OldIndexWarning``
        is issued if old-format indexes are found.
        """
        # We are putting here the index-related issues
        # as well as filling general info for table
        # This is needed because we need first the index objects created

        # First, get back the flavor of input data (if any) for
        # `Leaf._g_post_init_hook()`.
        self._flavor, self._descflavor = self._descflavor, None
        super()._g_post_init_hook()

        # Create a cols accessor.
        self.cols = Cols(self, self.description)

        # Place the `Cols` and `Column` objects into `self.colinstances`.
        colinstances, cols = self.colinstances, self.cols
        for colpathname in self.description._v_pathnames:
            colinstances[colpathname] = cols._g_col(colpathname)

        if self._v_new:
            # Columns are never indexed on creation.
            self.colindexed = dict.fromkeys(self.colpathnames, False)
            return

        # The following code is only for opened tables.

        # Does the indexes group exist?
        indexesgrouppath = _index_pathname_of(self)
        igroup = indexesgrouppath in self._v_file
        oldindexes = False
        for colobj in self.description._f_walk(type="Col"):
            colname = colobj._v_pathname
            # Is this column indexed?
            if igroup:
                indexname = _index_pathname_of_column(self, colname)
                indexed = indexname in self._v_file
                # Note that ``colindexed`` records the presence of the index
                # node, even if it turns out to be an old-format index below.
                self.colindexed[colname] = indexed
                if indexed:
                    column = self.cols._g_col(colname)
                    indexobj = column.index
                    if isinstance(indexobj, OldIndex):
                        indexed = False  # Not a valid index
                        oldindexes = True
                        self._listoldindexes.append(colname)
                    else:
                        # Tell the condition cache about columns with dirty
                        # indexes.
                        if indexobj.dirty:
                            self._condition_cache.nail()
            else:
                indexed = False
                self.colindexed[colname] = False
            # ``indexed`` is only still true here for a usable index.
            if indexed:
                self.indexed = True

        if oldindexes:  # this should only appear under 2.x Pro
            warnings.warn(
                "table ``%s`` has column indexes with PyTables 1.x format. "
                "Unfortunately, this format is not supported in "
                "PyTables 2.x series. Note that you can use the "
                "``ptrepack`` utility in order to recreate the indexes. "
                "The 1.x indexed columns found are: %s"
                % (self._v_pathname, self._listoldindexes),
                OldIndexWarning,
            )

        # It does not matter to which column 'indexobj' belongs,
        # since their respective index objects share
        # the same number of elements.
        # NOTE(review): ``indexobj`` is whatever the loop above bound last,
        # which could be an ``OldIndex`` when old and new indexes coexist --
        # confirm ``nelements`` is meaningful in that case.
        if self.indexed:
            self._indexedrows = indexobj.nelements
            self._unsaved_indexedrows = self.nrows - self._indexedrows
            # Put the autoindex value in a cache variable
            self._autoindex = self.autoindex
+ # See gh-206 and gh-238 + if self.chunkshape is not None: + if nrowsinbuf < self.chunkshape[0]: + nrowsinbuf = self.chunkshape[0] + + # Safeguard against row sizes being extremely large + if nrowsinbuf == 0: + nrowsinbuf = 1 + # If rowsize is too large, issue a Performance warning + maxrowsize = params["BUFFER_TIMES"] * buffersize + if rowsize > maxrowsize: + warnings.warn( + f"""\ +The Table ``{self._v_pathname}`` is exceeding the maximum recommended rowsize +({maxrowsize} bytes); +be ready to see PyTables asking for *lots* of memory and possibly slow +I/O. You may want to reduce the rowsize by trimming the value of +dimensions that are orthogonal (and preferably close) to the *main* +dimension of this leave. Alternatively, in case you have specified a +very small/large chunksize, you may want to increase/decrease it.""", + PerformanceWarning, + ) + return nrowsinbuf + + def _getemptyarray(self, dtype: np.dtype) -> np.ndarray: + # Acts as a cache for empty arrays + key = dtype + if key in self._empty_array_cache: + return self._empty_array_cache[key] + else: + self._empty_array_cache[key] = arr = np.empty(shape=0, dtype=key) + return arr + + def _get_container(self, shape: int) -> np.ndarray: + """Get the appropriate buffer for data depending on table nestedness.""" + # This is *much* faster than the numpy.rec.array counterpart + return np.empty(shape=shape, dtype=self._v_dtype) + + def _get_type_col_names(self, type_: str) -> list[str]: + """Return a list containing 'type_' column names.""" + return [ + colobj._v_pathname + for colobj in self.description._f_walk("Col") + if colobj.type == type_ + ] + + def _get_enum_map(self) -> dict[str, Enum]: + """Return mapping from enumerated column names to `Enum` instances.""" + enum_map = {} + for colobj in self.description._f_walk("Col"): + if colobj.kind == "enum": + enum_map[colobj._v_pathname] = colobj.enum + return enum_map + + def _g_create(self) -> int: + """Create a new table on disk.""" + # Warning against 
assigning too much columns... + # F. Alted 2005-06-05 + max_columns = self._v_file.params["MAX_COLUMNS"] + if len(self.description._v_names) > max_columns: + warnings.warn( + "table ``%s`` is exceeding the recommended " + "maximum number of columns (%d); " + "be ready to see PyTables asking for *lots* of memory " + "and possibly slow I/O" % (self._v_pathname, max_columns), + PerformanceWarning, + ) + + # 1. Create the HDF5 table (some parameters need to be computed). + + # Fix the byteorder of the recarray and update the number of + # expected rows if necessary + if self._v_recarray is not None: + self._v_recarray = self._g_fix_byteorder_data( + self._v_recarray, self._rabyteorder + ) + if len(self._v_recarray) > self._v_expectedrows: + self._v_expectedrows = len(self._v_recarray) + # Compute a sensible chunkshape + if self._v_chunkshape is None: + self._v_chunkshape = self._calc_chunkshape( + self._v_expectedrows, self.rowsize, self.rowsize + ) + # Correct the byteorder, if still needed + if self.byteorder is None: + self.byteorder = sys.byteorder + + # Cache some data which is already in the description. + # This is necessary to happen before creation time in order + # to be able to populate the self._v_wdflts + self._cache_description_data() + + # After creating the table, ``self._v_objectid`` needs to be + # set because it is needed for setting attributes afterwards. + self._v_objectid = self._create_table( + self._v_new_title, self.filters.complib or "", obversion + ) + self._v_recarray = None # not useful anymore + self._rabyteorder = None # not useful anymore + + # 2. Compute or get chunk shape and buffer size parameters. + self.nrowsinbuf = self._calc_nrowsinbuf() + + # 3. Get field fill attributes from the table description and + # set them on disk. 
+ if self._v_file.params["PYTABLES_SYS_ATTRS"]: + set_attr = self._v_attrs._g__setattr + for i, colobj in enumerate(self.description._f_walk(type="Col")): + fieldname = "FIELD_%d_FILL" % i + set_attr(fieldname, colobj.dflt) + + return self._v_objectid + + def _g_open(self) -> int: + """Open a table from disk and read the metadata on it. + + Creates an user description on the flight to easy the access to + the actual data. + + """ + # 1. Open the HDF5 table and get some data from it. + self._v_objectid, description, chunksize = self._get_info() + self._v_expectedrows = self.nrows # the actual number of rows + + # 2. Create an instance description to host the record fields. + validate = not self._v_file._isPTFile # only for non-PyTables files + self.description = Description( + description, validate=validate, ptparams=self._v_file.params + ) + + # 3. Compute or get chunk shape and buffer size parameters. + if chunksize == 0: + self._v_chunkshape = self._calc_chunkshape( + self._v_expectedrows, self.rowsize, self.rowsize + ) + else: + self._v_chunkshape = (chunksize,) + self.nrowsinbuf = self._calc_nrowsinbuf() + + # 4. If there are field fill attributes, get them from disk and + # set them in the table description. 
+ if self._v_file.params["PYTABLES_SYS_ATTRS"]: + if "FIELD_0_FILL" in self._v_attrs._f_list("sys"): + i = 0 + get_attr = self._v_attrs.__getattr__ + for objcol in self.description._f_walk(type="Col"): + colname = objcol._v_pathname + # Get the default values for each column + fieldname = "FIELD_%s_FILL" % i + defval = get_attr(fieldname) + if defval is not None: + objcol.dflt = defval + else: + warnings.warn( + "could not load default value " + "for the ``%s`` column of table ``%s``; " + "using ``%r`` instead" + % (colname, self._v_pathname, objcol.dflt) + ) + defval = objcol.dflt + i += 1 + + # Set also the correct value in the desc._v_dflts dictionary + for descr in self.description._f_walk(type="Description"): + for name in descr._v_names: + objcol = descr._v_colobjects[name] + if isinstance(objcol, Col): + descr._v_dflts[objcol._v_name] = objcol.dflt + + # 5. Cache some data which is already in the description. + self._cache_description_data() + + return self._v_objectid + + def _cache_description_data(self) -> None: + """Cache some data which is already in the description. + + Some information is extracted from `self.description` to build + some useful (but redundant) structures: + + * `self.colnames` + * `self.colpathnames` + * `self.coldescrs` + * `self.coltypes` + * `self.coldtypes` + * `self.coldflts` + * `self._v_dtype` + * `self._time64colnames` + * `self._strcolnames` + * `self._colenums` + + """ + self.colnames = list(self.description._v_names) + self.colpathnames = [ + col._v_pathname + for col in self.description._f_walk() + if not hasattr(col, "_v_names") + ] # bottom-level + + # Find ``time64`` column names. + self._time64colnames = self._get_type_col_names("time64") + # Find ``string`` column names. + self._strcolnames = self._get_type_col_names("string") + # Get a mapping of enumerated columns to their `Enum` instances. 
+ self._colenums = self._get_enum_map() + + # Get info about columns + for colobj in self.description._f_walk(type="Col"): + colname = colobj._v_pathname + # Get the column types, types and defaults + self.coldescrs[colname] = colobj + self.coltypes[colname] = colobj.type + self.coldtypes[colname] = colobj.dtype + self.coldflts[colname] = colobj.dflt + + # Assign _v_dtype for this table + self._v_dtype = self.description._v_dtype + + def _get_column_instance(self, colpathname: str): + """Get the instance of the column with the given `colpathname`. + + If the column does not exist in the table, a `KeyError` is + raised. + + """ + try: + return functools.reduce( + getattr, colpathname.split("/"), self.description + ) + except AttributeError: + raise KeyError( + "table ``%s`` does not have a column named ``%s``" + % (self._v_pathname, colpathname) + ) + + _check_column = _get_column_instance + + def _disable_indexing_in_queries(self) -> None: + """Force queries not to use indexing. + + *Use only for testing.* + + """ + if not self._enabled_indexing_in_queries: + return # already disabled + # The nail avoids setting/getting compiled conditions in/from + # the cache where indexing is used. + self._condition_cache.nail() + self._enabled_indexing_in_queries = False + + def _enable_indexing_in_queries(self) -> None: + """Allow queries to use indexing. + + *Use only for testing.* + + """ + if self._enabled_indexing_in_queries: + return # already enabled + self._condition_cache.unnail() + self._enabled_indexing_in_queries = True + + def _required_expr_vars( + self, + expression: str, + uservars: dict[str, Column | np.ndarray] | None, + depth: int = 1, + ) -> dict[str, Column | np.ndarray]: + """Get the variables required by the `expression`. + + A new dictionary defining the variables used in the `expression` + is returned. Required variables are first looked up in the + `uservars` mapping, then in the set of top-level columns of the + table. 
Unknown variables cause a `NameError` to be raised. + + When `uservars` is `None`, the local and global namespace where + the API callable which uses this method is called is sought + instead. This mechanism will not work as expected if this + method is not used *directly* from an API callable. To disable + this mechanism, just specify a mapping as `uservars`. + + Nested columns and columns from other tables are not allowed + (`TypeError` and `ValueError` are raised, respectively). Also, + non-column variable values are converted to NumPy arrays. + + `depth` specifies the depth of the frame in order to reach local + or global variables. + + """ + # Get the names of variables used in the expression. + exprvarscache = self._exprvars_cache + if expression not in exprvarscache: + # Protection against growing the cache too much + if len(exprvarscache) > 256: + # Remove 10 (arbitrary) elements from the cache + for k in list(exprvarscache)[:10]: + del exprvarscache[k] + cexpr = compile(expression, "", "eval") + exprvars = [ + var + for var in cexpr.co_names + if var not in ["None", "False", "True"] + and var not in ne.expressions.functions + ] + exprvarscache[expression] = exprvars + else: + exprvars = exprvarscache[expression] + + # Get the local and global variable mappings of the user frame + # if no mapping has been explicitly given for user variables. + user_locals, user_globals = {}, {} + if uservars is None: + # We use specified depth to get the frame where the API + # callable using this method is called. 
For instance: + # + # * ``table._required_expr_vars()`` (depth 0) is called by + # * ``table._where()`` (depth 1) is called by + # * ``table.where()`` (depth 2) is called by + # * user-space functions (depth 3) + user_frame = sys._getframe(depth) + user_locals = user_frame.f_locals + user_globals = user_frame.f_globals + + colinstances = self.colinstances + tblfile, tblpath = self._v_file, self._v_pathname + # Look for the required variables first among the ones + # explicitly provided by the user, then among implicit columns, + # then among external variables (only if no explicit variables). + reqvars: dict[str, np.ndarray] = {} + for var in exprvars: + # Get the value. + if uservars is not None and var in uservars: + val = uservars[var] + elif var in colinstances: + val = colinstances[var] + elif uservars is None and var in user_locals: + val = user_locals[var] + elif uservars is None and var in user_globals: + val = user_globals[var] + else: + raise NameError("name ``%s`` is not defined" % var) + + # Check the value. 
+ if hasattr(val, "pathname"): # non-nested column + if val.shape[1:] != (): + raise NotImplementedError( + f"variable ``{var}`` refers to a multidimensional " + f"column, not yet supported in conditions, sorry" + ) + if ( + val._table_file is not tblfile + or val._table_path != tblpath + ): + raise ValueError( + f"variable ``{var}`` refers to a column " + f"which is not part of table ``{tblpath}``" + ) + if val.dtype.str[1:] == "u8": + raise NotImplementedError( + f"variable ``{var}`` refers to a 64-bit unsigned " + f"integer column, not yet supported in conditions, " + f"sorry; please use regular Python selections" + ) + elif hasattr(val, "_v_colpathnames"): # nested column + raise TypeError( + f"variable ``{var}`` refers to a nested column, " + f"not allowed in conditions" + ) + else: # only non-column values are converted to arrays + # XXX: not 100% sure about this + if isinstance(val, str): + val = np.asarray(val.encode("ascii")) + else: + val = np.asarray(val) + reqvars[var] = val + return reqvars + + def _get_condition_key( + self, + condition: str, + condvars: dict[str, Column], + ) -> tuple[ + str, + tuple[str, ...], + tuple[str, ...], + tuple[str, ...], + tuple[Any, ...], + ]: + """Get the condition cache key for `condition` with `condvars`. + + Currently, the key is a tuple of `condition`, column variables + names, normal variables names, column paths and variable paths + (all are tuples). + + """ + # Variable names for column and normal variables. + colnames, varnames = [], [] + # Column paths and types for each of the previous variable. + colpaths, vartypes = [], [] + for var, val in condvars.items(): + if hasattr(val, "pathname"): # column + colnames.append(var) + colpaths.append(val.pathname) + else: # array + try: + varnames.append(var) + vartypes.append(ne.necompiler.getType(val)) # expensive + except ValueError: + # This is more clear than the error given by Numexpr. 
+ raise TypeError( + "variable ``%s`` has data type ``%s``, " + "not allowed in conditions" % (var, val.dtype.name) + ) + colnames, varnames = tuple(colnames), tuple(varnames) + colpaths, vartypes = tuple(colpaths), tuple(vartypes) + condkey = (condition, colnames, varnames, colpaths, vartypes) + return condkey + + def _compile_condition( + self, + condition: str, + condvars: dict[str, Column | np.ndarray], + ) -> CompiledCondition: + """Compile the `condition` and extract usable index conditions. + + This method returns an instance of ``CompiledCondition``. See + the ``compile_condition()`` function in the ``conditions`` + module for more information about the compilation process. + + This method makes use of the condition cache when possible. + + """ + # Look up the condition in the condition cache. + condcache = self._condition_cache + condkey = self._get_condition_key(condition, condvars) + compiled = condcache.get(condkey) + if compiled: + return compiled.with_replaced_vars(condvars) # bingo! + + # Bad luck, the condition must be parsed and compiled. + # Fortunately, the key provides some valuable information. ;) + condition, colnames, varnames, colpaths, vartypes = condkey + + # Extract more information from referenced columns. + + # start with normal variables + typemap = dict(list(zip(varnames, vartypes))) + indexedcols = [] + for colname in colnames: + col = condvars[colname] + + # Extract types from *all* the given variables. + coltype = col.dtype.type + typemap[colname] = _nxtype_from_nptype[coltype] + + # Get the set of columns with usable indexes. + if ( + self._enabled_indexing_in_queries # no in-kernel searches + and self.colindexed[col.pathname] + and not col.index.dirty + ): + indexedcols.append(colname) + + indexedcols = frozenset(indexedcols) + # Now let ``compile_condition()`` do the Numexpr-related job. + compiled = compile_condition(condition, typemap, indexedcols) + + # Check that there actually are columns in the condition. 
def will_query_use_indexing(
    self,
    condition: str,
    condvars: dict[str, Column | np.ndarray] | None = None,
) -> frozenset:
    """Report which indexed columns a query on `condition` would use.

    The `condition` and `condvars` arguments have the same meaning as
    in the :meth:`Table.where` method.

    Returns
    -------
    frozenset
        The path names of the columns whose index is usable for the
        condition.  The set is empty when no index can be used, so the
        result can be tested for truth directly.

    Notes
    -----
    This method is mainly intended for testing.  Changing the set of
    indexed columns or their dirtiness may make it return different
    values for the same arguments at different times.

    """
    # ``depth=2`` is forwarded verbatim to the variable resolver.
    # NOTE(review): presumably this is a caller stack depth, so the call
    # is kept directly in this method body -- confirm before refactoring.
    resolved = self._required_expr_vars(condition, condvars, depth=2)
    compiled = self._compile_condition(condition, resolved)

    # Map every variable taking part in an indexed expression to the
    # path name of the column it refers to.
    usable = [resolved[name].pathname for name in compiled.index_variables]
    return frozenset(usable)
condvars should consist of identifier-like strings + pointing to Column (see :ref:`ColumnClassDescr`) instances *of this + table*, or to other values (which will be converted to arrays). A + default set of condition variables is provided where each top-level, + non-nested column with an identifier-like name appears. Variables in + condvars override the default ones. + + When condvars is not provided or None, the current local and global + namespace is sought instead of condvars. The previous mechanism is + mostly intended for interactive usage. To disable it, just specify a + (maybe empty) mapping as condvars. + + If a range is supplied (by setting some of the start, stop or step + parameters), only the rows in that range and fulfilling the condition + are used. The meaning of the start, stop and step parameters is the + same as for Python slices. + + When possible, indexed columns participating in the condition will be + used to speed up the search. It is recommended that you place the + indexed columns as left and out in the condition as possible. Anyway, + this method has always better performance than regular Python + selections on the table. + + You can mix this method with regular Python selections in order to + support even more complex queries. It is strongly recommended that you + pass the most restrictive condition as the parameter to this method if + you want to achieve maximum performance. + + .. warning:: + + When in the middle of a table row iterator, you should not + use methods that can change the number of rows in the table + (like :meth:`Table.append` or :meth:`Table.remove_rows`) or + unexpected errors will happen. + + Examples + -------- + :: + + passvalues = [ row['col3'] for row in + table.where('(col1 > 0) & (col2 <= 20)', step=5) + if your_function(row['col2']) ] + print("Values that pass the cuts:", passvalues) + + .. note:: + + A special care should be taken when the query condition includes + string literals. 
def _where(
    self,
    condition: str,
    condvars: dict[str, Column | np.ndarray] | None,
    start: str | None = None,
    stop: str | None = None,
    step: str | None = None,
) -> Iterator[tableextension.Row]:
    """Low-level counterpart of `self.where()`.

    Normalizes the requested range, compiles `condition` and returns a
    row iterator over the matching rows.  As a side effect, the compiled
    condition is stored in ``self._where_condition`` for the row
    iterator; callers in this file reset it once they are done.

    An empty range short-circuits to an empty iterator and clears both
    ``self._use_index`` and ``self._where_condition``.
    """
    if profile:
        tref = clock()
        show_stats("Entering table._where", tref)

    # Adjust the slice to be used.
    start, stop, step = self._process_range_read(start, stop, step)
    if start >= stop:
        # Empty range: reset the conditions, nothing to iterate.
        self._use_index = False
        self._where_condition = None
        return iter([])

    # Compile the condition and extract usable index conditions.
    # ``depth=3`` is forwarded as-is, so this call stays in this frame.
    condvars = self._required_expr_vars(condition, condvars, depth=3)
    compiled = self._compile_condition(condition, condvars)

    # Default to an in-kernel query; use the indexes when possible.
    chunkmap = None
    if compiled.index_expressions:
        chunkmap = _table__where_indexed(
            self, compiled, condition, condvars, start, stop, step
        )
        if not isinstance(chunkmap, np.ndarray):
            # Anything that is not a NumPy array should already be an
            # iterator: reset the conditions and hand it back untouched.
            self._use_index = False
            self._where_condition = None
            return chunkmap

    args = [condvars[param] for param in compiled.parameters]
    self._where_condition = (compiled.function, args, compiled.kwargs)
    row = tableextension.Row(self)
    if profile:
        show_stats("Exiting table._where", tref)
    return row._iter(start, stop, step, chunkmap=chunkmap)
def append_where(
    self,
    dstTable: Table,  # noqa: N803
    condition: str | None = None,
    condvars: dict[str, Column | np.ndarray] | None = None,
    start: str | None = None,
    stop: str | None = None,
    step: str | None = None,
) -> int:
    """Append rows fulfilling the condition to the dstTable table.

    dstTable must be capable of taking the rows resulting from the
    query, i.e. it must have columns with the expected names and
    compatible types.  When `condition` is None every row in the
    selected range is copied.  The meaning of the other arguments is
    the same as in the :meth:`Table.where` method.

    The number of rows appended to dstTable is returned as a result.

    .. versionchanged:: 3.0
       The *whereAppend* method has been renamed into *append_where*.

    """
    self._g_check_open()

    # The destination file must not be in read-only mode.
    dstTable._v_file._check_writable()

    # Row objects do not support nested columns, so the flat column
    # paths are iterated.  When rows support nesting, ``self.colnames``
    # can be directly iterated upon.
    paths = tuple(self.colpathnames)
    target = dstTable.row

    # The query is issued directly from this frame (not from a helper).
    matches = (
        self._where(condition, condvars, start, stop, step)
        if condition is not None
        else self.iterrows(start, stop, step)
    )

    copied = 0
    for source in matches:
        for path in paths:
            target[path] = source[path]
        target.append()
        copied += 1
    dstTable.flush()
    return copied
def _check_sortby_csi(
    self, sortby: Column | str, check_csi: bool
) -> Index:
    """Return the full index of the `sortby` column, validating it.

    Parameters
    ----------
    sortby
        Either a ``Column`` instance or the name of a column, which is
        looked up through ``self.cols._f_col()``.
    check_csi
        When true, additionally require the index to be a CSI one.

    Returns
    -------
    Index
        The index object attached to the column.

    Raises
    ------
    TypeError
        If `sortby` is neither a ``Column`` nor a string.
    ValueError
        If the column has no index of kind ``'full'``, or if
        `check_csi` is set and the index is not a CSI one.
    """
    if isinstance(sortby, str):
        icol = self.cols._f_col(sortby)
    elif isinstance(sortby, Column):
        icol = sortby
    else:
        raise TypeError(
            f"`sortby` can only be a `Column` or string object, "
            f"but you passed an object of type: {type(sortby)}"
        )

    if not (icol.is_indexed and icol.index.kind == "full"):
        raise ValueError(
            f"Field `{sortby}` must have associated a 'full' index "
            f"in table `{self}`."
        )

    if check_csi and not icol.index.is_csi:
        # The index exists, but it is not a CSI one.  These literals
        # used to lack the ``f`` prefix, so the placeholders were shown
        # verbatim instead of the field and table names.
        raise ValueError(
            f"Field `{sortby}` must have associated a CSI index "
            f"in table `{self}`, but the existing one is not. "
        )
    return icol.index
You may want + to use the checkCSI argument in order to explicitly check for the + existence of a CSI index. + + If field is supplied only the named column will be selected. If the + column is not nested, an *array* of the current flavor will be + returned; if it is, a *structured array* will be used instead. If no + field is specified, all the columns will be returned in a structured + array of the current flavor. + + The meaning of the start, stop and step arguments is the same as in + :meth:`Table.read`. + + .. versionchanged:: 3.0 + The start, stop and step parameters now behave like in slice. + + """ + self._g_check_open() + index = self._check_sortby_csi(sortby, checkCSI) + coords = index[start:stop:step] + return self.read_coordinates(coords, field) + + def iterrows( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> Iterator[tableextension.Row]: + """Iterate over the table using a Row instance. + + If a range is not supplied, *all the rows* in the table are iterated + upon - you can also use the :meth:`Table.__iter__` special method for + that purpose. If you want to iterate over a given *range of rows* in + the table, you may use the start, stop and step parameters. + + .. warning:: + + When in the middle of a table row iterator, you should not + use methods that can change the number of rows in the table + (like :meth:`Table.append` or :meth:`Table.remove_rows`) or + unexpected errors will happen. + + See Also + -------- + tableextension.Row : the table row iterator and field accessor + + Examples + -------- + :: + + result = [ row['var2'] for row in table.iterrows(step=5) + if row['var1'] <= 20 ] + + .. versionchanged:: 3.0 + If the *start* parameter is provided and *stop* is None then the + table is iterated from *start* to the last line. + In PyTables < 3.0 only one element was returned. 
def _read(
    self,
    start: int,
    stop: int,
    step: int,
    field: str | None = None,
    out: np.ndarray | None = None,
) -> np.ndarray:
    """Read a range of rows and return an in-memory object.

    Parameters
    ----------
    start, stop, step
        Already-normalized row range to read.
    field
        Optional column name.  A name found in ``self.coldtypes`` is
        read on its own; a name found only in
        ``self.description._v_names`` is read as part of the whole row
        and selected from the result afterwards.
    out
        Optional pre-allocated array that receives the data.  It must
        be in native byte order, C contiguous and exactly as large as
        the selection.

    Returns
    -------
    numpy.ndarray
        The rows (or the selected field) that were read.  An empty
        range yields a zero-length array of the matching dtype.

    Raises
    ------
    KeyError
        If `field` is not a column of the table.
    ValueError
        If `out` has the wrong byte order, size or memory layout.
    """
    select_field = None
    dtype_field = None
    if field:
        if field in self.coldtypes:
            # The column hangs directly from the top
            dtype_field = self.coldtypes[field]
        elif field in self.description._v_names:
            # Read whole rows and remember to select this field later
            select_field = field
            field = None
        else:
            raise KeyError(f"Field {field} not found in table {self}")

    # Empty selection: return a zero-length array of the right dtype.
    if (start >= stop and 0 < step) or (start <= stop and 0 > step):
        if field:
            return np.empty(shape=0, dtype=dtype_field)
        empty = self._get_container(0)
        # Keep the dtype consistent with the non-empty path when a
        # field has to be selected out of the whole row.
        return empty[select_field] if select_field else empty

    nrows = len(range(start, stop, step))

    if out is None:
        if field:
            # Create a container for the results
            result = np.empty(shape=nrows, dtype=dtype_field)
        else:
            # Recarray case
            result = self._get_container(nrows)
    else:
        # there is no fast way to byteswap, since different columns may
        # have different byteorders
        if not out.dtype.isnative:
            raise ValueError(
                "output array must be in system's byteorder "
                "or results will be incorrect"
            )
        if field:
            bytes_required = dtype_field.itemsize * nrows
        else:
            bytes_required = self.rowsize * nrows
        if bytes_required != out.nbytes:
            raise ValueError(
                f"output array size invalid, got {out.nbytes}"
                f" bytes, need {bytes_required} bytes"
            )
        if not out.flags["C_CONTIGUOUS"]:
            raise ValueError("output array not C contiguous")
        result = out

    # Call the routine to fill-up the resulting array
    if step == 1 and not field:
        # Whole contiguous records can be read in one go.
        self._read_records(start, stop - start, result)
    else:
        # The ``_read_field_name`` alternative was permanently disabled
        # (its guard ended in ``and 0``), so every other case goes
        # through the row-based filler.
        self.row._fill_col(result, start, stop, step, field)

    if select_field:
        return result[select_field]
    return result
+ + Columns under a nested column can be specified in the field + parameter by using a slash character (/) as a separator (e.g. + 'position/x'). + + The out parameter may be used to specify a NumPy array to + receive the output data. Note that the array must have the + same size as the data selected with the other parameters. + Note that the array's datatype is not checked and no type + casting is performed, so if it does not match the datatype on + disk, the output will not be correct. + + When specifying a single nested column with the field parameter, + and supplying an output buffer with the out parameter, the + output buffer must contain all columns in the table. + The data in all columns will be read into the output buffer. + However, only the specified nested column will be returned from + the method call. + + When data is read from disk in NumPy format, the output will be + in the current system's byteorder, regardless of how it is + stored on disk. If the out parameter is specified, the output + array also must be in the current system's byteorder. + + .. versionchanged:: 3.0 + Added the *out* parameter. Also the start, stop and step + parameters now behave like in slice. + + Examples + -------- + Reading the entire table:: + + t.read() + + Reading record n. 6:: + + t.read(6, 7) + + Reading from record n. 
def _read_coordinates(
    self, coords: np.ndarray, field: str | None = None
) -> np.ndarray:
    """Private part of `read_coordinates()` with no flavor conversion.

    `coords` is first normalized through ``self._point_selection()``.
    The selected rows are read into a fresh buffer and, when `field` is
    given, only that (possibly nested) field of the buffer is returned.
    """
    coords = self._point_selection(coords)
    ncoords = len(coords)
    has_rows = ncoords > 0

    # Create a read buffer only if needed
    if field is None or has_rows:
        if ncoords < min(1000, self.nrowsinbuf):
            # Doing a copy is faster when ncoords is small (<1000)
            result = self._v_iobuf[:ncoords].copy()
        else:
            result = self._get_container(ncoords)

    # Do the real read
    if has_rows:
        # The reader needs a contiguous, aligned array of the
        # coordinate type; convert anything else.
        needs_conversion = (
            not isinstance(coords, np.ndarray)
            or coords.dtype.type is not _npsizetype
            or not coords.flags.contiguous
            or not coords.flags.aligned
        )
        if needs_conversion:
            coords = np.array(coords, dtype=SizeType)
        self._read_elements(coords, result)

    # Do the final conversions, if needed
    if not field:
        return result
    if has_rows:
        return get_nested_field(result, field)
    # Get an empty array from the cache
    return self._getemptyarray(self.coldtypes[field])
def get_enum(self, colname: str) -> Enum:
    """Get the enumerated type associated with the named column.

    If the column named colname (a string) exists and is of an
    enumerated type, the corresponding Enum instance (see
    :ref:`EnumClassDescr`) is returned.

    Raises
    ------
    KeyError
        If the column does not exist (raised by the column check).
    TypeError
        If the column exists but is not of an enumerated type.

    """
    self._check_column(colname)

    try:
        return self._colenums[colname]
    except KeyError:
        # The internal lookup failure is an implementation detail, so
        # it is deliberately hidden from the reported traceback.
        raise TypeError(
            f"column ``{colname}`` of table ``{self._v_pathname}`` "
            f"is not of an enumerated type"
        ) from None
def __setitem__(
    self,
    key: int | slice | list[int] | list[bool] | np.ndarray,
    value: Any,
) -> int:
    """Set a row or a range of rows in the table.

    It takes different actions depending on the type of the *key*
    parameter: if it is an integer, the corresponding table row is
    set to *value* (a record or sequence capable of being converted
    to the table structure).  If *key* is a slice, the row slice
    determined by it is set to *value* (a record array or sequence
    capable of being converted to the table structure).

    In addition, NumPy-style point selections are supported.  In
    particular, if key is a list of row coordinates, the set of rows
    determined by it is set to value.  Furthermore, if key is an array
    of boolean values, only the coordinates where key is True are set
    to values from value.  Note that for the latter to work it is
    necessary that key list would contain exactly as many rows as the
    table has.

    Raises
    ------
    IndexError
        If an integer key lies outside ``[-nrows, nrows)``, or if the
        key is of an unsupported type.

    Examples
    --------
    ::

        # Modify just one existing row
        table[2] = [456,'db2',1.2]

        # Modify two existing rows
        rows = np.rec.array(
            [[457,'db1',1.2],[6,'de2',1.3]], formats='i4,S3,f8'
        )
        table[1:30:2] = rows             # modify a table slice
        table[[1,3]] = rows              # only modifies rows 1 and 3
        table[[True,False,True]] = rows  # only modifies rows 0 and 2

    Which is equivalent to::

        table.modify_rows(start=2, rows=[456,'db2',1.2])
        rows = np.rec.array(
            [[457,'db1',1.2],[6,'de2',1.3]], formats='i4,S3,f8'
        )
        table.modify_rows(start=1, stop=3, step=2, rows=rows)
        table.modify_coordinates([1,3], rows)
        table.modify_coordinates([True, False, True], rows)

    Here, you can see how indexing can be used as a shorthand for the
    :meth:`Table.modify_rows` and :meth:`Table.modify_coordinates`
    methods.

    """
    self._g_check_open()
    self._v_file._check_writable()

    if is_idx(key):
        key = operator.index(key)
        nrows = self.nrows

        # Index out of range protection, on both ends: a key below
        # ``-nrows`` would still be negative after the shift below and
        # would surface as an unrelated error from ``modify_rows``.
        if key >= nrows or key < -nrows:
            raise IndexError("Index out of range")
        if key < 0:
            # To support negative values
            key += nrows
        return self.modify_rows(key, key + 1, 1, [value])
    elif isinstance(key, slice):
        start, stop, step = self._process_range(
            key.start, key.stop, key.step
        )
        return self.modify_rows(start, stop, step, value)
    # Try with a boolean or point selection
    elif type(key) in (list, tuple) or isinstance(key, np.ndarray):
        return self.modify_coordinates(key, value)
    else:
        raise IndexError(f"Invalid index or slice: {key!r}")
+ + Examples + -------- + :: + + import tables as tb + + class Particle(tb.IsDescription): + name = tb.StringCol(16, pos=1) # 16-character String + lati = tb.IntCol(pos=2) # integer + longi = tb.IntCol(pos=3) # integer + pressure = tb.Float32Col(pos=4) # float (single-precision) + temperature = tb.FloatCol(pos=5) # double (double-precision) + + fileh = tb.open_file('test4.h5', mode='w') + table = fileh.create_table(fileh.root, 'table', Particle, + "A table") + + # Append several rows in only one call + table.append([("Particle: 10", 10, 0, 10 * 10, 10**2), + ("Particle: 11", 11, -1, 11 * 11, 11**2), + ("Particle: 12", 12, -2, 12 * 12, 12**2)]) + fileh.close() + + """ + self._g_check_open() + self._v_file._check_writable() + + if not self._chunked: + raise HDF5ExtError( + "You cannot append rows to a non-chunked table.", h5bt=False + ) + + if ( + hasattr(rows, "dtype") + and not self.description._v_is_nested + and rows.dtype == self.dtype + ): + # Shortcut for compliant arrays + # (for some reason, not valid for nested types) + wbuf_ra = rows + else: + # Try to convert the object into a recarray compliant with table + try: + iflavor = flavor_of(rows) + if iflavor != "python": + rows = array_as_internal(rows, iflavor) + # Works for Python structures and always copies the original, + # so the resulting object is safe for in-place conversion. + wbuf_ra = np.rec.array(rows, dtype=self._v_dtype) + except Exception as exc: # XXX + raise ValueError( + f"rows parameter cannot be converted into a " + f"recarray object compliant with table '{self}'. 
def _conv_to_recarr(self, obj: Sequence) -> np.ndarray:
    """Try to convert the object into a recarray.

    Parameters
    ----------
    obj
        Any object convertible to a record array with the dtype
        ``self._v_dtype``.  Non-Python flavors are first converted to
        the internal representation.

    Returns
    -------
    numpy.ndarray
        A new array holding the converted data.

    Raises
    ------
    ValueError
        If the conversion fails for any reason; the original exception
        is attached as ``__cause__``.
    """
    try:
        iflavor = flavor_of(obj)
        if iflavor != "python":
            obj = array_as_internal(obj, iflavor)
        if hasattr(obj, "shape") and obj.shape == ():
            # To allow conversion of scalars (void type) into arrays.
            # See http://projects.scipy.org/scipy/numpy/ticket/315
            # for discussion on how to pass buffers to constructors
            # See also http://projects.scipy.org/scipy/numpy/ticket/348
            recarr = np.array([obj], dtype=self._v_dtype)
        else:
            # Works for Python structures and always copies the original,
            # so the resulting object is safe for in-place conversion.
            recarr = np.rec.array(obj, dtype=self._v_dtype)
    except Exception as exc:  # XXX
        # The conversion can fail in many ways, so the catch stays
        # broad; chaining keeps the real cause in the traceback.
        raise ValueError(
            f"Object cannot be converted into a recarray object compliant "
            f"with table format '{self.description._v_nested_descr}'. "
            f"The error was: <{exc}>"
        ) from exc

    return recarr
+ + """ + if rows is None: # Nothing to be done + return SizeType(0) + + # Convert the coordinates to something expected by HDF5 + coords = self._point_selection(coords) + + lcoords = len(coords) + if len(rows) < lcoords: + raise ValueError( + "The value has not enough elements to fill-in " + "the specified range" + ) + + # Convert rows into a recarray + recarr = self._conv_to_recarr(rows) + + if len(coords) > 0: + # Do the actual update of rows + self._update_elements(lcoords, coords, recarr) + + # Redo the index if needed + self._reindex(self.colpathnames) + + return SizeType(lcoords) + + def modify_rows( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + rows: Sequence | None = None, + ) -> int: + """Modify a series of rows in the slice [start:stop:step]. + + The values in the selected rows will be modified with the data given in + rows. This method returns the number of rows modified. Should the + modification exceed the length of the table, an IndexError is raised + before changing data. + + The possible values for the rows argument are the same as in + :meth:`Table.append`. + + """ + if step is None: + step = 1 + if rows is None: # Nothing to be done + return SizeType(0) + if start is None: + start = 0 + + if start < 0: + raise ValueError("'start' must have a positive value.") + if step < 1: + raise ValueError( + "'step' must have a value greater or equal than 1." + ) + if stop is None: + # compute the stop value. start + len(rows)*step does not work + stop = start + (len(rows) - 1) * step + 1 + + start, stop, step = self._process_range(start, stop, step) + if stop > self.nrows: + raise IndexError( + "This modification will exceed the length of " + "the table. Giving up." + ) + # Compute the number of rows to read. 
+ nrows = len(range(start, stop, step)) + if len(rows) != nrows: + raise ValueError( + "The value has different elements than the specified range" + ) + + # Convert rows into a recarray + recarr = self._conv_to_recarr(rows) + + lenrows = len(recarr) + if start + lenrows > self.nrows: + raise IndexError( + "This modification will exceed the length of the " + "table. Giving up." + ) + + # Do the actual update + self._update_records(start, stop, step, recarr) + + # Redo the index if needed + self._reindex(self.colpathnames) + + return SizeType(lenrows) + + def modify_column( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + column: Sequence | None = None, + colname: str | None = None, + ): + """Modify one single column in the row slice [start:stop:step]. + + The colname argument specifies the name of the column in the + table to be modified with the data given in column. This + method returns the number of rows modified. Should the + modification exceed the length of the table, an IndexError is + raised before changing data. + + The *column* argument may be any object which can be converted + to a (record) array compliant with the structure of the column + to be modified (otherwise, a ValueError is raised). This + includes NumPy (record) arrays, lists of scalars, tuples or + array records, and a string or Python buffer. + + """ + if step is None: + step = 1 + if not isinstance(colname, str): + raise TypeError("The 'colname' parameter must be a string.") + self._v_file._check_writable() + + if column is None: # Nothing to be done + return SizeType(0) + if start is None: + start = 0 + + if start < 0: + raise ValueError("'start' must have a positive value.") + if step < 1: + raise ValueError( + "'step' must have a value greater or equal than 1." 
+ ) + # Get the column format to be modified: + objcol = self._get_column_instance(colname) + descr = [objcol._v_parent._v_nested_descr[objcol._v_pos]] + # Try to convert the column object into a NumPy ndarray + try: + # If the column is a recarray (or kind of), convert into ndarray + if hasattr(column, "dtype") and column.dtype.kind == "V": + column = np.rec.array(column, dtype=descr).field(0) + else: + # Make sure the result is always a *copy* of the original, + # so the resulting object is safe for in-place conversion. + iflavor = flavor_of(column) + column = array_as_internal(column, iflavor) + except Exception as exc: # XXX + raise ValueError( + f"column parameter cannot be converted into a " + f"ndarray object compliant with specified column " + f"'{column}'. The error was: <{exc}>" + ) + + # Get rid of single-dimensional dimensions + column = column.squeeze() + if column.shape == (): + # Oops, stripped off too much dimensions + column.shape = (1,) + + if stop is None: + # compute the stop value. start + len(rows)*step does not work + stop = start + (len(column) - 1) * step + 1 + start, stop, step = self._process_range(start, stop, step) + if stop > self.nrows: + raise IndexError( + "This modification will exceed the length of " + "the table. Giving up." + ) + # Compute the number of rows to read. 
+ nrows = len(range(start, stop, step)) + if len(column) < nrows: + raise ValueError( + "The value has not enough elements to fill-in " + "the specified range" + ) + # Now, read the original values: + mod_recarr = self._read(start, stop, step) + # Modify the appropriate column in the original recarray + mod_col = get_nested_field(mod_recarr, colname) + mod_col[:] = column + # save this modified rows in table + self._update_records(start, stop, step, mod_recarr) + # Redo the index if needed + self._reindex([colname]) + + return SizeType(nrows) + + def modify_columns( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + columns: Sequence | None = None, + names: list[str] | tuple[str, ...] | None = None, + ) -> int: + """Modify a series of columns in the row slice [start:stop:step]. + + The names argument specifies the names of the columns in the + table to be modified with the data given in columns. This + method returns the number of rows modified. Should the + modification exceed the length of the table, an IndexError + is raised before changing data. + + The columns argument may be any object which can be converted + to a structured array compliant with the structure of the + columns to be modified (otherwise, a ValueError is raised). + This includes NumPy structured arrays, lists of tuples or array + records, and a string or Python buffer. + + """ + if step is None: + step = 1 + if type(names) not in (list, tuple): + raise TypeError("The 'names' parameter must be a list of strings.") + + if columns is None: # Nothing to be done + return SizeType(0) + if start is None: + start = 0 + if start < 0: + raise ValueError("'start' must have a positive value.") + if step < 1: + raise ValueError( + "'step' must have a value greater or equal than 1." 
+ ) + descr = [] + for colname in names: + objcol = self._get_column_instance(colname) + descr.append(objcol._v_parent._v_nested_descr[objcol._v_pos]) + # descr.append(objcol._v_parent._v_dtype[objcol._v_pos]) + # Try to convert the columns object into a recarray + try: + # Make sure the result is always a *copy* of the original, + # so the resulting object is safe for in-place conversion. + iflavor = flavor_of(columns) + if iflavor != "python": + columns = array_as_internal(columns, iflavor) + recarray = np.rec.array(columns, dtype=descr) + else: + recarray = np.rec.fromarrays(columns, dtype=descr) + except Exception as exc: # XXX + raise ValueError( + f"columns parameter cannot be converted into a " + f"recarray object compliant with table '{self}'. " + f"The error was: <{exc}>" + ) + + if stop is None: + # compute the stop value. start + len(rows)*step does not work + stop = start + (len(recarray) - 1) * step + 1 + start, stop, step = self._process_range(start, stop, step) + if stop > self.nrows: + raise IndexError( + "This modification will exceed the length of " + "the table. Giving up." + ) + # Compute the number of rows to read. + nrows = len(range(start, stop, step)) + if len(recarray) < nrows: + raise ValueError( + "The value has not enough elements to fill-in " + "the specified range" + ) + # Now, read the original values: + mod_recarr = self._read(start, stop, step) + # Modify the appropriate columns in the original recarray + for i, name in enumerate(recarray.dtype.names): + mod_col = get_nested_field(mod_recarr, names[i]) + mod_col[:] = recarray[name].squeeze() + # save this modified rows in table + self._update_records(start, stop, step, mod_recarr) + # Redo the index if needed + self._reindex(names) + + return SizeType(nrows) + + def flush_rows_to_index(self, _lastrow: bool = True) -> int: + """Add remaining rows in buffers to non-dirty indexes. 
+ + This can be useful when you have chosen non-automatic indexing + for the table (see the :attr:`Table.autoindex` property in + :class:`Table`) and you want to update the indexes on it. + + """ + rowsadded = 0 + if self.indexed: + # Update the number of unsaved indexed rows + start = self._indexedrows + nrows = self._unsaved_indexedrows + for colname, colindexed in self.colindexed.items(): + if colindexed: + col = self.cols._g_col(colname) + if nrows > 0 and not col.index.dirty: + rowsadded = self._add_rows_to_index( + colname, start, nrows, _lastrow, update=True + ) + self._unsaved_indexedrows -= rowsadded + self._indexedrows += rowsadded + return rowsadded + + def _add_rows_to_index( + self, colname: str, start: int, nrows: int, lastrow: bool, update: bool + ) -> int: + """Add more elements to the existing index.""" + # This method really belongs to Column, but since it makes extensive + # use of the table, it gets dangerous when closing the file, since the + # column may be accessing a table which is being destroyed. + index = self.cols._g_col(colname).index + slicesize = index.slicesize + # The next loop does not rely on xrange so that it can + # deal with long ints (i.e. more than 32-bit integers) + # This allows to index columns with more than 2**31 rows + # F. Alted 2005-05-09 + start_lr = index.sorted.nrows * slicesize + indexedrows = start_lr - start + stop = start + nrows - slicesize + 1 + while start_lr < stop: + index.append( + [self._read(start_lr, start_lr + slicesize, 1, colname)], + update=update, + ) + indexedrows += slicesize + start_lr += slicesize + # index the remaining rows in last row + if lastrow and start_lr < self.nrows: + index.append_last_row( + [self._read(start_lr, self.nrows, 1, colname)], update=update + ) + indexedrows += self.nrows - start_lr + return indexedrows + + def remove_rows( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> int: + """Remove a range of rows in the table. 
+ + If only start is supplied, that row and all following will be deleted. + If a range is supplied, i.e. both the start and stop parameters are + passed, all the rows in the range are removed. + + .. versionchanged:: 3.0 + The start, stop and step parameters now behave like in slice. + + .. seealso:: remove_row() + + Parameters + ---------- + start : int + Sets the starting row to be removed. It accepts negative values + meaning that the count starts from the end. A value of 0 means the + first row. + stop : int + Sets the last row to be removed to stop-1, i.e. the end point is + omitted (in the Python range() tradition). Negative values are also + accepted. If None all rows after start will be removed. + step : int + The step size between rows to remove. + + .. versionadded:: 3.0 + + Examples + -------- + Removing rows from 5 to 10 (excluded):: + + t.remove_rows(5, 10) + + Removing all rows starting from the 10th:: + + t.remove_rows(10) + + Removing the 6th row:: + + t.remove_rows(6, 7) + + .. note:: + + removing a single row can be done using the specific + :meth:`remove_row` method. + + """ + start, stop, step = self._process_range(start, stop, step) + nrows = self._remove_rows(start, stop, step) + # remove_rows is an invalidating index operation + self._reindex(self.colpathnames) + + return SizeType(nrows) + + def remove_row(self, n: int) -> None: + """Remove a row from the table. + + Parameters + ---------- + n : int + The index of the row to remove. + + + .. versionadded:: 3.0 + + Examples + -------- + Remove row 15:: + + table.remove_row(15) + + Which is equivalent to:: + + table.remove_rows(15, 16) + + .. warning:: + + This is not equivalent to:: + + table.remove_rows(15) + + """ + self.remove_rows(start=n, stop=n + 1) + + def _g_update_dependent(self) -> None: + super()._g_update_dependent() + + # Update the new path in columns + self.cols._g_update_table_location(self) + + # Update the new path in the Row instance, if cached. Fixes #224. 
+ if "row" in self.__dict__: + self.__dict__["row"] = tableextension.Row(self) + + def _g_move(self, newparent: Group, newname: str) -> None: + """Move this node in the hierarchy. + + This overloads the Node._g_move() method. + + """ + itgpathname = _index_pathname_of(self) + + # First, move the table to the new location. + super()._g_move(newparent, newname) + + # Then move the associated index group (if any). + try: + itgroup = self._v_file._get_node(itgpathname) + except NoSuchNodeError: + pass + else: + newigroup = self._v_parent + newiname = _index_name_of(self) + itgroup._g_move(newigroup, newiname) + + def _g_remove(self, recursive: bool = False, force: bool = False) -> None: + # Remove the associated index group (if any). + itgpathname = _index_pathname_of(self) + try: + itgroup = self._v_file._get_node(itgpathname) + except NoSuchNodeError: + pass + else: + itgroup._f_remove(recursive=True) + self.indexed = False # there are indexes no more + + # Remove the leaf itself from the hierarchy. 
+ super()._g_remove(recursive, force) + + def _set_column_indexing(self, colpathname: str, indexed: bool) -> None: + """Mark the referred column as indexed or non-indexed.""" + colindexed = self.colindexed + isindexed, wasindexed = bool(indexed), colindexed[colpathname] + if isindexed == wasindexed: + return # indexing state is unchanged + + # Changing the set of indexed columns invalidates the condition cache + self._condition_cache.clear() + colindexed[colpathname] = isindexed + self.indexed = max(colindexed.values()) # this is an OR :) + + def _mark_columns_as_dirty(self, colnames: list[str]) -> None: + """Mark column indexes in `colnames` as dirty.""" + assert len(colnames) > 0 + if self.indexed: + colindexed, cols = self.colindexed, self.cols + # Mark the proper indexes as dirty + for colname in colnames: + if colindexed[colname]: + col = cols._g_col(colname) + col.index.dirty = True + + def _reindex(self, colnames: list[str]) -> None: + """Re-index columns in `colnames` if automatic indexing is true.""" + if self.indexed: + colindexed, cols = self.colindexed, self.cols + colstoindex = [] + # Mark the proper indexes as dirty + for colname in colnames: + if colindexed[colname]: + col = cols._g_col(colname) + col.index.dirty = True + colstoindex.append(colname) + # Now, re-index the dirty ones + if self.autoindex and colstoindex: + self._do_reindex(dirty=True) + # The table caches for indexed queries are dirty now + self._dirtycache = True + + def _do_reindex(self, dirty: bool) -> int: + """Execute common code for `reindex()` and `reindex_dirty()`.""" + indexedrows = 0 + for colname, colindexed in self.colindexed.items(): + if colindexed: + indexcol = self.cols._g_col(colname) + indexedrows = indexcol._do_reindex(dirty) + # Update counters in case some column has been updated + if indexedrows > 0: + self._indexedrows = indexedrows + self._unsaved_indexedrows = self.nrows - indexedrows + + return SizeType(indexedrows) + + def reindex(self) -> None: + """Recompute 
all the existing indexes in the table. + + This can be useful when you suspect that, for any reason, the + index information for columns is no longer valid and want to + rebuild the indexes on it. + + """ + self._do_reindex(dirty=False) + + def reindex_dirty(self) -> None: + """Recompute the existing indexes in table, *if* they are dirty. + + This can be useful when you have set :attr:`Table.autoindex` + (see :class:`Table`) to false for the table and you want to + update the indexes after an invalidating index operation + (:meth:`Table.remove_rows`, for example). + + """ + self._do_reindex(dirty=True) + + def _g_copy_rows( + self, + obj: Table, + start: int, + stop: int, + step: int, + sortby: Column | str | None, + checkCSI: bool, # noqa: N803 + ) -> None: + """Copy rows from self to object.""" + if sortby is None: + self._g_copy_rows_optim(obj, start, stop, step) + return + lenbuf = self.nrowsinbuf + absstep = step + if step < 0: + absstep = -step + start, stop = stop + 1, start + 1 + if sortby is not None: + index = self._check_sortby_csi(sortby, checkCSI) + for start2 in range(start, stop, absstep * lenbuf): + stop2 = start2 + absstep * lenbuf + if stop2 > stop: + stop2 = stop + # The next 'if' is not needed, but it doesn't bother either + if sortby is None: + rows = self[start2:stop2:step] + else: + coords = index[start2:stop2:step] + rows = self.read_coordinates(coords) + # Save the records on disk + obj.append(rows) + obj.flush() + + def _g_copy_rows_optim( + self, obj: Table, start: int, stop: int, step: int + ) -> None: + """Copy rows from self to object (optimized version).""" + nrowsinbuf = self.nrowsinbuf + obj._open_append(self._v_iobuf) + nrowsdest = obj.nrows + for start2 in range(start, stop, step * nrowsinbuf): + # Save the records on disk + stop2 = start2 + step * nrowsinbuf + if stop2 > stop: + stop2 = stop + # Optimized version (it saves some conversions) + nrows = ((stop2 - start2 - 1) // step) + 1 + self.row._fill_col(self._v_iobuf, start2, 
stop2, step, None) + # The output buffer is created anew, + # so the operation is safe to in-place conversion. + obj._append_records(nrows) + nrowsdest += nrows + obj._close_append() + + def _g_prop_indexes(self, other: Table) -> None: + """Generate index in `other` table for every indexed column here.""" + oldcols, newcols = self.colinstances, other.colinstances + for colname in newcols: + if isinstance(oldcols[colname], Column): + oldcolindexed = oldcols[colname].is_indexed + if oldcolindexed: + oldcolindex = oldcols[colname].index + newcol = newcols[colname] + newcol.create_index( + kind=oldcolindex.kind, + optlevel=oldcolindex.optlevel, + filters=oldcolindex.filters, + tmp_dir=None, + ) + + def _g_copy_with_stats( + self, + group: Group, + name: str, + start: int, + stop: int, + step: int, + title: str, + filters: Filters | None, + chunkshape: int | tuple[int] | None, + _log: bool, + **kwargs, + ) -> tuple[Table, int]: + """Private part of Leaf.copy() for each kind of leaf.""" + # Get the private args for the Table flavor of copy() + sortby = kwargs.pop("sortby", None) + propindexes = kwargs.pop("propindexes", False) + check_csi = kwargs.pop("checkCSI", False) + # Compute the correct indices. + start, stop, step = self._process_range_read( + start, stop, step, warn_negstep=sortby is None + ) + # And the number of final rows + nrows = len(range(start, stop, step)) + # Create the new table and copy the selected data. + newtable = Table( + group, + name, + self.description, + title=title, + filters=filters, + expectedrows=nrows, + chunkshape=chunkshape, + _log=_log, + ) + self._g_copy_rows(newtable, start, stop, step, sortby, check_csi) + nbytes = newtable.nrows * newtable.rowsize + # Generate equivalent indexes in the new table, if required. + if propindexes and self.indexed: + self._g_prop_indexes(newtable) + return (newtable, nbytes) + + # This overloading of copy is needed here in order to document + # the additional keywords for the Table case. 
+ def copy( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + createparents: bool = False, + **kwargs, + ) -> Table: + """Copy this table and return the new one. + + This method has the behavior and keywords described in + :meth:`Leaf.copy`. Moreover, it recognises the following additional + keyword arguments. + + Parameters + ---------- + sortby + If specified, and sortby corresponds to a column with an index, + then the copy will be sorted by this index. If you want to ensure + a fully sorted order, the index must be a CSI one. A reverse + sorted copy can be achieved by specifying a negative value for the + step keyword. If sortby is omitted or None, the original table + order is used. + checkCSI + If true and a CSI index does not exist for the sortby column, an + error will be raised. If false (the default), it does nothing. + You can use this flag in order to explicitly check for the + existence of a CSI index. + propindexes + If true, the existing indexes in the source table are propagated + (created) to the new one. If false (the default), the indexes are + not propagated. + + """ + return super().copy( + newparent, newname, overwrite, createparents, **kwargs + ) + + def flush(self) -> None: + """Flush the table buffers.""" + if self._v_file._iswritable(): + # Flush rows that remains to be appended + if "row" in self.__dict__: + self.row._flush_buffered_rows() + if self.indexed and self.autoindex: + # Flush any unindexed row + rowsadded = self.flush_rows_to_index(_lastrow=True) + assert rowsadded <= 0 or self._indexedrows == self.nrows, ( + "internal error: the number of indexed rows (%d) " + "and rows in the table (%d) is not equal; " + "please report this to the authors." 
+ % (self._indexedrows, self.nrows) + ) + if self._dirtyindexes: + # Finally, re-index any dirty column + self.reindex_dirty() + + super().flush() + + def _g_pre_kill_hook(self) -> None: + """Code to be called before killing the node.""" + # Flush the buffers before to clean-up them + # self.flush() + # It seems that flushing during the __del__ phase is a sure receipt for + # bringing all kind of problems: + # 1. Illegal Instruction + # 2. Malloc(): trying to call free() twice + # 3. Bus Error + # 4. Segmentation fault + # So, the best would be doing *nothing* at all in this __del__ phase. + # As a consequence, the I/O will not be cleaned until a call to + # Table.flush() would be done. This could lead to a potentially large + # memory consumption. + # NOTE: The user should make a call to Table.flush() whenever he has + # finished working with his table. + # I've added a Performance warning in order to compel the user to + # call self.flush() before the table is being preempted. + # F. Alted 2006-08-03 + if ("row" in self.__dict__ and self.row._get_unsaved_nrows() > 0) or ( + self.indexed + and self.autoindex + and (self._unsaved_indexedrows > 0 or self._dirtyindexes) + ): + warnings.warn( + f"table ``{self._v_pathname}`` is being preempted from " + f"alive nodes without its buffers being flushed or with some " + f"index being dirty. This may lead to very " + f"ineficient use of resources and even to fatal " + f"errors in certain situations. Please do a call " + f"to the .flush() or .reindex_dirty() methods on " + f"this table before start using other nodes.", + PerformanceWarning, + ) + # Get rid of the IO buffers (if they have been created at all) + mydict = self.__dict__ + if "_v_iobuf" in mydict: + del mydict["_v_iobuf"] + if "_v_wdflts" in mydict: + del mydict["_v_wdflts"] + + def _f_close(self, flush: bool = True) -> None: + if not self._v_isopen: + return # the node is already closed + + # .. 
note:: + # + # As long as ``Table`` objects access their indices on closing, + # ``File.close()`` will need to make *two separate passes* + # to first close ``Table`` objects and then ``Index`` hierarchies. + # + + # Flush right now so the row object does not get in the middle. + if flush: + self.flush() + + # Some warnings can be issued after calling `self._g_set_location()` + # in `self.__init__()`. If warnings are turned into exceptions, + # `self._g_post_init_hook` may not be called and `self.cols` not set. + # One example of this is + # ``test_create.createTestCase.test05_maxFieldsExceeded()``. + cols = self.cols + if cols is not None: + cols._g_close() + + # Clean address cache + self._clean_chunk_addrs() + + # Close myself as a leaf. + super()._f_close(False) + + def __repr__(self) -> str: + """Return the string representation of `Table` objects. + + This provides column metainfo in addition to standard __str__. + """ + if self.indexed: + return f"""\ +{self} + description := {self.description!r} + byteorder := {self.byteorder!r} + chunkshape := {self.chunkshape!r} + autoindex := {self.autoindex!r} + colindexes := {_ColIndexes(self.colindexes)!r}""" + else: + return f"""\ +{self} + description := {self.description!r} + byteorder := {self.byteorder!r} + chunkshape := {self.chunkshape!r}""" + + +class Cols: + """Container for columns in a table or nested column. + + This class is used as an *accessor* to the columns in a table or nested + column. It supports the *natural naming* convention, so that you can + access the different columns as attributes which lead to Column instances + (for non-nested columns) or other Cols instances (for nested columns). + + For instance, if table.cols is a Cols instance with a column named col1 + under it, the later can be accessed as table.cols.col1. If col1 is nested + and contains a col2 column, this can be accessed as table.cols.col1.col2 + and so on. 
Because of natural naming, the names of members start with + special prefixes, like in the Group class (see :ref:`GroupClassDescr`). + + Like the Column class (see :ref:`ColumnClassDescr`), Cols supports item + access to read and write ranges of values in the table or nested column. + + + .. rubric:: Cols attributes + + .. attribute:: _v_colnames + + A list of the names of the columns hanging directly + from the associated table or nested column. The order of + the names matches the order of their respective columns in + the containing table. + + .. attribute:: _v_colpathnames + + A list of the pathnames of all the columns under the + associated table or nested column (in preorder). If it does + not contain nested columns, this is exactly the same as the + :attr:`Cols._v_colnames` attribute. + + .. attribute:: _v_desc + + The associated Description instance (see + :ref:`DescriptionClassDescr`). + + """ + + @property + def _v_table(self) -> Table: + """Return the parent Table instance (see :ref:`TableClassDescr`).""" + return self._v__tableFile._get_node(self._v__tablePath) + + def __init__(self, table: Table, desc: Description) -> None: + dict_ = self.__dict__ + dict_["_v__tableFile"] = table._v_file + dict_["_v__tablePath"] = table._v_pathname + dict_["_v_desc"] = desc + dict_["_v_colnames"] = desc._v_names + dict_["_v_colpathnames"] = table.description._v_pathnames + # Put the column in the local dictionary + for name in desc._v_names: + if name in desc._v_types: + dict_[name] = Column(table, name, desc) + else: + dict_[name] = Cols(table, desc._v_colobjects[name]) + + def _g_update_table_location(self, table: Table) -> None: + """Update the location information about the associated `table`.""" + dict_ = self.__dict__ + dict_["_v__tableFile"] = table._v_file + dict_["_v__tablePath"] = table._v_pathname + + # Update the locations in individual columns. 
+ for colname in self._v_colnames: + dict_[colname]._g_update_table_location(table) + + def __len__(self) -> int: + """Get the number of top level columns in table.""" + return len(self._v_colnames) + + def _f_col(self, colname: str) -> Cols: + """Get an accessor to the column colname. + + This method returns a Column instance (see :ref:`ColumnClassDescr`) if + the requested column is not nested, and a Cols instance (see + :ref:`ColsClassDescr`) if it is. You may use full column pathnames in + colname. + + Calling cols._f_col('col1/col2') is equivalent to using cols.col1.col2. + However, the first syntax is more intended for programmatic use. It is + also better if you want to access columns with names that are not valid + Python identifiers. + + """ + if not isinstance(colname, str): + raise TypeError( + f"Parameter can only be an string. You passed object: " + f"{colname}" + ) + if ( + colname.find("/") > -1 and colname not in self._v_colpathnames + ) and colname not in self._v_colnames: + raise KeyError( + f"Cols accessor " + f"``{self._v__tablePath}.cols{self._v_desc._v_pathname}`` " + f"does not have a column named ``{colname}``" + ) + + return self._g_col(colname) + + def _g_col(self, colname: str) -> Cols: + """Like `self._f_col()` but it does not check arguments.""" + # Get the Column or Description object + inames = colname.split("/") + cols = self + for iname in inames: + cols = cols.__dict__[iname] + return cols + + def __getitem__(self, key: int | slice) -> Any: + """Get a row or a range of rows from a table or nested column. + + If key argument is an integer, the corresponding nested type row is + returned as a record of the current flavor. If key is a slice, the + range of rows determined by it is returned as a structured array of the + current flavor. 
+ + Examples + -------- + :: + + record = table.cols[4] # equivalent to table[4] + recarray = table.cols.Info[4:1000:2] + + Those statements are equivalent to:: + + nrecord = table.read(start=4)[0] + nrecarray = table.read(start=4, stop=1000, step=2).field('Info') + + Here you can see how a mix of natural naming, indexing and slicing can + be used as shorthands for the :meth:`Table.read` method. + + """ + table = self._v_table + nrows = table.nrows + if is_idx(key): + key = operator.index(key) + + # Index out of range protection + if key >= nrows: + raise IndexError("Index out of range") + if key < 0: + # To support negative values + key += nrows + start, stop, step = table._process_range(key, key + 1, 1) + colgroup = self._v_desc._v_pathname + if colgroup == "": # The root group + return table.read(start, stop, step)[0] + else: + crecord = table.read(start, stop, step)[0] + return crecord[colgroup] + elif isinstance(key, slice): + start, stop, step = table._process_range( + key.start, key.stop, key.step + ) + colgroup = self._v_desc._v_pathname + if colgroup == "": # The root group + return table.read(start, stop, step) + else: + crecarray = table.read(start, stop, step) + if hasattr(crecarray, "field"): + return crecarray.field(colgroup) # RecArray case + else: + return get_nested_field(crecarray, colgroup) # numpy case + else: + raise TypeError(f"invalid index or slice: {key!r}") + + def __setitem__(self, key: int | slice, value: Any) -> None: + """Set a row or a range of rows in a table or nested column. + + If key argument is an integer, the corresponding row is set to + value. If key is a slice, the range of rows determined by it is set to + value. 
+ + Examples + -------- + :: + + table.cols[4] = record + table.cols.Info[4:1000:2] = recarray + + Those statements are equivalent to:: + + table.modify_rows(4, rows=record) + table.modify_column(4, 1000, 2, colname='Info', column=recarray) + + Here you can see how a mix of natural naming, indexing and slicing + can be used as shorthands for the :meth:`Table.modify_rows` and + :meth:`Table.modify_column` methods. + + """ + table = self._v_table + nrows = table.nrows + if is_idx(key): + key = operator.index(key) + + # Index out of range protection + if key >= nrows: + raise IndexError("Index out of range") + if key < 0: + # To support negative values + key += nrows + start, stop, step = table._process_range(key, key + 1, 1) + elif isinstance(key, slice): + start, stop, step = table._process_range( + key.start, key.stop, key.step + ) + else: + raise TypeError(f"invalid index or slice: {key!r}") + + # Actually modify the correct columns + colgroup = self._v_desc._v_pathname + if colgroup == "": # The root group + table.modify_rows(start, stop, step, rows=value) + else: + table.modify_column( + start, stop, step, colname=colgroup, column=value + ) + + def _g_close(self) -> None: + # First, close the columns (ie possible indices open) + for col in self._v_colnames: + colobj = self._g_col(col) + if isinstance(colobj, Column): + colobj.close() + # Delete the reference to column + del self.__dict__[col] + else: + colobj._g_close() + + self.__dict__.clear() + + def __str__(self) -> str: + """Return the string representation for this object.""" + # The pathname + descpathname = self._v_desc._v_pathname + if descpathname: + descpathname = "." 
+ descpathname + return ( + f"{self._v__tablePath}.cols{descpathname} " + f"({self.__class__.__name__}), " + f"{len(self._v_colnames)} columns" + ) + + def __repr__(self) -> str: + """Detailed string representation for this object.""" + lines = [f"{self!s}"] + for name in self._v_colnames: + # Get this class name + classname = getattr(self, name).__class__.__name__ + # The type + if name in self._v_desc._v_dtypes: + tcol = self._v_desc._v_dtypes[name] + # The shape for this column + shape = (self._v_table.nrows,) + self._v_desc._v_dtypes[ + name + ].shape + else: + tcol = "Description" + # Description doesn't have a shape currently + shape = () + lines.append(f" {name} ({classname}{shape}, {tcol})") + return "\n".join(lines) + "\n" + + +class Column: + """Accessor for a non-nested column in a table. + + Each instance of this class is associated with one *non-nested* column of a + table. These instances are mainly used to read and write data from the + table columns using item access (like the Cols class - see + :ref:`ColsClassDescr`), but there are a few other associated methods to + deal with indexes. + + .. rubric:: Column attributes + + .. attribute:: descr + + The Description (see :ref:`DescriptionClassDescr`) instance of the + parent table or nested column. + + .. attribute:: name + + The name of the associated column. + + .. attribute:: pathname + + The complete pathname of the associated column (the same as + Column.name if the column is not inside a nested column). + + .. attribute:: attrs + + Column attributes (see :ref:`ColClassDescr`). 
+ + Parameters + ---------- + table + The parent table instance + name + The name of the column that is associated with this object + descr + The parent description object + + """ + + @lazyattr + def dtype(self) -> np.dtype: + """Return the NumPy dtype that most closely matches this column.""" + return self.descr._v_dtypes[self.name].base # Get rid of shape info + + @lazyattr + def type(self) -> str: # noqa: A003 + """Return the PyTables type of the column (a string).""" + return self.descr._v_types[self.name] + + @property + def table(self) -> Table: + """Return the parent Table instance (see :ref:`TableClassDescr`).""" + return self._table_file._get_node(self._table_path) + + @property + def index(self) -> Index | None: + """Return the Index instance associated with this column. + + Return `None` if the column is not indexed. + + See :ref:`IndexClassDescr`. + """ + index_path = _index_pathname_of_column_( + self._table_path, self.pathname + ) + try: + index = self._table_file._get_node(index_path) + except NodeError: + index = None # The column is not indexed + return index + + @lazyattr + def _itemtype(self) -> np.dtype: + return self.descr._v_dtypes[self.name] + + @property + def shape(self) -> tuple[int, ...]: + """Return the shape of this column.""" + return (self.table.nrows,) + self.descr._v_dtypes[self.name].shape + + @property + def is_indexed(self) -> bool: + """Return True if the column is indexed, false otherwise.""" + if self.index is None: + return False + else: + return True + + @property + def maindim(self) -> int: + """Return the dimension along which iterators work. + + Its value is 0 (i.e. the first dimension). 
+ """ + return 0 + + def __init__(self, table: Table, name: str, descr: Description) -> None: + self._table_file = table._v_file + self._table_path = table._v_pathname + self.name = name + """The name of the associated column.""" + self.pathname = descr._v_colobjects[name]._v_pathname + """The complete pathname of the associated column (the same as + Column.name if the column is not inside a nested column).""" + self.descr = descr + """The Description (see :ref:`DescriptionClassDescr`) instance of the + parent table or nested column.""" + self._v_attrs = ColumnAttributeSet(self) + + def _g_update_table_location(self, table: Table) -> None: + """Update the location information about the associated `table`.""" + self._table_file = table._v_file + self._table_path = table._v_pathname + + def __len__(self) -> int: + """Get the number of elements in the column. + + This matches the length in rows of the parent table. + + """ + return self.table.nrows + + def __getitem__(self, key: int | slice) -> np.ndarray: + """Get a row or a range of rows from a column. + + If key argument is an integer, the corresponding element in the column + is returned as an object of the current flavor. If key is a slice, the + range of elements determined by it is returned as an array of the + current flavor. 
+ + Examples + -------- + :: + + print("Column handlers:") + for name in table.colnames: + print(table.cols._f_col(name)) + print("Select table.cols.name[1]-->", table.cols.name[1]) + print("Select table.cols.name[1:2]-->", table.cols.name[1:2]) + print("Select table.cols.name[:]-->", table.cols.name[:]) + print("Select table.cols._f_col('name')[:]-->", + table.cols._f_col('name')[:]) + + The output of this for a certain arbitrary table is:: + + Column handlers: + /table.cols.name (Column(), string, idx=None) + /table.cols.lati (Column(), int32, idx=None) + /table.cols.longi (Column(), int32, idx=None) + /table.cols.vector (Column(2,), int32, idx=None) + /table.cols.matrix2D (Column(2, 2), float64, idx=None) + Select table.cols.name[1]--> Particle: 11 + Select table.cols.name[1:2]--> ['Particle: 11'] + Select table.cols.name[:]--> ['Particle: 10' + 'Particle: 11' 'Particle: 12' + 'Particle: 13' 'Particle: 14'] + Select table.cols._f_col('name')[:]--> ['Particle: 10' + 'Particle: 11' 'Particle: 12' + 'Particle: 13' 'Particle: 14'] + + See the :file:`examples/table2.py` file for a more complete example. + + """ + table = self.table + + # Generalized key support not there yet, but at least allow + # for a tuple with one single element (the main dimension). 
+ # (key,) --> key + if isinstance(key, tuple) and len(key) == 1: + key = key[0] + + if is_idx(key): + key = operator.index(key) + + # Index out of range protection + if key >= table.nrows: + raise IndexError("Index out of range") + if key < 0: + # To support negative values + key += table.nrows + start, stop, step = table._process_range(key, key + 1, 1) + return table.read(start, stop, step, self.pathname)[0] + elif isinstance(key, slice): + start, stop, step = table._process_range( + key.start, key.stop, key.step + ) + return table.read(start, stop, step, self.pathname) + else: + raise TypeError("'%s' key type is not valid in this context" % key) + + def __iter__(self) -> Generator[np.ndarray]: + """Iterate through all items in the column.""" + table = self.table + itemsize = self.dtype.itemsize + nrowsinbuf = table._v_file.params["IO_BUFFER_SIZE"] // itemsize + buf = np.empty((nrowsinbuf,), self._itemtype) + max_row = len(self) + for start_row in range(0, len(self), nrowsinbuf): + end_row = min(start_row + nrowsinbuf, max_row) + buf_slice = buf[0 : end_row - start_row] + table.read( + start_row, end_row, 1, field=self.pathname, out=buf_slice + ) + yield from buf_slice + + def __setitem__(self, key: int | slice, value: Any) -> int: + """Set a row or a range of rows in a column. + + If key argument is an integer, the corresponding element is set to + value. If key is a slice, the range of elements determined by it is + set to value. 
+ + Examples + -------- + :: + + # Modify row 1 + table.cols.col1[1] = -1 + + # Modify rows 1 and 3 + table.cols.col1[1::2] = [2,3] + + Which is equivalent to:: + + # Modify row 1 + table.modify_columns(start=1, columns=[[-1]], names=['col1']) + + # Modify rows 1 and 3 + columns = np.rec.fromarrays([[2,3]], formats='i4') + table.modify_columns(start=1, step=2, columns=columns, + names=['col1']) + + """ + table = self.table + table._v_file._check_writable() + + # Generalized key support not there yet, but at least allow + # for a tuple with one single element (the main dimension). + # (key,) --> key + if isinstance(key, tuple) and len(key) == 1: + key = key[0] + + if is_idx(key): + key = operator.index(key) + + # Index out of range protection + if key >= table.nrows: + raise IndexError("Index out of range") + if key < 0: + # To support negative values + key += table.nrows + return table.modify_column( + key, key + 1, 1, [[value]], self.pathname + ) + elif isinstance(key, slice): + start, stop, step = table._process_range( + key.start, key.stop, key.step + ) + return table.modify_column(start, stop, step, value, self.pathname) + else: + raise ValueError("Non-valid index or slice: %s" % key) + + def create_index( + self, + optlevel: int = 6, + kind: str = "medium", + filters: Filters | None = None, + tmp_dir: str | None = None, + _blocksizes: tuple[int, int, int, int] | None = None, + _testmode: bool = False, + _verbose: bool = False, + ) -> int: + """Create an index for this column. + + .. warning:: + + In some situations it is useful to get a completely sorted + index (CSI). For those cases, it is best to use the + :meth:`Column.create_csindex` method instead. + + Parameters + ---------- + optlevel : int + The optimization level for building the index. The levels range + from 0 (no optimization) up to 9 (maximum optimization). 
Higher + levels of optimization mean better chances for reducing the entropy + of the index at the price of using more CPU, memory and I/O + resources for creating the index. + kind : str + The kind of the index to be built. It can take the 'ultralight', + 'light', 'medium' or 'full' values. Lighter kinds ('ultralight' + and 'light') mean that the index takes less space on disk, but will + perform queries slower. Heavier kinds ('medium' and 'full') mean + better chances for reducing the entropy of the index (increasing + the query speed) at the price of using more disk space as well as + more CPU, memory and I/O resources for creating the index. + + Note that selecting a full kind with an optlevel of 9 (the maximum) + guarantees the creation of an index with zero entropy, that is, a + completely sorted index (CSI) - provided that the number of rows in + the table does not exceed the 2**48 figure (that is more than 100 + trillions of rows). See :meth:`Column.create_csindex` method for a + more direct way to create a CSI index. + filters : Filters + Specify the Filters instance used to compress the index. If None, + default index filters will be used (currently, zlib level 1 with + shuffling). + tmp_dir + When kind is other than 'ultralight', a temporary file is created + during the index build process. You can use the tmp_dir argument + to specify the directory for this temporary file. The default is + to create it in the same directory as the file containing the + original table. 
+ + """ + kinds = ["ultralight", "light", "medium", "full"] + if kind not in kinds: + raise ValueError("Kind must have any of these values: %s" % kinds) + if not isinstance(optlevel, int) or (optlevel < 0 or optlevel > 9): + raise ValueError( + "Optimization level must be an integer in the range 0-9" + ) + if filters is None: + filters = default_index_filters + if tmp_dir is None: + tmp_dir = str(Path(self._table_file.filename).parent) + else: + if not Path(tmp_dir).is_dir(): + raise ValueError( + f"Temporary directory '{tmp_dir}' does not exist" + ) + if _blocksizes is not None and ( + not isinstance(_blocksizes, tuple) or len(_blocksizes) != 4 + ): + raise ValueError( + "_blocksizes must be a tuple with exactly 4 elements" + ) + idxrows = _column__create_index( + self, optlevel, kind, filters, tmp_dir, _blocksizes, _verbose + ) + return SizeType(idxrows) + + def create_csindex( + self, + filters: Filters | None = None, + tmp_dir: str | None = None, + _blocksizes: tuple[int, int, int, int] | None = None, + _testmode: bool = False, + _verbose: bool = False, + ) -> int: + """Create a completely sorted index (CSI) for this column. + + This method guarantees the creation of an index with zero entropy, that + is, a completely sorted index (CSI) -- provided that the number of rows + in the table does not exceed the 2**48 figure (that is more than 100 + trillions of rows). A CSI index is needed for some table methods (like + :meth:`Table.itersorted` or :meth:`Table.read_sorted`) in order to + ensure completely sorted results. + + For the meaning of filters and tmp_dir arguments see + :meth:`Column.create_index`. + + Notes + ----- + This method is equivalent to + Column.create_index(optlevel=9, kind='full', ...). 
+ + """ + return self.create_index( + kind="full", + optlevel=9, + filters=filters, + tmp_dir=tmp_dir, + _blocksizes=_blocksizes, + _testmode=_testmode, + _verbose=_verbose, + ) + + def _do_reindex(self, dirty: bool) -> int: + """Execute common code for reindex() and reindex_dirty() codes.""" + index = self.index + dodirty = True + if dirty and not index.dirty: + dodirty = False + if index is not None and dodirty: + self._table_file._check_writable() + # Get the old index parameters + kind = index.kind + optlevel = index.optlevel + filters = index.filters + # We *need* to tell the index that it is going to be undirty. + # This is needed here so as to unnail() the condition cache. + index.dirty = False + # Delete the existing Index + index._f_remove() + # Create a new Index with the previous parameters + return SizeType( + self.create_index( + kind=kind, optlevel=optlevel, filters=filters + ) + ) + else: + return SizeType(0) # The column is not intended for indexing + + def reindex(self) -> None: + """Recompute the index associated with this column. + + This can be useful when you suspect that, for any reason, + the index information is no longer valid and you want to rebuild it. + + This method does nothing if the column is not indexed. + + """ + self._do_reindex(dirty=False) + + def reindex_dirty(self) -> None: + """Recompute the associated index only if it is dirty. + + This can be useful when you have set :attr:`Table.autoindex` to false + for the table and you want to update the column's index after an + invalidating index operation (like :meth:`Table.remove_rows`). + + This method does nothing if the column is not indexed. + + """ + self._do_reindex(dirty=True) + + def remove_index(self) -> None: + """Remove the index associated with this column. + + This method does nothing if the column is not indexed. The removed + index can be created again by calling the :meth:`Column.create_index` + method. 
class ColumnAttributeSet:
    """Column attribute set.

    Attribute-style and dictionary-style access to the attributes of one
    column.  Nothing is stored in this object besides three bookkeeping
    entries: every column attribute lives in the attribute set of the parent
    table under the key ``FIELD_<pos>_ATTR_<name>``, where ``<pos>`` is the
    position of the column.

    Parameters
    ----------
    column
        The column whose attributes are exposed.  Only a weak reference to
        it is kept, since the column itself holds this object.

    """

    def __init__(self, column: Column) -> None:
        # Go through __dict__ directly: __setattr__ is overridden below and
        # forwards non-system names to the table attributes.
        self.__dict__["_v_tableattrs"] = column.table.attrs
        self.__dict__["_v_fieldindex"] = column._v_pos
        self.__dict__["_v_column_reference"] = weakref.ref(column)

        # Check if this column has _v_col_attrs set and translate them into
        # the table attribute format
        for col_attr_key, col_attr_val in column._v_col_attrs.items():
            self.__setitem__(col_attr_key, col_attr_val)

    def issystemcolumnname(self, key: str) -> bool:
        """Check whether a key is a reserved attribute name."""
        return key in ["_v_tableattrs", "_v_fieldindex", "_v_column_reference"]

    def _prefix(self, string: str) -> str:
        """Prefix keys with a special pattern for storing table attributes."""
        field_index = self.__dict__["_v_fieldindex"]
        return "FIELD_%i_ATTR_%s" % (field_index, string)

    def __getattr__(self, key: str) -> Any:
        """Retrieve a PyTables attribute for this column.

        Python only calls this hook after normal lookup has failed, so a
        system name that gets here is genuinely missing from the instance.
        """
        if self.issystemcolumnname(key):
            # object defines no __getattr__ to delegate to; report the
            # missing attribute directly.
            raise AttributeError(
                f"{self.__class__.__name__!r} object has no attribute {key!r}"
            )
        return getattr(self._v_tableattrs, self._prefix(key))

    def __setattr__(self, key: str, val: Any) -> Any:
        """Set a PyTables attribute for this column."""
        if not self.issystemcolumnname(key):
            setattr(self._v_tableattrs, self._prefix(key), val)
        else:
            return super().__setattr__(key, val)

    def __getitem__(self, key: str) -> Any:
        """Dictionary-like interface for __getattr__."""
        if self.issystemcolumnname(key):
            # Read the bookkeeping entry straight from the instance;
            # indexing self again here would recurse without end.
            return self.__dict__[key]
        return self._v_tableattrs[self._prefix(key)]

    def __setitem__(self, key: str, value: Any) -> None:
        """Dictionary-like interface for __setattr__."""
        if self.issystemcolumnname(key):
            # Same reasoning as in __getitem__: avoid endless recursion.
            self.__dict__[key] = value
        else:
            self._v_tableattrs[self._prefix(key)] = value

    def __delattr__(self, key: str) -> None:
        """Delete the attribute for this column."""
        if self.issystemcolumnname(key):
            raise TypeError("Deleting system attributes is prohibited")
        else:
            delattr(self._v_tableattrs, self._prefix(key))

    def __delitem__(self, key: str) -> None:
        """Dictionary-like interface for __delattr__."""
        if self.issystemcolumnname(key):
            raise TypeError("Deleting system attributes is prohibited")
        else:
            del self._v_tableattrs[self._prefix(key)]

    def _f_rename(self, oldattrname: str, newattrname: str) -> None:
        """Rename an attribute from oldattrname to newattrname."""
        if oldattrname == newattrname:
            # Do nothing
            return

        if self.issystemcolumnname(oldattrname):
            raise TypeError("Renaming system attributes is prohibited")

        # First, fetch the value of the oldattrname
        attrvalue = getattr(self, oldattrname)

        # Now, create the new attribute
        setattr(self, newattrname, attrvalue)

        # Finally, remove the old attribute
        delattr(self, oldattrname)

    def _f_copy(self, where: Column) -> None:
        """Copy attributes to another column."""
        # Is there a better way to do this?
        if not isinstance(where, Column):
            raise TypeError(f"destination object is not a column: {where!r}")

        for key in self.keys():
            where.attrs[key] = self[key]

    def keys(self) -> list[str]:
        """Return the list of attributes for this column."""
        col_prefix = self._prefix("")
        length = len(col_prefix)
        return [
            key[length:]
            for key in self._v_tableattrs._v_attrnames
            if key.startswith(col_prefix)
        ]

    def contains(self, key: str) -> bool:
        """Return whether a key is in the attribute set."""
        return key in self.keys()

    def __str__(self) -> str:
        """Return the string representation for this object."""
        pathname = self._v_tableattrs._v__nodepath
        column = self._v_column_reference()
        classname = column.__class__.__name__
        attrnumber = len(self.keys())
        columnname = column.name

        return (
            f"{pathname}.cols.{columnname}._v_attrs ({classname}), "
            f"{attrnumber} attributes"
        )

    def __repr__(self) -> str:
        """Detailed string representation for this object."""
        # print additional info only if there are attributes to show
        attrnames = self.keys()
        if attrnames:
            rep = [f"{attr} := {getattr(self, attr)!r}" for attr in attrnames]
            return f"{self!s}:\n [" + ",\n ".join(rep) + "]"
        else:
            return str(self)
+######################################################################## + +"""Here is where Table and Row extension types live. + +Classes (type extensions): + + Table + Row + +Functions: + +Misc variables: + +""" +import os +import sys +import math +import platform +from time import time + +import numpy as np + +from .utils import SizeType +from .conditions import call_on_recarr +from .exceptions import HDF5ExtError +from .description import Col +from .utilsextension import ( + get_nested_field, + atom_from_hdf5_type, + create_nested_type, + hdf5_to_np_ext_type, + platform_byteorder, + pttype_to_hdf5, + pt_special_kinds, + npext_prefixes_to_ptkinds, + hdf5_class_to_string, + H5T_STD_I64, +) + +from numpy cimport ( + import_array, + ndarray, + npy_intp, + PyArray_GETITEM, + PyArray_SETITEM, + PyArray_BYTES, + PyArray_DATA, + PyArray_NDIM, + PyArray_STRIDE, +) +from cpython cimport PyErr_Clear +from libc.stdio cimport snprintf +from libc.stdint cimport int32_t +from libc.stdlib cimport malloc, free +from libc.string cimport memcpy, strdup, strcmp, strlen + +from .definitions cimport ( + hid_t, + herr_t, + hsize_t, + haddr_t, + htri_t, + hbool_t, + H5F_ACC_RDONLY, + H5P_DEFAULT, + H5D_CHUNKED, + H5T_DIR_DEFAULT, + H5F_SCOPE_LOCAL, + H5F_SCOPE_GLOBAL, + H5T_COMPOUND, + H5Tget_order, + H5Fflush, + H5Dget_create_plist, + H5T_ORDER_LE, + H5D_layout_t, + H5Dopen, + H5Dclose, + H5Dread, + H5Dget_type, + H5Dget_space, + H5Pget_layout, + H5Pget_chunk, + H5Pclose, + H5Sget_simple_extent_ndims, + H5Sget_simple_extent_dims, + H5Sclose, + H5T_class_t, + H5Tget_size, + H5Tset_size, + H5Tcreate, + H5Tcopy, + H5Tclose, + H5Tget_nmembers, + H5Tget_member_name, + H5Tget_member_type, + H5Tget_native_type, + H5Tget_member_offset, + H5Tinsert, + H5Tget_class, + H5Tget_super, + H5Tget_offset, + H5T_cset_t, + H5T_CSET_ASCII, + H5T_CSET_UTF8, + H5ATTRset_attribute_string, + H5ATTRset_attribute, + get_len_of_range, + get_order, + set_order, + is_complex, + conv_float64_timeval32, + 
truncate_dset, + H5free_memory, +) + +# numpy functions & objects +from .hdf5extension cimport Leaf +from .utilsextension cimport get_native_type, cstr_to_pystr +from .lrucacheextension cimport ObjectCache, NumCache + +#----------------------------------------------------------------- + +# Optimized HDF5 API for PyTables +cdef extern from "H5TB-opt.h" nogil: + + ctypedef struct chunk_iter_op: + size_t itemsize + size_t chunkshape + haddr_t *addrs + + int fill_chunk_addrs(hid_t dataset_id, hsize_t nchunks, chunk_iter_op *chunk_op) + int clean_chunk_addrs(chunk_iter_op *chunk_op) + + herr_t H5TBOmake_table( char *table_title, hid_t loc_id, char *dset_name, + char *version, char *class_, + hid_t mem_type_id, hsize_t nrecords, + hsize_t chunk_size, hsize_t block_size, + void *fill_data, int compress, + char *complib, int shuffle, int fletcher32, + hbool_t track_times, hbool_t blosc2_support, + void *data ) + + herr_t H5TBOread_records( char* filename, hbool_t blosc2_support, + chunk_iter_op chunk_op, + hid_t dataset_id, hid_t mem_type_id, + hsize_t start, hsize_t nrecords, void *data ) + + herr_t H5TBOread_elements( hid_t dataset_id, hid_t mem_type_id, + hsize_t nrecords, void *coords, void *data ) + + herr_t H5TBOappend_records( hbool_t blosc2_support, hid_t dataset_id, + hid_t mem_type_id, hsize_t start, + hsize_t nrecords, void *data ) + + herr_t H5TBOwrite_records ( hbool_t blosc2_support, hid_t dataset_id, + hid_t mem_type_id, hsize_t start, + hsize_t nrecords, hsize_t step, void *data ) + + herr_t write_records_blosc2( hid_t dataset_id, hid_t mem_type_id, + hsize_t start, hsize_t nrecords, + const void *data ) + + herr_t H5TBOwrite_elements( hid_t dataset_id, hid_t mem_type_id, + hsize_t nrecords, void *coords, void *data ) + + herr_t H5TBOdelete_records( char* filename, hbool_t blosc2_support, + chunk_iter_op chunk_op, + hid_t dataset_id, hid_t mem_type_id, + hsize_t ntotal_records, size_t src_size, + hsize_t start, hsize_t nrecords, + hsize_t maxtuples ) + + 
cdef get_nested_field_cache(recarray, fieldname, fieldcache):
    """Get the maybe nested field named `fieldname` from the `recarray`.

    The `fieldname` may be a simple field name or a nested field name with
    slash-separated components.  It can also be an integer specifying the
    position of the field.

    Fields that have been looked up are stored in `fieldcache` under
    `fieldname`, so later requests for the same field are answered from the
    cache without touching `recarray` again.

    """

    try:
        field = fieldcache[fieldname]
    except KeyError:
        # Check whether fieldname is an integer and if so, get the field
        # straight from the recarray dictionary (it can't be anywhere else)
        if isinstance(fieldname, int):
            field = recarray[fieldname]
        else:
            field = get_nested_field(recarray, fieldname)
        # Remember the result for the next request
        fieldcache[fieldname] = field
    return field
    def _create_table(self, title, complib, obversion):
        """Create the on-disk table dataset and return its identifier.

        `title`, `complib` and `obversion` are encoded as UTF-8 and passed
        to ``H5TBOmake_table`` together with the compound type built from
        ``self.description``.  When the ``PYTABLES_SYS_ATTRS`` file parameter
        is true, the CLASS, VERSION, TITLE, NROWS and ``FIELD_<i>_NAME``
        attributes are attached to the new dataset.

        Raises ``HDF5ExtError`` if the dataset cannot be created or if any of
        the attributes cannot be set.

        """
        cdef int offset
        cdef int ret
        cdef long buflen
        cdef hid_t oid
        cdef void *data
        cdef hsize_t nrows
        cdef bytes class_
        cdef ndarray wdflts
        cdef void *fill_data
        cdef ndarray recarr
        cdef object name
        cdef bytes encoded_title, encoded_complib, encoded_obversion
        cdef char *ctitle = NULL
        cdef char *cobversion = NULL
        cdef bytes encoded_name
        cdef char fieldname[128]
        cdef int i
        cdef H5T_cset_t cset = H5T_CSET_ASCII

        encoded_title = title.encode('utf-8')
        encoded_complib = complib.encode('utf-8')
        encoded_obversion = obversion.encode('utf-8')
        encoded_name = self.name.encode('utf-8')

        # Get the C pointer
        ctitle = encoded_title
        cobversion = encoded_obversion

        # Compute the complete compound datatype based on the table description
        self.disk_type_id = create_nested_type(self.description, self.byteorder)
        #self.type_id = H5Tcopy(self.disk_type_id)
        # A H5Tcopy only is not enough, as we want the in-memory type to be
        # in the byteorder of the machine (sys.byteorder).
        self.type_id = create_nested_type(self.description, sys.byteorder)

        # The fill values area
        wdflts = self._v_wdflts
        if wdflts is None:
            fill_data = NULL
        else:
            fill_data = PyArray_DATA(wdflts)

        # test if there is data to be saved initially
        if self._v_recarray is not None:
            recarr = self._v_recarray
            data = PyArray_DATA(recarr)
        else:
            data = NULL

        # Decide whether Blosc2 optimized operations can be used.
        # All four conditions must hold: native byteorder, no fletcher32
        # checksum, and a compression library whose name starts with "blosc2".
        self.blosc2_support_write = (
            (self.byteorder == sys.byteorder) and
            (not self.filters.fletcher32) and
            (self.filters.complib is not None) and
            (self.filters.complib.startswith("blosc2")))
        # For reading, Windows does not support re-opening a file twice
        # in not read-only mode (for good reason), so we cannot use the
        # blosc2 opt
        self.blosc2_support_read = (
            self.blosc2_support_write and
            ((platform.system().lower() != 'windows') or
             (self._v_file.mode == 'r')))

        class_ = self._c_classid.encode('utf-8')
        # Block size comes from the environment; "0" is the fallback value
        cdef hsize_t blocksize = int(os.environ.get("PT_DEFAULT_B2_BLOCKSIZE", "0"))
        self.dataset_id = H5TBOmake_table(ctitle, self.parent_id, encoded_name,
                                          cobversion, class_, self.disk_type_id,
                                          self.nrows, self.chunkshape[0],
                                          blocksize, fill_data,
                                          self.filters.complevel, encoded_complib,
                                          self.filters.shuffle_bitshuffle,
                                          self.filters.fletcher32,
                                          self._want_track_times,
                                          self.blosc2_support_write, data)
        if self.dataset_id < 0:
            raise HDF5ExtError("Problems creating the table")

        # NOTE(review): the nesting of the attribute writes under this ``if``
        # is reconstructed from whitespace-flattened text -- confirm.
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            cset = H5T_CSET_UTF8
            # Set the conforming table attributes
            # Attach the CLASS attribute
            ret = H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
                                             len(class_), cset)
            if ret < 0:
                raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                                   ("CLASS", self.name))
            # Attach the VERSION attribute
            ret = H5ATTRset_attribute_string(self.dataset_id, "VERSION", cobversion,
                                             len(encoded_obversion), cset)
            if ret < 0:
                raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                                   ("VERSION", self.name))
            # Attach the TITLE attribute
            ret = H5ATTRset_attribute_string(self.dataset_id, "TITLE", ctitle,
                                             len(encoded_title), cset)
            if ret < 0:
                raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                                   ("TITLE", self.name))
            # Attach the NROWS attribute
            nrows = self.nrows
            ret = H5ATTRset_attribute(self.dataset_id, "NROWS", H5T_STD_I64,
                                      0, NULL, &nrows)
            if ret < 0:
                raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                                   ("NROWS", self.name))

            # Attach the FIELD_N_NAME attributes
            # We write only the first level names
            for i, name in enumerate(self.description._v_names):
                snprintf(fieldname, 128, "FIELD_%d_NAME", i)
                encoded_name = name.encode('utf-8')
                ret = H5ATTRset_attribute_string(self.dataset_id, fieldname,
                                                 encoded_name, len(encoded_name),
                                                 cset)
                if ret < 0:
                    raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                                       (fieldname, self.name))

        # If created in PyTables, the table is always chunked
        self._chunked = True  # Accessible from python

        # Initialize blosc2 struct for chunk addresses
        self.chunk_op = chunk_iter_op(self.description._v_itemsize, self.chunkshape[0], NULL)

        # Finally, return the object identifier.
        return self.dataset_id
    cdef get_nested_type(self, hid_t type_id, hid_t native_type_id,
                         object colpath, object field_byteorders):
        """Open a nested type and return a nested dictionary as description.

        Walks the members of the compound type `type_id` in order of their
        offset, inserts a native counterpart of every member into
        `native_type_id`, and recurses into compound members that are not
        complex numbers.

        `colpath` is the slash-separated path of the type being visited; the
        empty string marks the top level, where ``self.byteorder`` is set
        from the byteorders collected in `field_byteorders`.

        Returns a ``(desc, offset)`` tuple: the description dictionary and
        the sum of the item sizes of the members visited at this level.

        Raises ``TypeError`` (with the table and column names added to the
        message) when a member type cannot be converted.

        """

        cdef hid_t member_type_id, native_member_type_id, member_offset
        cdef hsize_t nfields, i
        cdef hsize_t dims[1]
        cdef size_t itemsize
        cdef char *c_colname
        cdef H5T_class_t class_id
        cdef char c_byteorder2[11]  # "irrelevant" fits easily here
        cdef char *sys_byteorder
        cdef object desc, colobj, colpath2, typeclassname, typeclass
        cdef object byteorder
        cdef str colname, byteorder2

        offset = 0
        desc = {}
        # Get the number of members
        nfields = H5Tget_nmembers(type_id)

        # Iterate through fields to get the correct order that elements may appear in
        # The object type can be stored not in order, so order based on the offset in the data
        position_order = []
        for i in range(nfields):
            member_offset = H5Tget_member_offset(type_id, i)
            position_order.append((member_offset, i))

        position_order.sort()

        # Iterate thru the members
        # (pos is the rank by offset, i is the member index inside type_id)
        for pos, i in enumerate([x[1] for x in position_order]):
            # Get the member name
            c_colname = H5Tget_member_name(type_id, i)
            colname = cstr_to_pystr(c_colname)

            # Get the member type
            member_type_id = H5Tget_member_type(type_id, i)
            # Get the member offset
            member_offset = H5Tget_member_offset(type_id, i)
            # Get the HDF5 class
            class_id = H5Tget_class(member_type_id)
            if class_id == H5T_COMPOUND and not is_complex(member_type_id):
                colpath2 = join_path(colpath, colname)
                # Create the native data in-memory
                itemsize = H5Tget_size(member_type_id)
                native_member_type_id = H5Tcreate(H5T_COMPOUND, itemsize)
                desc[colname], itemsize = self.get_nested_type(
                    member_type_id, native_member_type_id, colpath2, field_byteorders)
                desc[colname]["_v_pos"] = pos
                desc[colname]["_v_offset"] = member_offset
            else:
                # Get the member format and the corresponding Col object
                try:
                    native_member_type_id = get_native_type(member_type_id)
                    atom = atom_from_hdf5_type(native_member_type_id)
                    colobj = Col.from_atom(atom, pos=pos, _offset=member_offset)
                    itemsize = H5Tget_size(native_member_type_id)
                except TypeError, te:
                    # Re-raise TypeError again with more info
                    raise TypeError(
                        ("table ``%s``, column ``%s``: %%s" % (self.name, colname))
                        % te.args[0])
                desc[colname] = colobj
                # For time kinds, save the byteorder of the column
                # (useful for conversion of time datatypes later on)
                if colobj.kind == "time":
                    colobj._byteorder = H5Tget_order(member_type_id)
                    if colobj._byteorder == H5T_ORDER_LE:
                        field_byteorders.append("little")
                    else:
                        field_byteorders.append("big")
                elif colobj.kind in ['int', 'uint', 'float', 'complex', 'enum']:
                    # Keep track of the byteorder for this column
                    get_order(member_type_id, c_byteorder2)
                    byteorder2 = cstr_to_pystr(c_byteorder2)
                    if byteorder2 in ["little", "big"]:
                        field_byteorders.append(byteorder2)

            # Insert the native member
            H5Tinsert(native_type_id, c_colname, member_offset, native_member_type_id)
            # Update the offset
            offset = offset + itemsize
            # Release resources
            H5Tclose(native_member_type_id)
            H5Tclose(member_type_id)
            H5free_memory(c_colname)

        # set the byteorder and other things (just in top level)
        if colpath == "":
            # Compute a byteorder for the entire table
            if len(field_byteorders) > 0:
                field_byteorders = np.array(field_byteorders)
                # Cython doesn't interpret well the extended comparison
                # operators so this: field_byteorders == "little" doesn't work
                # as expected
                if np.all(field_byteorders.__eq__("little")):
                    byteorder = "little"
                elif np.all(field_byteorders.__eq__("big")):
                    byteorder = "big"
                else:  # Yes! someone has done it!
                    byteorder = "mixed"
            else:
                # No member contributed a byteorder
                byteorder = "irrelevant"
            self.byteorder = byteorder

        return desc, offset
NULL) + + # Return the object ID and the description + return (self.dataset_id, desc, SizeType(chunksize[0])) + + cdef _convert_time64_(self, ndarray nparr, hsize_t nrecords, int sense): + """Converts a NumPy of Time64 elements between NumPy and HDF5 formats. + + NumPy to HDF5 conversion is performed when 'sense' is 0. Otherwise, HDF5 + to NumPy conversion is performed. The conversion is done in place, + i.e. 'nparr' is modified. + + """ + + cdef void *t64buf + cdef long byteoffset + cdef npy_intp bytestride, nelements + + byteoffset = 0 # NumPy objects doesn't have an offset + bytestride = PyArray_STRIDE(nparr, 0) # supports multi-dimensional recarray + # Compute the number of elements in the multidimensional cell + nelements = nparr.size // len(nparr) + t64buf = PyArray_DATA(nparr) + + conv_float64_timeval32( + t64buf, byteoffset, bytestride, nrecords, nelements, sense) + + cpdef _convert_types(self, ndarray recarr, hsize_t nrecords, int sense): + """Converts columns in 'recarr' between NumPy and HDF5 formats. + + NumPy to HDF5 conversion is performed when 'sense' is 0. Otherwise, HDF5 + to NumPy conversion is performed. The conversion is done in place, + i.e. 'recarr' is modified. + + """ + + # For reading, first swap the byteorder by hand + # (this is not currently supported by HDF5) + if sense == 1: + for colpathname in self.colpathnames: + if self.coltypes[colpathname] in ["time32", "time64"]: + colobj = self.coldescrs[colpathname] + if hasattr(colobj, "_byteorder"): + if colobj._byteorder != platform_byteorder: + column = get_nested_field(recarr, colpathname) + # Do an *inplace* byteswapping + column.byteswap(True) + + # This should be generalised to support other type conversions. 
def _open_append(self, ndarray recarr):
    """Register ``recarr`` as the buffer used by the following appends."""
    # Keep the array and the address of its data area
    self._v_recarray = recarr
    self.wbuf = PyArray_DATA(recarr)

def _append_records(self, hsize_t nrecords):
    """Append ``nrecords`` rows from the registered write buffer.

    Raises
    ------
    HDF5ExtError
        If the append call reports an error.

    """
    cdef int status
    cdef hsize_t first_row

    # The cached chunk addresses are no longer valid
    self._clean_chunk_addrs()

    # Convert some NumPy types to HDF5 before storing.
    self._convert_types(self._v_recarray, nrecords, 0)

    first_row = self.nrows
    # The GIL is released so that other threads can run meanwhile
    with nogil:
        status = H5TBOappend_records(self.blosc2_support_write,
                                     self.dataset_id, self.type_id,
                                     first_row, nrecords, self.wbuf)

    if status < 0:
        raise HDF5ExtError("Problems appending the records.")

    self.nrows = self.nrows + nrecords

def _close_append(self):
    """Finish an append: store NROWS if enabled and drop the buffer.

    Raises
    ------
    HDF5ExtError
        If the NROWS attribute cannot be set.

    """
    cdef hsize_t total_rows
    cdef herr_t status

    if self._v_file.params['PYTABLES_SYS_ATTRS']:
        # Update the NROWS attribute
        total_rows = self.nrows
        status = H5ATTRset_attribute(self.dataset_id, "NROWS", H5T_STD_I64,
                                     0, NULL, &total_rows)
        if status < 0:
            raise HDF5ExtError("Problems setting the NROWS attribute.")

    # Mark the caches as dirty and forget the chunk addresses
    self._dirtycache = True
    self._clean_chunk_addrs()
    # The recarray is not needed anymore
    self._v_recarray = None
def _update_elements(self, hsize_t nrecords, ndarray coords,
                     ndarray recarr):
    """Write ``nrecords`` rows of ``recarr`` at the positions in ``coords``.

    ``recarr`` is converted in place to the HDF5 representation before it
    is written.

    Raises
    ------
    HDF5ExtError
        If the write call reports an error.

    """
    cdef herr_t status
    cdef void *data_ptr
    cdef void *coords_ptr

    # Convert some NumPy types to HDF5 before storing.
    self._convert_types(recarr, nrecords, 0)

    # Addresses of the data area and of the coordinates
    data_ptr = PyArray_DATA(recarr)
    coords_ptr = PyArray_DATA(coords)

    # Update the records:
    with nogil:
        status = H5TBOwrite_elements(self.dataset_id, self.type_id,
                                     nrecords, coords_ptr, data_ptr)

    if status < 0:
        raise HDF5ExtError("Problems updating the records.")

    # Set the caches to dirty
    self._dirtycache = True
    self._clean_chunk_addrs()
cdef hsize_t _read_chunk(self, hsize_t nchunk, ndarray iobuf, long cstart):
    """Read chunk number ``nchunk`` into ``iobuf`` through the chunk cache.

    The data is placed in ``iobuf`` starting at row ``cstart``.  When the
    chunk is already in the chunk cache it is copied from there;
    otherwise it is read from disk and stored in the cache.

    Returns the number of records of the chunk (fewer than a full chunk
    for the last one).  Raises ``HDF5ExtError`` if the read fails.

    """
    cdef long nslot
    cdef hsize_t start, nrecords, chunkshape
    cdef int ret
    cdef void *rbuf
    cdef NumCache chunkcache
    cdef bytes fname = self._v_file.filename.encode('utf8')
    cdef char* filename = fname

    if self.blosc2_support_read:
        # Grab the addresses for the blosc2 frames (HDF5 chunks).
        # Exact integer ceiling division: no float rounding for very large
        # row counts and no dependence on the semantics of "/".
        rows_per_chunk = self.chunkshape[0]
        nchunks = (self.nrows + rows_per_chunk - 1) // rows_per_chunk
        fill_chunk_addrs(self.dataset_id, nchunks, &self.chunk_op)

    chunkcache = self._chunkcache
    chunkshape = chunkcache.slotsize
    # Correct the number of records to read, if needed
    start = nchunk*chunkshape
    nrecords = chunkshape
    if (start + nrecords) > self.nrows:
        nrecords = self.nrows - start
    rbuf = PyArray_BYTES(iobuf) + cstart * chunkcache.itemsize
    # Try to see if the chunk is in cache
    nslot = chunkcache.getslot_(nchunk)
    if nslot >= 0:
        chunkcache.getitem_(nslot, rbuf, 0)
    else:
        # Chunk is not in cache. Read it and put it in the LRU cache.
        with nogil:
            ret = H5TBOread_records(filename, self.blosc2_support_read,
                                    self.chunk_op, self.dataset_id,
                                    self.type_id, start, nrecords, rbuf)

        if ret < 0:
            raise HDF5ExtError("Problems reading chunk records.")
        nslot = chunkcache.setitem_(nchunk, rbuf, 0)
    return nrecords
def _remove_rows(self, hsize_t start, hsize_t stop, long step):
    """Remove rows from the table and return how many were removed.

    A ``step`` of 1 removes the contiguous range ``[start, stop)`` in a
    single call.  Any other non-zero step is reduced to single-row
    removals, always walking the table backwards.

    Raises
    ------
    HDF5ExtError
        If the deletion fails or the NROWS attribute cannot be updated.
    ValueError
        If ``step`` is 0.

    """
    cdef size_t rowsize
    cdef hsize_t nrecords=0, nrecords2
    cdef hsize_t i
    cdef bytes fname = self._v_file.filename.encode('utf8')
    cdef char* filename = fname

    if step == 1:
        nrecords = stop - start
        rowsize = self.rowsize
        # Using self.disk_type_id should be faster (i.e. less conversions)
        if (H5TBOdelete_records(filename, self.blosc2_support_read,
                                self.chunk_op, self.dataset_id,
                                self.disk_type_id, self.nrows, rowsize,
                                start, nrecords, self.nrowsinbuf) < 0):
            raise HDF5ExtError("Problems deleting records.")

        self.nrows = self.nrows - nrecords
        # Set the caches to dirty.  This is done before touching the
        # attribute so the in-memory state is consistent even if the
        # attribute update below fails.
        self._dirtycache = True
        self._clean_chunk_addrs()
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            # Attach the NROWS attribute; a failure is reported the same
            # way as when closing an append instead of being ignored.
            nrecords2 = self.nrows
            if (H5ATTRset_attribute(self.dataset_id, "NROWS", H5T_STD_I64,
                                    0, NULL, &nrecords2) < 0):
                raise HDF5ExtError("Problems setting the NROWS attribute.")
    elif step == -1:
        nrecords = self._remove_rows(stop+1, start+1, 1)
    elif step >= 1:
        # always want to go through the space backwards
        for i in range(stop - step, start - step, -step):
            nrecords += self._remove_rows(i, i+1, 1)
    elif step <= -1:
        # always want to go through the space backwards
        for i in range(start, stop, step):
            nrecords += self._remove_rows(i, i+1, 1)
    else:
        raise ValueError("step size may not be 0.")

    # Return the number of records removed
    return nrecords

# Clean address cache
def _clean_chunk_addrs(self):
    """Clean the chunk address cache kept in ``self.chunk_op``."""
    clean_chunk_addrs(&self.chunk_op)
+ + This class provides an *iterator interface* + so that you can use the same Row instance to + access successive table rows one after the other. There are also + some important methods that are useful for accessing, adding and + modifying values in tables. + + .. rubric:: Row attributes + + .. attribute:: nrow + + The current row number. + + This property is useful for knowing which row is being dealt with in the + middle of a loop or iterator. + + """ + + cdef npy_intp _stride + cdef long _row, _unsaved_nrows, _mod_nrows + cdef long long start, absstep + cdef long long stop, step, nextelement, _nrow, stopb # has to be long long, not hsize_t, for negative step sizes + cdef long long nrowsinbuf, nrows, nrowsread + cdef long long chunksize, nchunksinbuf, totalchunks + cdef long long startb, lenbuf + cdef long long indexchunk + cdef int bufcounter, counter + cdef int exist_enum_cols + cdef int _riterator, _rowsize, _write_to_seqcache + cdef int wherecond, indexed + cdef int ro_filemode, chunked + cdef int _bufferinfo_done, sss_on + cdef long long iterseq_max_elements + cdef ndarray bufcoords, indexvalid, indexvalues, chunkmap + cdef hsize_t *bufcoords_data + cdef hsize_t *index_values_data + cdef char *chunkmap_data + cdef char *index_valid_data + cdef object dtype + cdef object iobuf, iobufcpy + cdef object wrec, wreccpy + cdef object wfields, rfields + cdef object coords + cdef object condfunc, condargs, condkwargs + cdef object mod_elements, colenums + cdef object rfieldscache, wfieldscache + cdef object iterseq + cdef object _table_file, _table_path + cdef object modified_fields + cdef object seqcache_key + + # The nrow() method has been converted into a property, which is handier + property nrow: + """The current row number. + + This property is useful for knowing which row is being dealt with in the + middle of a loop or iterator. 
property table:
    def __get__(self):
        # The table is looked up by path on every access, after checking
        # that the file is still open.
        owner = self._table_file
        owner._check_open()
        return owner._get_node(self._table_path)

def __cinit__(self, table):
    # Counters and flags start from a clean state
    self._row = 0
    self._nrow = 0  # Useful in mod_append read iterators
    self._unsaved_nrows = 0
    self._mod_nrows = 0
    self._riterator = 0
    self._bufferinfo_done = 0
    # Location-dependent information.
    self._table_file = table._v_file
    self._table_path = table._v_pathname
    # Some variables from table will be cached here
    self.ro_filemode = 1 if table._v_file.mode == 'r' else 0
    self.chunked = table._chunked
    self.colenums = table._colenums
    self.exist_enum_cols = len(self.colenums)
    self.nrowsinbuf = table.nrowsinbuf
    self.chunksize = table.chunkshape[0]
    self.nchunksinbuf = self.nrowsinbuf // self.chunksize
    self.dtype = table._v_dtype
    # The I/O buffers need the dtype and buffer size set above
    self._new_buffer(table)
    self.mod_elements = None
    self.rfieldscache = {}
    self.wfieldscache = {}
    self.modified_fields = set()

def _iter(self, start=0, stop=0, step=1, coords=None, chunkmap=None):
    """Return an iterator for traversing the data in table."""
    self._init_loop(start, stop, step, coords, chunkmap)
    return iter(self)

def __iter__(self):
    """Iterator that traverses all the data in the Table"""
    return self
cdef _init_loop(self, long long start, long long stop, long long step,
                object coords, object chunkmap):
    """Initialization for the __iter__ iterator

    Stores the iteration range and resets the per-iteration state.  The
    mode flags are then set from what is passed in and from what the
    table has pending: ``coords`` (explicit coordinates),
    ``table._where_condition`` (in-kernel condition) and
    ``table._use_index`` (indexed query with ``chunkmap``).  The pending
    condition, index flag and sequence-cache key are consumed from the
    table, i.e. reset on it after being copied here.

    """
    cdef Table table = self.table
    self._riterator = 1  # We are inside a read iterator
    self.start = start
    self.stop = stop
    self.step = step
    self.coords = coords
    self.startb = 0
    if step > 0:
        self._row = -1  # a sentinel
        self.nrowsread = start
    elif step < 0:
        self._row = 0
        self.nrowsread = 0
        self.nextelement = start
    # One step before 'start', so that adding 'step' yields 'start'
    self._nrow = start - self.step
    self.wherecond = 0
    self.indexed = 0
    self.nrows = table.nrows  # Update the row counter
    if table.blosc2_support_read:
        # Grab the addresses for the blosc2 frames (HDF5 chunks)
        nchunks = math.ceil(self.nrows / self.table.chunkshape[0])
        fill_chunk_addrs(table.dataset_id, nchunks, &table.chunk_op)

    # Iteration over explicit coordinates needs nothing else from below
    if coords is not None and 0 < step:
        self.nrowsread = start
        self.nextelement = start
        # Never go past the end of the coordinates
        self.stop = min(stop, len(coords))
        self.absstep = abs(step)
        return
    elif coords is not None and 0 > step:
        #self.nrowsread = 0
        #self.nextelement = start
        #self.stop = min(stop, len(coords))
        #self.stop = max(stop, start - len(coords))
        self.absstep = abs(step)
        return

    if table._where_condition:
        self.wherecond = 1
        #self.condkwargs = {'ex_uses_vml': True}
        self.condfunc, self.condargs, self.condkwargs = table._where_condition
        # The condition is consumed by this iterator
        table._where_condition = None

    if table._use_index:
        # Indexing code depends on this condition (see #319)
        assert self.nrowsinbuf % self.chunksize == 0
        self.indexed = 1
        # Compute totalchunks here because self.nrows can change during the
        # life of a Row instance.
        self.totalchunks = self.nrows // self.chunksize
        if self.nrows % self.chunksize:
            # A partial last chunk counts as one more chunk
            self.totalchunks = self.totalchunks + 1
        self.nrowsread = 0
        self.nextelement = 0
        self.chunkmap = chunkmap
        self.chunkmap_data = PyArray_BYTES(self.chunkmap)
        table._use_index = False
        self.lenbuf = self.nrowsinbuf
        # Check if we have limitations on start, stop, step
        self.sss_on = (self.start > 0 or self.stop < self.nrows or self.step > 1)

    self.seqcache_key = table._seqcache_key
    table._seqcache_key = None
    if self.seqcache_key is not None:
        self._write_to_seqcache = 1
        self.iterseq_max_elements = table._v_file.params['ITERSEQ_MAX_ELEMENTS']
        self.iterseq = []  # all the row indexes, unless it would be longer than ITERSEQ_MAX_ELEMENTS
    else:
        self._write_to_seqcache = 0
        self.iterseq = None
+ raise StopIteration + if self.indexed: + return self.__next__indexed() + elif self.coords is not None: + return self.__next__coords() + elif self.wherecond: + return self.__next__inkernel() + else: + return self.__next__general() + + cdef __next__indexed(self): + """The version of next() for indexed columns and a chunkmap.""" + + cdef long recout, j, cs, vlen, rowsize + cdef long long nchunksread + cdef object tmp_range + cdef Table table + cdef ndarray iobuf + cdef void *IObufData + cdef long nslot + cdef object seq + cdef object seqcache + + assert self.nrowsinbuf >= self.chunksize + while self.nextelement < self.stop: + if self.nextelement >= self.nrowsread: + # Skip until there is interesting information + while self.start > self.nrowsread + self.nrowsinbuf: + self.nrowsread = self.nrowsread + self.nrowsinbuf + self.nextelement = self.nextelement + self.nrowsinbuf + + table = self.table + iobuf = self.iobuf + j = 0; recout = 0; cs = self.chunksize + nchunksread = self.nrowsread // cs + tmp_range = np.arange(0, cs, dtype='int64') + self.bufcoords = np.empty(self.nrowsinbuf, dtype='int64') + # Fetch valid chunks until the I/O buffer is full + while nchunksread < self.totalchunks: + if self.chunkmap_data[nchunksread]: + self.bufcoords[j*cs:(j+1)*cs] = tmp_range + self.nrowsread + # Not optimized read + # recout = recout + table._read_records( + # nchunksread*cs, cs, iobuf[j*cs:]) + # + # Optimized read through the use of a chunk cache. This cache has + # more or less the same speed than the integrated HDF5 chunk + # cache, but using the PyTables one has the advantage that the + # user can easily change this parameter. + recout = recout + table._read_chunk(nchunksread, iobuf, j*cs) + j = j + 1 + self.nrowsread = (nchunksread+1)*cs + if self.nrowsread > self.stop: + self.nrowsread = self.stop + break + elif j == self.nchunksinbuf: + break + nchunksread = nchunksread + 1 + + # Evaluate the condition on this table fragment. 
+ iobuf = iobuf[:recout] + + if len(iobuf) > 0: + self.table._convert_types(iobuf, len(iobuf), 1) + self.indexvalid = call_on_recarr( + self.condfunc, self.condargs, iobuf, **self.condkwargs) + self.index_valid_data = PyArray_BYTES(self.indexvalid) + # Get the valid coordinates + self.indexvalues = self.bufcoords[:recout][self.indexvalid] + self.index_values_data = PyArray_DATA(self.indexvalues) + self.lenbuf = self.indexvalues.size + # Place the valid results at the beginning of the buffer + iobuf[:self.lenbuf] = iobuf[self.indexvalid] + + # Initialize the internal buffer row counter + self._row = -1 + + if self._write_to_seqcache: + # Feed the indexvalues into the seqcache + seqcache = self.iterseq + if self.lenbuf + len(seqcache) < self.iterseq_max_elements: + seqcache.extend(self.indexvalues) + else: + self.iterseq = None + self._write_to_seqcache = 0 + + self._row = self._row + 1 + # Check whether we have read all the rows in buf + if self._row == self.lenbuf: + self.nextelement = self.nrowsread + # Make _row to point to the last valid entry in buffer + # (this is useful for accessing the last row after an iterator loop) + self._row = self._row - 1 + continue + self._nrow = self.index_values_data[self._row] + # Check additional conditions on start, stop, step params + if self.sss_on: + if (self._nrow < self.start or self._nrow >= self.stop): + self.nextelement = self.nextelement + 1 + continue + if (self.step > 1 and + ((self._nrow - self.start) % self.step > 0)): + self.nextelement = self.nextelement + 1 + continue + # Return this row + self.nextelement = self._nrow + 1 + return self + else: + # All the elements have been read for this mode + self._finish_riterator() + + cdef __next__coords(self): + """The version of next() for user-required coordinates""" + cdef int recout + cdef long long lenbuf, nextelement + cdef object tmp + if 0 < self.step: + while self.nextelement < self.stop: + if self.nextelement >= self.nrowsread: + # Correction for avoiding 
cdef __next__inkernel(self):
    """The version of next() in case of in-kernel conditions

    Rows are read into ``self.iobuf`` one buffer at a time and the
    condition is evaluated on each buffer as a whole.  Buffers with no
    matching row are skipped entirely; otherwise rows are visited every
    ``self.step`` and only those flagged as valid are returned.  Returns
    ``self`` positioned on the next matching row, or finishes the
    iterator when ``self.stop`` is reached.

    """

    cdef hsize_t recout, correct
    cdef object numexpr_locals, colvar, col
    self.nextelement = self._nrow + self.step
    while self.nextelement < self.stop:
        if self.nextelement >= self.nrowsread:
            # Skip until there is interesting information
            while self.nextelement >= self.nrowsread + self.nrowsinbuf:
                self.nrowsread = self.nrowsread + self.nrowsinbuf
            # Compute the end for this iteration
            self.stopb = self.stop - self.nrowsread
            if self.stopb > self.nrowsinbuf:
                self.stopb = self.nrowsinbuf
            # One step before the first row, it is advanced below
            self._row = self.startb - self.step
            # Read a chunk
            recout = self.table._read_records(self.nextelement, self.nrowsinbuf,
                                              self.iobuf)
            self.nrowsread = self.nrowsread + recout
            # Same trick as for _row: one step before the first flag
            self.indexchunk = -self.step

            # Evaluate the condition on this table fragment.
            self.indexvalid = call_on_recarr(
                self.condfunc, self.condargs, self.iobuf[:recout], **self.condkwargs)
            self.index_valid_data = PyArray_BYTES(self.indexvalid)

            # Is there any interesting information in this buffer?
            if not np.any(self.indexvalid):
                # No, so take the next one
                if self.step >= self.nrowsinbuf:
                    self.nextelement = self.nextelement + self.step
                else:
                    self.nextelement = self.nextelement + self.nrowsinbuf
                    # Correction for step size > 1
                    if self.step > 1:
                        # Move back onto a row reachable from start by step
                        correct = (self.nextelement - self.start) % self.step
                        self.nextelement = self.nextelement - correct
                continue

        self._row = self._row + self.step
        self._nrow = self.nextelement
        if self._row + self.step >= self.stopb:
            # Compute the start row for the next buffer
            self.startb = 0

        self.nextelement = self._nrow + self.step
        # Return only if this value is interesting
        self.indexchunk = self.indexchunk + self.step
        if self.index_valid_data[self.indexchunk]:
            return self
    else:
        # The loop ended without returning a row
        self._finish_riterator()
cdef _finish_riterator(self):
    """Leave the read iterator: tidy up state and raise StopIteration.

    The last row read is copied into the private record, the collected
    row sequence is stored in the table sequence cache when enabled, and
    pending modified rows are flushed.

    """
    cdef ObjectCache seqcache
    cdef Table table = self.table

    # Both field caches start empty for the next iteration
    self.rfieldscache = {}
    self.wfieldscache = {}
    # Keep the last read row available after the loop has ended
    if self._row >= 0:
        self.wrec[:] = self.iobuf[self._row]
    if self._write_to_seqcache:
        seqcache = self.table._seqcache
        # Size guess: each element in self.iterseq takes at least 8 bytes
        seqcache.setitem_(self.seqcache_key, self.iterseq,
                          len(self.iterseq) * 8)
    # Out of the iterator; drop the seqcache-related things
    self._riterator = 0
    self.iterseq = None
    self.seqcache_key = None
    # Flush any possible modified row
    if self._mod_nrows > 0:
        self._flush_mod_rows()
    # Empty the set of modified fields
    self.modified_fields = set()
    raise StopIteration
def append(self):
    """Add a new row of data to the end of the dataset.

    Once the fields of the current row have been filled, calling this
    method copies the row into the *output buffer* (which is dumped to
    disk when it becomes full).  Fields that have not been set keep the
    default value of their column.

    .. warning::

        After completion of the loop in which :meth:`Row.append` has
        been called, it is always convenient to make a call to
        :meth:`Table.flush` in order to avoid losing the last rows that
        may still remain in internal buffers.

    Examples
    --------

    ::

        row = table.row
        for i in range(nrows):
            row['col1'] = i-1
            row['col2'] = 'a'
            row['col3'] = -1.0
            row.append()
        table.flush()

    """
    cdef ndarray iobuf, wrec, wreccpy

    if self.ro_filemode:
        raise IOError("Attempt to write over a file opened in read-only mode")

    if not self.chunked:
        raise HDF5ExtError("You cannot append rows to a non-chunked table.",
                           h5tb=False)

    if self._riterator:
        raise NotImplementedError("You cannot append rows when in middle of a table iterator. If what you want is to update records, use Row.update() instead.")

    iobuf = self.iobuf
    wrec = self.wrec
    wreccpy = self.wreccpy
    # Raw copy of the private record into the next free buffer slot
    # (faster than self.iobuf[self._unsaved_nrows] = self.wrec)
    memcpy(PyArray_BYTES(iobuf) + self._unsaved_nrows * self._stride,
           PyArray_BYTES(wrec), self._rowsize)
    # Raw copy of the defaults back into the private record
    # (faster than self.wrec[:] = self.wreccpy)
    memcpy(PyArray_BYTES(wrec), PyArray_BYTES(wreccpy), self._rowsize)
    self._unsaved_nrows = self._unsaved_nrows + 1
    # When the buffer is full, flush it
    if self._unsaved_nrows == self.nrowsinbuf:
        self._flush_buffered_rows()

def _flush_buffered_rows(self):
    """Hand the buffered rows, if any, to the table and reset the count."""
    if self._unsaved_nrows > 0:
        self.table._save_buffered_rows(self.iobuf, self._unsaved_nrows)
        # Reset the buffer unsaved counter
        self._unsaved_nrows = 0

def _get_unsaved_nrows(self):
    """Return the number of rows buffered but not yet saved."""
    return self._unsaved_nrows
warning:: + + After completion of the loop in which :meth:`Row.update` has + been called, it is always convenient to make a call to + :meth:`Table.flush` in order to avoid losing changed rows that + may still remain in internal buffers. + + Examples + -------- + + :: + + for row in table.iterrows(step=10): + row['col1'] = row.nrow + row['col2'] = 'b' + row['col3'] = 0.0 + row.update() + table.flush() + + which modifies every tenth row in table. Or:: + + for row in table.where('col1 > 3'): + row['col1'] = row.nrow + row['col2'] = 'b' + row['col3'] = 0.0 + row.update() + table.flush() + + which just updates the rows with values bigger than 3 in the first + column. + + """ + + cdef ndarray iobufcpy, iobuf + + if self.ro_filemode: + raise IOError("Attempt to write over a file opened in read-only mode") + + if not self._riterator: + raise NotImplementedError("You are only allowed to update rows through the Row.update() method if you are in the middle of a table iterator.") + + if self.mod_elements is None: + # Initialize an array for keeping the modified elements + # (just in case Row.update() would be used) + self.mod_elements = np.empty(shape=self.nrowsinbuf, dtype=SizeType) + # We need a different copy for self.iobuf here + self.iobufcpy = self.iobuf.copy() + + # Add this row to the list of elements to be modified + self.mod_elements[self._mod_nrows] = self._nrow + # Copy the current buffer row in input to the output buffer + # self.iobufcpy[self._mod_nrows] = self.iobuf[self._row] + # The next is faster + iobufcpy = self.iobufcpy; iobuf = self.iobuf + memcpy(PyArray_BYTES(iobufcpy) + self._mod_nrows * self._stride, + PyArray_BYTES(iobuf) + self._row * self._stride, self._rowsize) + # Increase the modified buffer count by one + self._mod_nrows = self._mod_nrows + 1 + # No point writing seqcache -- Table.flush will invalidate it + # since we no longer know whether this row will meet _where_condition + self._write_to_seqcache = 0 + # When the buffer is full, flush it + 
if self._mod_nrows == self.nrowsinbuf: + self._flush_mod_rows() + + def _flush_mod_rows(self): + """Flush any possible modified row using Row.update()""" + + table = self.table + # Save the records on disk + table._update_elements(self._mod_nrows, self.mod_elements, self.iobufcpy) + # Reset the counter of modified rows to 0 + self._mod_nrows = 0 + # Mark the modified fields' indexes as dirty. + table._mark_columns_as_dirty(self.modified_fields) + + + def __contains__(self, item): + """__contains__(item) + + A true value is returned if item is found in current row, false + otherwise. + + """ + + return item in self.fetch_all_fields() + + # This method is twice as faster than __getattr__ because there is + # not a lookup in the local dictionary + def __getitem__(self, key): + """__getitem__(key) + + Get the row field specified by the `key`. + + The key can be a string (the name of the field), an integer (the + position of the field) or a slice (the range of field positions). When + key is a slice, the returned value is a *tuple* containing the values + of the specified fields. + + Examples + -------- + + :: + + res = [row['var3'] for row in table.where('var2 < 20')] + + which selects the var3 field for all the rows that fulfil the + condition. Or:: + + res = [row[4] for row in table if row[1] < 20] + + which selects the field in the *4th* position for all the rows that + fulfil the condition. Or:: + + res = [row[:] for row in table if row['var2'] < 20] + + which selects the all the fields (in the form of a *tuple*) for all the + rows that fulfil the condition. Or:: + + res = [row[1::2] for row in table.iterrows(2, 3000, 3)] + + which selects all the fields in even positions (in the form of a + *tuple*) for all the rows in the slice [2:3000:3]. 
+ + """ + + cdef long offset + cdef ndarray field + cdef object row, fields, fieldscache + + if self._riterator: + # If in the middle of an iterator loop, the user probably wants to + # access the read buffer + fieldscache = self.rfieldscache; fields = self.rfields + offset = self._row + else: + # We are not in an iterator loop, so the user probably wants to access + # the write buffer + fieldscache = self.wfieldscache; fields = self.wfields + offset = 0 + + try: + # Check whether this object is in the cache dictionary + field = fieldscache[key] + except (KeyError, TypeError): + try: + # Try to get it from fields (str or int keys) + field = get_nested_field_cache(fields, key, fieldscache) + except TypeError: + # No luck yet. Still, the key can be a slice. + # Fetch the complete row and convert it into a tuple + if self._riterator: + row = self.iobuf[self._row].copy().item() + else: + row = self.wrec[0].copy().item() + # Try with __getitem__() + return row[key] + + if PyArray_NDIM(field) == 1: + # For an scalar it is not needed a copy (immutable object) + return PyArray_GETITEM(field, PyArray_BYTES(field) + offset * self._stride) + else: + # Do a copy of the array, so that it can be overwritten by the user + # without damaging the internal self.rfields buffer + return field[offset].copy() + + # This is slightly faster (around 3%) than __setattr__ + def __setitem__(self, object key, object value): + """__setitem__(key, value) + + Set the key row field to the specified value. + + Differently from its __getitem__() counterpart, in this case key can + only be a string (the name of the field). The changes done via + __setitem__() will not take effect on the data on disk until any of the + :meth:`Row.append` or :meth:`Row.update` methods are called. + + Examples + -------- + + :: + + for row in table.iterrows(step=10): + row['col1'] = row.nrow + row['col2'] = 'b' + row['col3'] = 0.0 + row.update() + table.flush() + + which modifies every tenth row in the table. 
+ + """ + + cdef int ret + cdef long offset + cdef ndarray field + cdef object fields, fieldscache + + if self.ro_filemode: + raise IOError("attempt to write over a file opened in read-only mode") + + if self._riterator: + # If in the middle of an iterator loop, or *after*, the user + # probably wants to access the read buffer + fieldscache = self.rfieldscache; fields = self.rfields + offset = self._row + else: + # We are not in an iterator loop, so the user probably wants to access + # the write buffer + fieldscache = self.wfieldscache; fields = self.wfields + offset = 0 + + # Check validity of enumerated value. + if self.exist_enum_cols: + if key in self.colenums: + enum = self.colenums[key] + for cenval in np.asarray(value).flat: + enum(cenval) # raises ``ValueError`` on invalid values + + # Get the field to be modified + field = get_nested_field_cache(fields, key, fieldscache) + if key not in self.modified_fields: + self.modified_fields.add(key) + + # Finally, try to set it to the value + try: + # Optimization for scalar values. This can optimize the writes + # between a 10% and 100%, depending on the number of columns modified + if PyArray_NDIM(field) == 1: + ret = PyArray_SETITEM(field, PyArray_BYTES(field) + offset * self._stride, value) + if ret < 0: + PyErr_Clear() + raise TypeError + ##### End of optimization for scalar values + else: + field[offset] = value + except TypeError: + raise TypeError("invalid type (%s) for column ``%s``" % (type(value), + key)) + + def fetch_all_fields(self): + """Retrieve all the fields in the current row. + + Contrarily to row[:] (see :ref:`RowSpecialMethods`), this returns row + data as a NumPy void scalar. For instance:: + + [row.fetch_all_fields() for row in table.where('col1 < 3')] + + will select all the rows that fulfill the given condition + as a list of NumPy records. + + """ + + # We need to do a cast for recognizing negative row numbers! 
+ if self._nrow < 0: + return ("Warning: Row iterator has not been initialized for table:\n" + " %s\n" + " You will normally want to use this method in iterator " + "contexts." % self.table) + + # Always return a copy of the row so that new data that is written + # in self.iobuf doesn't overwrite the original returned data. + return self.iobuf[self._row].copy() + + def __str__(self): + """Represent the record as an string""" + + # We need to do a cast for recognizing negative row numbers! + if self._nrow < 0: + return ("Warning: Row iterator has not been initialized for table:\n" + " %s\n" + " You will normally want to use this object in iterator " + "contexts." % self.table) + + tablepathname = self.table._v_pathname + classname = self.__class__.__name__ + return "%s.row (%s), pointing to row #%d" % (tablepathname, classname, + self._nrow) + + def __repr__(self): + """Represent the record as an string""" + + return str(self) + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## fill-column: 78 +## End: diff --git a/venv/Lib/site-packages/tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 b/venv/Lib/site-packages/tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 new file mode 100644 index 0000000..8020100 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/Tables_lzo1.h5 b/venv/Lib/site-packages/tables/tests/Tables_lzo1.h5 new file mode 100644 index 0000000..4d928bc Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/Tables_lzo1.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/Tables_lzo1_shuffle.h5 b/venv/Lib/site-packages/tables/tests/Tables_lzo1_shuffle.h5 new file mode 100644 index 0000000..622518d Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/Tables_lzo1_shuffle.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/Tables_lzo2.h5 b/venv/Lib/site-packages/tables/tests/Tables_lzo2.h5 new file mode 
100644 index 0000000..d85e4b1 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/Tables_lzo2.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/Tables_lzo2_shuffle.h5 b/venv/Lib/site-packages/tables/tests/Tables_lzo2_shuffle.h5 new file mode 100644 index 0000000..6fc6b7d Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/Tables_lzo2_shuffle.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/__init__.py b/venv/Lib/site-packages/tables/tests/__init__.py new file mode 100644 index 0000000..7c7ed59 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/__init__.py @@ -0,0 +1,11 @@ +"""Unit tests for PyTables. + +This package contains some modules which provide a ``suite()`` function +(with no arguments) which returns a test suite for some PyTables +functionality. + +""" + +import tables.req_versions # Necessary for the test suite +from tables.tests.common import print_versions +from tables.tests.test_suite import test, suite diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/__init__.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..522f96a Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/check_leaks.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/check_leaks.cpython-311.pyc new file mode 100644 index 0000000..a6bbb15 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/check_leaks.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/common.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/common.cpython-311.pyc new file mode 100644 index 0000000..72dc8dc Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/common.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/tests/__pycache__/create_backcompat_indexes.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/create_backcompat_indexes.cpython-311.pyc new file mode 100644 index 0000000..454a3e4 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/create_backcompat_indexes.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/run_ft.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/run_ft.cpython-311.pyc new file mode 100644 index 0000000..1712503 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/run_ft.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_all.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_all.cpython-311.pyc new file mode 100644 index 0000000..52938e2 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_all.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_array.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_array.cpython-311.pyc new file mode 100644 index 0000000..3385773 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_array.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_attributes.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_attributes.cpython-311.pyc new file mode 100644 index 0000000..fec5ad0 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_attributes.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_aux.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_aux.cpython-311.pyc new file mode 100644 index 0000000..6510242 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_aux.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/tests/__pycache__/test_backcompat.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_backcompat.cpython-311.pyc new file mode 100644 index 0000000..5f82522 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_backcompat.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_basics.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_basics.cpython-311.pyc new file mode 100644 index 0000000..3f57bc2 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_basics.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_carray.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_carray.cpython-311.pyc new file mode 100644 index 0000000..8882020 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_carray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_create.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_create.cpython-311.pyc new file mode 100644 index 0000000..7d39923 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_create.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_direct_chunk.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_direct_chunk.cpython-311.pyc new file mode 100644 index 0000000..88db197 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_direct_chunk.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_do_undo.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_do_undo.cpython-311.pyc new file mode 100644 index 0000000..1d72db2 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_do_undo.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/tests/__pycache__/test_earray.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_earray.cpython-311.pyc new file mode 100644 index 0000000..e7088c8 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_earray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_enum.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_enum.cpython-311.pyc new file mode 100644 index 0000000..a91dd42 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_enum.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_expression.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_expression.cpython-311.pyc new file mode 100644 index 0000000..1a18068 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_expression.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_garbage.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_garbage.cpython-311.pyc new file mode 100644 index 0000000..17956ff Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_garbage.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_hdf5compat.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_hdf5compat.cpython-311.pyc new file mode 100644 index 0000000..a27a401 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_hdf5compat.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_index_backcompat.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_index_backcompat.cpython-311.pyc new file mode 100644 index 0000000..4c307ac Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_index_backcompat.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/tests/__pycache__/test_indexes.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_indexes.cpython-311.pyc new file mode 100644 index 0000000..ebc085e Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_indexes.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_indexvalues.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_indexvalues.cpython-311.pyc new file mode 100644 index 0000000..54bbeb0 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_indexvalues.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_large_tables.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_large_tables.cpython-311.pyc new file mode 100644 index 0000000..33aabe6 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_large_tables.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_links.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_links.cpython-311.pyc new file mode 100644 index 0000000..4a73526 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_links.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_lists.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_lists.cpython-311.pyc new file mode 100644 index 0000000..aca5dc1 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_lists.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_nestedtypes.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_nestedtypes.cpython-311.pyc new file mode 100644 index 0000000..8d33923 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_nestedtypes.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/tests/__pycache__/test_numpy.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_numpy.cpython-311.pyc new file mode 100644 index 0000000..9742874 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_numpy.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_queries.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_queries.cpython-311.pyc new file mode 100644 index 0000000..2e0503c Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_queries.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_suite.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_suite.cpython-311.pyc new file mode 100644 index 0000000..56620fc Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_suite.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_tables.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_tables.cpython-311.pyc new file mode 100644 index 0000000..7eeb19b Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_tables.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_tablesMD.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_tablesMD.cpython-311.pyc new file mode 100644 index 0000000..ec6f270 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_tablesMD.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_timestamps.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_timestamps.cpython-311.pyc new file mode 100644 index 0000000..f76f63e Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_timestamps.cpython-311.pyc differ diff --git 
a/venv/Lib/site-packages/tables/tests/__pycache__/test_timetype.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_timetype.cpython-311.pyc new file mode 100644 index 0000000..5ebd991 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_timetype.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_tree.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_tree.cpython-311.pyc new file mode 100644 index 0000000..acfacc6 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_tree.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_types.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_types.cpython-311.pyc new file mode 100644 index 0000000..6c2727b Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_types.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_utils.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_utils.cpython-311.pyc new file mode 100644 index 0000000..6682c6c Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_utils.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/__pycache__/test_vlarray.cpython-311.pyc b/venv/Lib/site-packages/tables/tests/__pycache__/test_vlarray.cpython-311.pyc new file mode 100644 index 0000000..3c6ea81 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/__pycache__/test_vlarray.cpython-311.pyc differ diff --git a/venv/Lib/site-packages/tables/tests/array_mdatom.h5 b/venv/Lib/site-packages/tables/tests/array_mdatom.h5 new file mode 100644 index 0000000..64b0f89 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/array_mdatom.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/attr-u16.h5 b/venv/Lib/site-packages/tables/tests/attr-u16.h5 new file mode 100644 index 
0000000..1de849d Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/attr-u16.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/b2nd-no-chunkshape.h5 b/venv/Lib/site-packages/tables/tests/b2nd-no-chunkshape.h5 new file mode 100644 index 0000000..5c6aa08 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/b2nd-no-chunkshape.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/blosc_bigendian.h5 b/venv/Lib/site-packages/tables/tests/blosc_bigendian.h5 new file mode 100644 index 0000000..dce88b1 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/blosc_bigendian.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/bug-idx.h5 b/venv/Lib/site-packages/tables/tests/bug-idx.h5 new file mode 100644 index 0000000..005577f Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/bug-idx.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/check_leaks.py b/venv/Lib/site-packages/tables/tests/check_leaks.py new file mode 100644 index 0000000..d02b394 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/check_leaks.py @@ -0,0 +1,383 @@ +from time import perf_counter as clock +from pathlib import Path + +import tables as tb + +tref = clock() +trel = tref + + +def show_mem(explain): + global tref, trel + + for line in Path("/proc/self/status").read_text().splitlines(): + if line.startswith("VmSize:"): + vmsize = int(line.split()[1]) + elif line.startswith("VmRSS:"): + vmrss = int(line.split()[1]) + elif line.startswith("VmData:"): + vmdata = int(line.split()[1]) + elif line.startswith("VmStk:"): + vmstk = int(line.split()[1]) + elif line.startswith("VmExe:"): + vmexe = int(line.split()[1]) + elif line.startswith("VmLib:"): + vmlib = int(line.split()[1]) + + print("\nMemory usage: ******* %s *******" % explain) + print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB") + print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB") + print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB") + print("WallClock time:", 
clock() - tref, end=" ") + print(" Delta time:", clock() - trel) + trel = clock() + + +def write_group(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + fileh.create_group( + fileh.root, "group" + str(child), "child: %d" % child + ) + show_mem("After creating. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_group(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "group" + str(child)) + assert node is not None + # flavor = node._v_attrs.CLASS + # for child in fileh.walk_nodes(): + # pass + show_mem("After reading metadata. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def write_array(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + fileh.create_array( + fileh.root, "array" + str(child), [1, 1], "child: %d" % child + ) + show_mem("After creating. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_array(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "array" + str(child)) + # flavor = node._v_attrs.FLAVOR + data = node[:] # Read data + assert data is not None + show_mem("After reading data. Iter %s" % i) + # for child in range(nchildren): + # node = fileh.get_node(fileh.root, 'array' + str(child)) + # flavor = node._v_attrs.FLAVOR + # # flavor = node._v_attrs + # for child in fileh.walk_nodes(): + # pass + # show_mem("After reading metadata. 
Iter %s" % i) + fileh.close() + show_mem("After close") + + +def write_carray(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + fileh.create_carray( + fileh.root, + "array" + str(child), + tb.IntAtom(), + (2,), + "child: %d" % child, + ) + show_mem("After creating. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_carray(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "array" + str(child)) + # flavor = node._v_attrs.FLAVOR + data = node[:] # Read data + assert data is not None + # print("data-->", data) + show_mem("After reading data. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def write_earray(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + ea = fileh.create_earray( + fileh.root, + "array" + str(child), + tb.IntAtom(), + shape=(0,), + title="child: %d" % child, + ) + ea.append([1, 2, 3]) + show_mem("After creating. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_earray(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "array" + str(child)) + # flavor = node._v_attrs.FLAVOR + data = node[:] # Read data + assert data is not None + # print("data-->", data) + show_mem("After reading data. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def write_vlarray(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + vl = fileh.create_vlarray( + fileh.root, + "array" + str(child), + tb.IntAtom(), + "child: %d" % child, + ) + vl.append([1, 2, 3]) + show_mem("After creating. 
Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_vlarray(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "array" + str(child)) + # flavor = node._v_attrs.FLAVOR + data = node[:] # Read data + assert data is not None + # print("data-->", data) + show_mem("After reading data. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def write_table(filename, nchildren, niter): + + class Record(tb.IsDescription): + var1 = tb.IntCol(pos=1) + var2 = tb.StringCol(length=1, pos=2) + var3 = tb.FloatCol(pos=3) + + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + t = fileh.create_table( + fileh.root, "table" + str(child), Record, "child: %d" % child + ) + t.append([[1, "2", 3.0]]) + show_mem("After creating. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_table(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "table" + str(child)) + # klass = node._v_attrs.CLASS + data = node[:] # Read data + assert data is not None + # print("data-->", data) + show_mem("After reading data. Iter %s" % i) + fileh.close() + show_mem("After close") + + +def write_xtable(filename, nchildren, niter): + + class Record(tb.IsDescription): + var1 = tb.IntCol(pos=1) + var2 = tb.StringCol(length=1, pos=2) + var3 = tb.FloatCol(pos=3) + + for i in range(niter): + fileh = tb.open_file(filename, mode="w") + for child in range(nchildren): + t = fileh.create_table( + fileh.root, "table" + str(child), Record, "child: %d" % child + ) + t.append([[1, "2", 3.0]]) + t.cols.var1.create_index() + show_mem("After creating. 
Iter %s" % i) + fileh.close() + show_mem("After close") + + +def read_xtable(filename, nchildren, niter): + for i in range(niter): + fileh = tb.open_file(filename, mode="r") + for child in range(nchildren): + node = fileh.get_node(fileh.root, "table" + str(child)) + # klass = node._v_attrs.CLASS + # data = node[:] # Read data + # print("data-->", data) + show_mem("After reading data. Iter %s" % i) + fileh.close() + show_mem("After close") + del node + + +if __name__ == "__main__": + import pstats + import profile as prof + import argparse + + def _get_parser(): + parser = argparse.ArgumentParser( + description="Check for PyTables memory leaks." + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="enable verbose mode" + ) + parser.add_argument( + "-p", "--profile", action="store_true", help="profile" + ) + parser.add_argument( + "-a", + "--array", + action="store_true", + help="create/read arrays (default)", + ) + parser.add_argument( + "-c", "--carray", action="store_true", help="create/read carrays" + ) + parser.add_argument( + "-e", "--earray", action="store_true", help="create/read earrays" + ) + parser.add_argument( + "-l", "--vlarray", action="store_true", help="create/read vlarrays" + ) + parser.add_argument( + "-t", "--table", action="store_true", help="create/read tables" + ) + parser.add_argument( + "-x", + "--indexed-table", + action="store_true", + dest="xtable", + help="create/read indexed-tables", + ) + parser.add_argument( + "-g", "--group", action="store_true", help="create/read groups" + ) + parser.add_argument( + "-r", "--read", action="store_true", help="only read test" + ) + parser.add_argument( + "-w", "--write", action="store_true", help="only write test" + ) + parser.add_argument( + "-n", + "--nchildren", + type=int, + default=1000, + help="number of children (%(default)d is the " "default)", + ) + parser.add_argument( + "-i", + "--niter", + type=int, + default=3, + help="number of iterations (default: %(default)d)", + ) 
+ + parser.add_argument("filename", help="HDF5 file name") + + return parser + + parser = _get_parser() + args = parser.parse_args() + + # set 'array' as default value if no ather option has been specified + for name in ("carray", "earray", "vlarray", "table", "xtable", "group"): + if getattr(args, name): + break + else: + args.array = True + + filename = args.filename + nchildren = args.nchildren + niter = args.niter + + if args.array: + fwrite = "write_array" + fread = "read_array" + elif args.carray: + fwrite = "write_carray" + fread = "read_carray" + elif args.earray: + fwrite = "write_earray" + fread = "read_earray" + elif args.vlarray: + fwrite = "write_vlarray" + fread = "read_vlarray" + elif args.table: + fwrite = "write_table" + fread = "read_table" + elif args.xtable: + fwrite = "write_xtable" + fread = "read_xtable" + elif args.group: + fwrite = "write_group" + fread = "read_group" + + show_mem("Before open") + if args.write: + if args.profile: + prof.run( + str(fwrite) + "(filename, nchildren, niter)", "write_file.prof" + ) + stats = pstats.Stats("write_file.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + if args.verbose: + stats.print_stats() + else: + stats.print_stats(20) + else: + eval(fwrite + "(filename, nchildren, niter)") + if args.read: + if args.profile: + prof.run(fread + "(filename, nchildren, niter)", "read_file.prof") + stats = pstats.Stats("read_file.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + if args.verbose: + print("profile -verbose") + stats.print_stats() + else: + stats.print_stats(20) + else: + eval(fread + "(filename, nchildren, niter)") diff --git a/venv/Lib/site-packages/tables/tests/common.py b/venv/Lib/site-packages/tables/tests/common.py new file mode 100644 index 0000000..a81a456 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/common.py @@ -0,0 +1,393 @@ +"""Utilities for PyTables' test suites.""" + +import os +import re +import sys +import locale +import platform +import 
tempfile +import unittest +from time import perf_counter as clock +from pathlib import Path + +import numpy as np +import numexpr as ne +from packaging.version import Version + +import tables as tb +from tables.utilsextension import which_lib_version + +hdf5_version = Version(tb.hdf5_version) +blosc_version = Version(which_lib_version("blosc")[1]) +blosc2_version = Version(which_lib_version("blosc2")[1]) + + +verbose = os.environ.get("VERBOSE", "FALSE") == "TRUE" +"""Show detailed output of the testing process.""" + +heavy = False +"""Run all tests even when they take long to complete.""" + +show_memory = False +"""Show the progress of memory consumption.""" + + +def parse_argv(argv): + global verbose, heavy + + if "verbose" in argv: + verbose = True + argv.remove("verbose") + + if "silent" in argv: # take care of old flag, just in case + verbose = False + argv.remove("silent") + + if "--heavy" in argv: + heavy = True + argv.remove("--heavy") + + return argv + + +zlib_avail = which_lib_version("zlib") is not None +lzo_avail = which_lib_version("lzo") is not None +bzip2_avail = which_lib_version("bzip2") is not None +blosc_avail = which_lib_version("blosc") is not None +blosc2_avail = which_lib_version("blosc2") is not None + + +def print_heavy(heavy): + if heavy: + print("""Performing the complete test suite!""") + else: + print("""\ +Performing only a light (yet comprehensive) subset of the test suite. +If you want a more complete test, try passing the --heavy flag to this script +(or set the 'heavy' parameter in case you are using tables.test() call). 
+The whole suite will take more than 4 hours to complete on a relatively +modern CPU and around 512 MB of main memory.""") + print("-=" * 38) + + +def print_versions(): + """Print all the versions of software that PyTables relies on.""" + + print("-=" * 38) + print("PyTables version: %s" % tb.__version__) + print("HDF5 version: %s" % which_lib_version("hdf5")[1]) + print("NumPy version: %s" % np.__version__) + tinfo = which_lib_version("zlib") + if ne.use_vml: + # Get only the main version number and strip out all the rest + vml_version = ne.get_vml_version() + vml_version = re.findall("[0-9.]+", vml_version)[0] + vml_avail = "using VML/MKL %s" % vml_version + else: + vml_avail = "not using Intel's VML/MKL" + print(f"Numexpr version: {ne.__version__} ({vml_avail})") + if tinfo is not None: + print(f"Zlib version: {tinfo[1]} (in Python interpreter)") + tinfo = which_lib_version("lzo") + if tinfo is not None: + print(f"LZO version: {tinfo[1]} ({tinfo[2]})") + tinfo = which_lib_version("bzip2") + if tinfo is not None: + print(f"BZIP2 version: {tinfo[1]} ({tinfo[2]})") + tinfo = which_lib_version("blosc") + if tinfo is not None: + blosc_date = tinfo[2].split()[1] + print(f"Blosc version: {tinfo[1]} ({blosc_date})") + blosc_cinfo = tb.blosc_get_complib_info() + blosc_cinfo = [f"{k} ({v[1]})" for k, v in sorted(blosc_cinfo.items())] + print("Blosc compressors: %s" % ", ".join(blosc_cinfo)) + blosc_finfo = ["shuffle", "bitshuffle"] + print("Blosc filters: %s" % ", ".join(blosc_finfo)) + tinfo = which_lib_version("blosc2") + if tinfo is not None: + blosc2_date = tinfo[2].split()[1] + print(f"Blosc2 version: {tinfo[1]} ({blosc2_date})") + blosc2_cinfo = tb.blosc2_get_complib_info() + blosc2_cinfo = [ + f"{k} ({v[1]})" for k, v in sorted(blosc2_cinfo.items()) + ] + print("Blosc2 compressors: %s" % ", ".join(blosc2_cinfo)) + blosc2_finfo = ["shuffle", "bitshuffle"] + print("Blosc2 filters: %s" % ", ".join(blosc2_finfo)) + try: + from Cython import __version__ as 
cython_version + + print("Cython version: %s" % cython_version) + except Exception: + pass + print("Python version: %s" % sys.version) + print("Platform: %s" % platform.platform()) + # if os.name == 'posix': + # (sysname, nodename, release, version, machine) = os.uname() + # print('Platform: %s-%s' % (sys.platform, machine)) + print("Byte-ordering: %s" % sys.byteorder) + print("Detected cores: %s" % tb.utils.detect_number_of_cores()) + print("Default encoding: %s" % sys.getdefaultencoding()) + print("Default FS encoding: %s" % sys.getfilesystemencoding()) + print("Default locale: (%s, %s)" % locale.getdefaultlocale()) + print("-=" * 38) + + # This should improve readability whan tests are run by CI tools + sys.stdout.flush() + + +def test_filename(filename): + from importlib import resources + + return resources.files("tables.tests") / filename + + +def verbosePrint(string, nonl=False): + """Print out the `string` if verbose output is enabled.""" + if not verbose: + return + if nonl: + print(string, end=" ") + else: + print(string) + + +def allequal(a, b, flavor="numpy"): + """Checks if two numerical objects are equal.""" + + # print("a-->", repr(a)) + # print("b-->", repr(b)) + if not hasattr(b, "shape"): + # Scalar case + return a == b + + if (not hasattr(a, "shape") or a.shape == ()) and ( + not hasattr(b, "shape") or b.shape == () + ): + return a == b + + if a.shape != b.shape: + if verbose: + print("Shape is not equal:", a.shape, "!=", b.shape) + return 0 + + # Way to check the type equality without byteorder considerations + if hasattr(b, "dtype") and a.dtype.str[1:] != b.dtype.str[1:]: + if verbose: + print("dtype is not equal:", a.dtype, "!=", b.dtype) + return 0 + + # Rank-0 case + if len(a.shape) == 0: + if a[()] == b[()]: + return 1 + else: + if verbose: + print("Shape is not equal:", a.shape, "!=", b.shape) + return 0 + + # null arrays + if a.size == 0: # len(a) is not correct for generic shapes + if b.size == 0: + return 1 + else: + if verbose: + 
print("length is not equal") + print("len(a.data) ==>", len(a.data)) + print("len(b.data) ==>", len(b.data)) + return 0 + + # Multidimensional case + result = a == b + result = np.all(result) + if not result and verbose: + print("Some of the elements in arrays are not equal") + + return result + + +def areArraysEqual(arr1, arr2, *, check_type=True): + """Are both `arr1` and `arr2` equal arrays? + + Arguments can be regular NumPy arrays, chararray arrays or + structured arrays (including structured record arrays). They are + checked for type and value equality. + + """ + + t1 = type(arr1) + t2 = type(arr2) + + if check_type and not ( + (hasattr(arr1, "dtype") and arr1.dtype == arr2.dtype) + or issubclass(t1, t2) + or issubclass(t2, t1) + ): + return False + + return np.all(arr1 == arr2) + + +class PyTablesTestCase(unittest.TestCase): + def tearDown(self): + super().tearDown() + for key in self.__dict__: + if self.__dict__[key].__class__.__name__ != "instancemethod": + self.__dict__[key] = None + + def _getName(self): + """Get the name of this test case.""" + return self.id().split(".")[-2] + + def _getMethodName(self): + """Get the name of the method currently running in the test case.""" + return self.id().split(".")[-1] + + def _verboseHeader(self): + """Print a nice header for the current test method if verbose.""" + + if verbose: + name = self._getName() + methodName = self._getMethodName() + + title = f"Running {name}.{methodName}" + print("{}\n{}".format(title, "-" * len(title))) + + def _checkEqualityGroup(self, node1, node2, hardlink=False): + if verbose: + print("Group 1:", node1) + print("Group 2:", node2) + if hardlink: + self.assertTrue( + node1._v_pathname != node2._v_pathname, + "node1 and node2 have the same pathnames.", + ) + else: + self.assertTrue( + node1._v_pathname == node2._v_pathname, + "node1 and node2 does not have the same pathnames.", + ) + self.assertTrue( + node1._v_children == node2._v_children, + "node1 and node2 does not have the same 
children.", + ) + + def _checkEqualityLeaf(self, node1, node2, hardlink=False): + if verbose: + print("Leaf 1:", node1) + print("Leaf 2:", node2) + if hardlink: + self.assertTrue( + node1._v_pathname != node2._v_pathname, + "node1 and node2 have the same pathnames.", + ) + else: + self.assertTrue( + node1._v_pathname == node2._v_pathname, + "node1 and node2 does not have the same pathnames.", + ) + self.assertTrue( + areArraysEqual(node1[:], node2[:]), + "node1 and node2 does not have the same values.", + ) + + +class TestFileMixin: + h5fname = None + open_kwargs = {} + + def setUp(self): + super().setUp() + self.h5file = tb.open_file( + self.h5fname, title=self._getName(), **self.open_kwargs + ) + + def tearDown(self): + """Close ``h5file``.""" + + self.h5file.close() + super().tearDown() + + +class TempFileMixin: + open_mode = "w" + open_kwargs = {} + + def _getTempFileName(self): + return tempfile.mktemp(prefix=self._getName(), suffix=".h5") + + def setUp(self): + """Set ``h5file`` and ``h5fname`` instance attributes. + + * ``h5fname``: the name of the temporary HDF5 file. + * ``h5file``: the writable, empty, temporary HDF5 file. + + """ + + super().setUp() + self.h5fname = self._getTempFileName() + self.h5file = tb.open_file( + self.h5fname, + self.open_mode, + title=self._getName(), + **self.open_kwargs, + ) + + def tearDown(self): + """Close ``h5file`` and remove ``h5fname``.""" + + self.h5file.close() + self.h5file = None + Path(self.h5fname).unlink() # comment this for debug only + super().tearDown() + + def _reopen(self, mode="r", **kwargs): + """Reopen ``h5file`` in the specified ``mode``. + + Returns a true or false value depending on whether the file was + reopenend or not. If not, nothing is changed. 
+ + """ + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode, **kwargs) + return True + + +class ShowMemTime(PyTablesTestCase): + tref = clock() + """Test for showing memory and time consumption.""" + + def test00(self): + """Showing memory and time consumption.""" + + # Obtain memory info (only for Linux 2.6.x) + for line in Path("/proc/self/status").read_text().splitlines(): + if line.startswith("VmSize:"): + vmsize = int(line.split()[1]) + elif line.startswith("VmRSS:"): + vmrss = int(line.split()[1]) + elif line.startswith("VmData:"): + vmdata = int(line.split()[1]) + elif line.startswith("VmStk:"): + vmstk = int(line.split()[1]) + elif line.startswith("VmExe:"): + vmexe = int(line.split()[1]) + elif line.startswith("VmLib:"): + vmlib = int(line.split()[1]) + print("\nWallClock time:", clock() - self.tref) + print("Memory usage: ******* %s *******" % self._getName()) + print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB") + print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB") + print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB") + + +try: + from unittest import makeSuite as make_suite +except ImportError: + + def make_suite(test_case_class, *, prefix=None): + loader = unittest.TestLoader() + if prefix: + loader.testMethodPrefix = prefix + return loader.loadTestsFromTestCase(test_case_class) diff --git a/venv/Lib/site-packages/tables/tests/create_backcompat_indexes.py b/venv/Lib/site-packages/tables/tests/create_backcompat_indexes.py new file mode 100644 index 0000000..f6296f1 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/create_backcompat_indexes.py @@ -0,0 +1,41 @@ +# Script for creating different kind of indexes in a small space as possible. +# This is intended for testing purposes. 
+ +import tables as tb + + +class Descr(tb.IsDescription): + var1 = tb.StringCol(itemsize=4, shape=(), dflt="", pos=0) + var2 = tb.BoolCol(shape=(), dflt=False, pos=1) + var3 = tb.Int32Col(shape=(), dflt=0, pos=2) + var4 = tb.Float64Col(shape=(), dflt=0.0, pos=3) + + +# Parameters for the table and index creation +small_chunkshape = (2,) +small_blocksizes = (64, 32, 16, 8) +nrows = 43 + +# Create the new file +h5fname = "indexes_2_1.h5" +h5file = tb.open_file(h5fname, "w") +t1 = h5file.create_table(h5file.root, "table1", Descr) +row = t1.row +for i in range(nrows): + row["var1"] = i + row["var2"] = i + row["var3"] = i + row["var4"] = i + row.append() +t1.flush() + +# Do a copy of table1 +t1.copy(h5file.root, "table2") + +# Create indexes of all kinds +t1.cols.var1.create_index(0, "ultralight", _blocksizes=small_blocksizes) +t1.cols.var2.create_index(3, "light", _blocksizes=small_blocksizes) +t1.cols.var3.create_index(6, "medium", _blocksizes=small_blocksizes) +t1.cols.var4.create_index(9, "full", _blocksizes=small_blocksizes) + +h5file.close() diff --git a/venv/Lib/site-packages/tables/tests/elink.h5 b/venv/Lib/site-packages/tables/tests/elink.h5 new file mode 100644 index 0000000..7fdf3ad Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/elink.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/elink2.h5 b/venv/Lib/site-packages/tables/tests/elink2.h5 new file mode 100644 index 0000000..2b5a394 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/elink2.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/ex-noattr.h5 b/venv/Lib/site-packages/tables/tests/ex-noattr.h5 new file mode 100644 index 0000000..c839038 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/ex-noattr.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/flavored_vlarrays-format1.6.h5 b/venv/Lib/site-packages/tables/tests/flavored_vlarrays-format1.6.h5 new file mode 100644 index 0000000..5592f8c Binary files /dev/null and 
b/venv/Lib/site-packages/tables/tests/flavored_vlarrays-format1.6.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/float.h5 b/venv/Lib/site-packages/tables/tests/float.h5 new file mode 100644 index 0000000..6555e3d Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/float.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/idx-std-1.x.h5 b/venv/Lib/site-packages/tables/tests/idx-std-1.x.h5 new file mode 100644 index 0000000..3b28020 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/idx-std-1.x.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/indexes_2_0.h5 b/venv/Lib/site-packages/tables/tests/indexes_2_0.h5 new file mode 100644 index 0000000..54169b2 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/indexes_2_0.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/indexes_2_1.h5 b/venv/Lib/site-packages/tables/tests/indexes_2_1.h5 new file mode 100644 index 0000000..88e2e79 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/indexes_2_1.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/issue_368.h5 b/venv/Lib/site-packages/tables/tests/issue_368.h5 new file mode 100644 index 0000000..4cb0e9a Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/issue_368.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/issue_560.h5 b/venv/Lib/site-packages/tables/tests/issue_560.h5 new file mode 100644 index 0000000..9b42dfd Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/issue_560.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/itemsize.h5 b/venv/Lib/site-packages/tables/tests/itemsize.h5 new file mode 100644 index 0000000..64ac7ed Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/itemsize.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/matlab_file.mat b/venv/Lib/site-packages/tables/tests/matlab_file.mat new file mode 100644 index 0000000..810eabe Binary files /dev/null and 
b/venv/Lib/site-packages/tables/tests/matlab_file.mat differ diff --git a/venv/Lib/site-packages/tables/tests/nested-type-with-gaps.h5 b/venv/Lib/site-packages/tables/tests/nested-type-with-gaps.h5 new file mode 100644 index 0000000..de4217f Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/nested-type-with-gaps.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/non-chunked-table.h5 b/venv/Lib/site-packages/tables/tests/non-chunked-table.h5 new file mode 100644 index 0000000..a7df4ea Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/non-chunked-table.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/oldflavor_numeric.h5 b/venv/Lib/site-packages/tables/tests/oldflavor_numeric.h5 new file mode 100644 index 0000000..5253468 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/oldflavor_numeric.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/out_of_order_types.h5 b/venv/Lib/site-packages/tables/tests/out_of_order_types.h5 new file mode 100644 index 0000000..92f4e17 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/out_of_order_types.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/python2.h5 b/venv/Lib/site-packages/tables/tests/python2.h5 new file mode 100644 index 0000000..10630ff Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/python2.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/python3.h5 b/venv/Lib/site-packages/tables/tests/python3.h5 new file mode 100644 index 0000000..28eccfd Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/python3.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/run_ft.py b/venv/Lib/site-packages/tables/tests/run_ft.py new file mode 100644 index 0000000..d301a6f --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/run_ft.py @@ -0,0 +1,305 @@ +"""Run unit tests in parallel threads.""" + +# Based on unittest-ft, which is copyright Amethyst Reese, MIT license. 
+ +from __future__ import annotations + +import os +import sys +import time +import random +import logging +import argparse +import unittest +import threading +import collections +from typing import Any, Self, TextIO +from dataclasses import dataclass, field +from collections.abc import Generator +from concurrent.futures import ( + Future, + ThreadPoolExecutor, + as_completed, +) + +LOG = logging.getLogger(__name__) + +DEFAULT_THREADS = os.cpu_count() or 4 + + +class FTTestResult(unittest.TestResult): + def __init__( + self, + stream: TextIO | None = None, + descriptions: bool | None = None, + verbosity: int | None = None, + ) -> None: + super().__init__( + stream=stream, descriptions=descriptions, verbosity=verbosity + ) + self.verbosity = verbosity or 1 + self.before = time.monotonic_ns() + self.duration = 0 + self.collected_duration = 0 + + def stopTest(self, test: Any) -> None: + super().stopTest(test) + self.duration = time.monotonic_ns() - self.before + + def stopTestRun(self) -> None: + super().stopTestRun() + self.duration = time.monotonic_ns() - self.before + + def __str__(self) -> str: + items: dict[tuple[str, str], int] = collections.defaultdict(int) + for test_case, trace in self.errors: + items[f"ERROR: {test_case}", trace] += 1 + for test_case, trace in self.failures: + items[f"FAIL: {test_case}", trace] += 1 + + results = { + f"{label}": trace for (label, trace), count in items.items() + } + longest = max(len(label) for label in results) if results else 70 + + msg = "\n" + msg += "\n".join( + f"{'=' * longest}\n{label}\n{'-' * longest}\n{trace}" + for label, trace in results.items() + ) + msg += "-" * longest + msg += f"\nRan {self.testsRun} tests in {format_ns(self.duration)}" + + saved = self.collected_duration - self.duration + if saved > 0 and (saved / self.duration) > 0.10: + msg += f" (saved {format_ns(self.collected_duration - self.duration)})" + msg += "\n\n" + + msg += "OK" if self.wasSuccessful() else "FAILED" + + parts = [] + if 
self.errors: + parts += [f"errors={len(self.errors)}"] + if self.failures: + parts += [f"failures={len(self.failures)}"] + if self.skipped: + parts += [f"skipped={len(self.skipped)}"] + if self.expectedFailures: + parts += [f"expected failures={len(self.expectedFailures)}"] + + if parts: + msg += f" ({', '.join(parts)})" + + return msg + + def __add__(self, other: object) -> FTTestResult: + if not isinstance(other, unittest.TestResult): + return NotImplemented + result = FTTestResult() + result.errors = self.errors + other.errors + result.expectedFailures = ( + self.expectedFailures + other.expectedFailures + ) + result.failures = self.failures + other.failures + result.skipped = self.skipped + other.skipped + result.testsRun = self.testsRun + other.testsRun + result.unexpectedSuccesses = ( + self.unexpectedSuccesses + other.unexpectedSuccesses + ) + if isinstance(other, FTTestResult): + result.collected_duration = self.duration + other.duration + return result + + def __iadd__(self, other: object) -> Self: + if not isinstance(other, unittest.TestResult): + return NotImplemented + self.errors += other.errors + self.expectedFailures += other.expectedFailures + self.failures += other.failures + self.skipped += other.skipped + self.testsRun += other.testsRun + self.unexpectedSuccesses += other.unexpectedSuccesses + if isinstance(other, FTTestResult): + self.collected_duration += other.duration + return self + + +def get_individual_tests( + suite: unittest.TestSuite, +) -> Generator[unittest.TestCase]: + for test in suite: + if isinstance(test, unittest.TestSuite): + yield from get_individual_tests(test) + else: + yield test + + +def run_single_test(suite: unittest.TestSuite) -> tuple[str, FTTestResult]: + test_id = suite.id() + LOG.debug(f"Running test {threading.get_ident()} {test_id}") + result = FTTestResult(descriptions=True, verbosity=2) + suite.run(result) + LOG.debug("Finished test %s", test_id) + return (test_id, result) + + +def format_ns(duration: int) -> 
str: + if duration < 1_000_000_000: + return f"{duration / 1_000_000:.2f}ms" + else: + return f"{duration / 1_000_000_000:.3f}s" + + +@dataclass +class Output: + total: int + futures: dict[Future[tuple[str, FTTestResult]], str] = field( + default_factory=dict + ) + stream: TextIO = sys.stdout + verbosity: int = 1 + + def __post_init__(self) -> None: + self.count = 0 + + def render(self, test_id: str, test_result: FTTestResult) -> None: + stream = self.stream + verbosity = self.verbosity + + self.count += 1 + if verbosity == 2: + stream.write( + f"[{self.count}/{self.total}] {test_id}" + f" ... {'OK' if test_result.wasSuccessful() else 'FAIL'} " + f" {format_ns(test_result.duration)}\n" + ) + elif verbosity == 1: + if test_result.errors: + stream.write("E") + elif test_result.failures: + stream.write("F") + elif test_result.expectedFailures: + stream.write("x") + elif test_result.skipped: + stream.write("s") + else: + stream.write(".") + stream.flush() + + +_EXCLUDE_CASES = set(""" + tables.filters.Filters + tables.misc.enum + tables.tests.test_array.SI1NACloseTestCase + tables.tests.test_array.SI1NAOpenTestCase + tables.tests.test_array.SI2NACloseTestCase + tables.tests.test_array.SI2NAOpenTestCase + tables.tests.test_basics.HDF5ErrorHandling + tables.tests.test_basics.OpenFileFailureTestCase + tables.tests.test_create.SetBloscMaxThreadsTestCase + tables.tests.test_tablesMD.CompressTwoTablesTestCase + tables.tests.test_utils.ptdumpTestCase.test_paths_windows + tables.tests.test_utils.ptrepackTestCase.test_paths_windows + tables.tests.test_utils.pttreeTestCase.test_paths_windows + """.strip().split()) + + +def _match_case(test_id: str) -> bool: + for pat in _EXCLUDE_CASES: + if test_id.startswith(pat): + return True + return False + + +def run_suite( + suite: unittest.TestSuite, + *, + randomize: bool = False, + threads: int = DEFAULT_THREADS, + verbosity: int = 1, + max_tests: int = 0, +) -> unittest.TestResult: + test_cases = {} + for test_case in 
get_individual_tests(suite): + test_id = test_case.id() + if _match_case(test_id): + continue + test_cases[test_id] = test_case + test_ids = list(test_cases) + if randomize: + rnd = random.SystemRandom() + rnd.shuffle(test_ids) + else: + test_ids.sort() + if max_tests: + test_ids = test_ids[:max_tests] + + LOG.info( + "Ready to run %d tests:\n %s", len(test_ids), "\n ".join(test_ids) + ) + + output = Output(total=len(test_ids), verbosity=verbosity) + result = FTTestResult() + + with ThreadPoolExecutor(max_workers=threads) as pool: + futures = [ + pool.submit(run_single_test, test_cases[test_id]) + for test_id in test_ids + ] + for fut in as_completed(futures): + test_id, test_result = fut.result() + result += test_result + output.render(test_id, test_result) + result.stopTestRun() + + return result + + +def main(): + parser = argparse.ArgumentParser( + description="Run PyTables tests with free-threading support" + ) + parser.add_argument( + "--randomize", + action="store_true", + help="Run tests in random order", + ) + parser.add_argument( + "--verbosity", + type=int, + default=1, + help="Logger verbosity level (default: 1)", + ) + parser.add_argument( + "--threads", + type=int, + default=DEFAULT_THREADS, + help=f"Number of worker threads to use (default: {DEFAULT_THREADS})", + ) + parser.add_argument( + "--max-tests", + type=int, + default=0, + help="Maximum number of tests to run, 0 = unlimited (default: 0)", + ) + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbosity > 1 else logging.INFO + ) + + from tables.tests import test_suite + + suite = test_suite.suite() + result = run_suite( + suite, + verbosity=args.verbosity, + threads=args.threads, + randomize=args.randomize, + max_tests=args.max_tests, + ) + print(result) + + +if __name__ == "__main__": + main() diff --git a/venv/Lib/site-packages/tables/tests/scalar.h5 b/venv/Lib/site-packages/tables/tests/scalar.h5 new file mode 100644 index 0000000..a6a1012 Binary files 
/dev/null and b/venv/Lib/site-packages/tables/tests/scalar.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/slink.h5 b/venv/Lib/site-packages/tables/tests/slink.h5 new file mode 100644 index 0000000..b95b7af Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/slink.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_SDSextendible.h5 b/venv/Lib/site-packages/tables/tests/smpl_SDSextendible.h5 new file mode 100644 index 0000000..7a7bcc2 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_SDSextendible.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_compound_chunked.h5 b/venv/Lib/site-packages/tables/tests/smpl_compound_chunked.h5 new file mode 100644 index 0000000..1cd1d33 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_compound_chunked.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_enum.h5 b/venv/Lib/site-packages/tables/tests/smpl_enum.h5 new file mode 100644 index 0000000..8bc6050 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_enum.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_f64be.h5 b/venv/Lib/site-packages/tables/tests/smpl_f64be.h5 new file mode 100644 index 0000000..5ce30e9 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_f64be.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_f64le.h5 b/venv/Lib/site-packages/tables/tests/smpl_f64le.h5 new file mode 100644 index 0000000..c54b96b Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_f64le.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_i32be.h5 b/venv/Lib/site-packages/tables/tests/smpl_i32be.h5 new file mode 100644 index 0000000..c79980a Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_i32be.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_i32le.h5 b/venv/Lib/site-packages/tables/tests/smpl_i32le.h5 new file mode 100644 index 0000000..5f24e12 Binary files 
/dev/null and b/venv/Lib/site-packages/tables/tests/smpl_i32le.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_i64be.h5 b/venv/Lib/site-packages/tables/tests/smpl_i64be.h5 new file mode 100644 index 0000000..97f518c Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_i64be.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_i64le.h5 b/venv/Lib/site-packages/tables/tests/smpl_i64le.h5 new file mode 100644 index 0000000..c867416 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_i64le.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/smpl_unsupptype.h5 b/venv/Lib/site-packages/tables/tests/smpl_unsupptype.h5 new file mode 100644 index 0000000..0ed3901 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/smpl_unsupptype.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/test_all.py b/venv/Lib/site-packages/tables/tests/test_all.py new file mode 100644 index 0000000..c19269f --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_all.py @@ -0,0 +1,59 @@ +"""Run all test cases.""" + +import os +import sys +import faulthandler + +import numpy as np +from packaging.version import Version + +import tables as tb +from tables.tests import common +from tables.tests.test_suite import suite, test + +# Give people a way to opt out of enabling faulthandler +if os.getenv("PYTABLES_DISABLE_FAULTHANDLER", "").lower() not in ("1", "true"): + faulthandler.enable() + + +def get_tuple_version(hexversion): + """Get a tuple from a compact version in hex.""" + + h = hexversion + return (h & 0xFF0000) >> 16, (h & 0xFF00) >> 8, h & 0xFF + + +if __name__ == "__main__": + + common.parse_argv(sys.argv) + + hdf5_version = get_tuple_version(tb.which_lib_version("hdf5")[0]) + hdf5_version_str = "%s.%s.%s" % hdf5_version + if Version(hdf5_version_str) < tb.req_versions.min_hdf5_version: + print( + f"*Warning*: HDF5 version is lower than recommended: " + f"{hdf5_version} < 
{tb.req_versions.min_hdf5_version}" + ) + + if Version(np.__version__) < tb.req_versions.min_numpy_version: + print( + f"*Warning*: NumPy version is lower than recommended: " + f"{np.__version__} < {tb.req_versions.min_numpy_version}" + ) + + # Handle some global flags (i.e. only useful for test_all.py) + only_versions = 0 + args = sys.argv[:] + for arg in args: + # Remove 'show-versions' for PyTables 2.3 or higher + if arg in ["--print-versions", "--show-versions"]: + only_versions = True + sys.argv.remove(arg) + elif arg == "--show-memory": + common.show_memory = True + sys.argv.remove(arg) + + common.print_versions() + if not only_versions: + common.print_heavy(common.heavy) + common.unittest.main(defaultTest="tb.tests.suite") diff --git a/venv/Lib/site-packages/tables/tests/test_array.py b/venv/Lib/site-packages/tables/tests/test_array.py new file mode 100644 index 0000000..4e8fe0d --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_array.py @@ -0,0 +1,2903 @@ +import sys +import tempfile +from pathlib import Path + +import numpy as np + +import tables as tb +from tables.tests import common + +# warnings.resetwarnings() + + +class BasicTestCase(common.PyTablesTestCase): + """Basic test for all the supported typecodes present in numpy. + + All of them are included on pytables. 
+ + """ + + endiancheck = False + + def write_read(self, testarray): + a = testarray + if common.verbose: + print("\n", "-=" * 30) + print( + "Running test for array with type '%s'" % a.dtype.type, end=" " + ) + print("for class check:", self.title) + + # Create an instance of HDF5 file + filename = tempfile.mktemp(".h5") + try: + with tb.open_file(filename, mode="w") as fileh: + root = fileh.root + + # Create the array under root and name 'somearray' + if self.endiancheck and a.dtype.kind != "S": + b = a.byteswap() + b.dtype = a.dtype.newbyteorder() + a = b + + fileh.create_array(root, "somearray", a, "Some array") + + # Re-open the file in read-only mode + with tb.open_file(filename, mode="r") as fileh: + root = fileh.root + + # Read the saved array + b = root.somearray.read() + + # Compare them. They should be equal. + if common.verbose and not common.allequal(a, b): + print("Write and read arrays differ!") + # print("Array written:", a) + print("Array written shape:", a.shape) + print("Array written itemsize:", a.itemsize) + print("Array written type:", a.dtype.type) + # print("Array read:", b) + print("Array read shape:", b.shape) + print("Array read itemsize:", b.itemsize) + print("Array read type:", b.dtype.type) + if a.dtype.kind != "S": + print("Array written byteorder:", a.dtype.byteorder) + print("Array read byteorder:", b.dtype.byteorder) + + # Check strictly the array equality + self.assertEqual(a.shape, b.shape) + self.assertEqual(a.shape, root.somearray.shape) + if a.dtype.kind == "S": + self.assertEqual(root.somearray.atom.type, "string") + else: + self.assertEqual(a.dtype.type, b.dtype.type) + self.assertEqual( + a.dtype.type, root.somearray.atom.dtype.type + ) + abo = tb.utils.byteorders[a.dtype.byteorder] + bbo = tb.utils.byteorders[b.dtype.byteorder] + if abo != "irrelevant": + self.assertEqual(abo, root.somearray.byteorder) + self.assertEqual(bbo, sys.byteorder) + if self.endiancheck: + self.assertNotEqual(bbo, abo) + + obj = root.somearray + 
def write_read_out_arg(self, testarray):
    """Round-trip *testarray* through an HDF5 file using ``read(out=...)``.

    The array is stored as ``/somearray`` in a fresh file, the file is
    re-opened read-only and the data is read into a preallocated buffer
    passed via the ``out`` argument.  Shape, type and byte order of the
    buffer are then compared with what was written.

    When ``self.endiancheck`` is set and the array is not a byte-string
    array, the data is byte-swapped before being written.
    """
    a = testarray

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running test for array with type '%s'" % a.dtype.type, end=" "
        )
        print("for class check:", self.title)

    # A private temporary directory replaces the deprecated, race-prone
    # tempfile.mktemp().  Its cleanup also does not fail when the HDF5
    # file was never created, so the original error is not masked.
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = str(Path(tmpdir) / "test.h5")

        with tb.open_file(filename, mode="w") as fileh:
            root = fileh.root

            # Create the array under root and name 'somearray'
            if self.endiancheck and a.dtype.kind != "S":
                # Swap the bytes and flip the dtype byte order so the
                # values stay the same; only the memory layout changes.
                a = a.byteswap().view(a.dtype.newbyteorder())

            fileh.create_array(root, "somearray", a, "Some array")

        # Re-open the file in read-only mode
        with tb.open_file(filename, mode="r") as fileh:
            root = fileh.root

            # Read the saved array into a caller-provided buffer
            b = np.empty_like(a, dtype=a.dtype)
            root.somearray.read(out=b)

            # Check strictly the array equality
            self.assertEqual(a.shape, b.shape)
            self.assertEqual(a.shape, root.somearray.shape)
            if a.dtype.kind == "S":
                self.assertEqual(root.somearray.atom.type, "string")
            else:
                self.assertEqual(a.dtype.type, b.dtype.type)
                self.assertEqual(
                    a.dtype.type, root.somearray.atom.dtype.type
                )
                abo = tb.utils.byteorders[a.dtype.byteorder]
                bbo = tb.utils.byteorders[b.dtype.byteorder]
                if abo != "irrelevant":
                    self.assertEqual(abo, root.somearray.byteorder)
                    # The output buffer keeps the dtype it was created
                    # with, so its byte order must match the written one.
                    self.assertEqual(abo, bbo)
                    if self.endiancheck:
                        self.assertNotEqual(bbo, sys.byteorder)

            self.assertTrue(common.allequal(a, b))
a.shape + byteorder = None + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running test for array with type '%s'" % a.dtype.type, end=" " + ) + print("for class check:", self.title) + + # Create an instance of HDF5 file + filename = tempfile.mktemp(".h5") + try: + with tb.open_file(filename, mode="w") as fileh: + root = fileh.root + + # Create the array under root and name 'somearray' + if self.endiancheck and a.dtype.kind != "S": + b = a.byteswap() + b.dtype = a.dtype.newbyteorder() + if b.dtype.byteorder in (">", "<"): + byteorder = tb.utils.byteorders[b.dtype.byteorder] + a = b + + ptarr = fileh.create_array( + root, + "somearray", + atom=atom, + shape=shape, + title="Some array", + # specify the byteorder explicitly + # since there is no way to deduce + # it in this case + byteorder=byteorder, + ) + self.assertEqual(shape, ptarr.shape) + self.assertEqual(atom, ptarr.atom) + ptarr[...] = a + + # Re-open the file in read-only mode + with tb.open_file(filename, mode="r") as fileh: + root = fileh.root + + # Read the saved array + b = root.somearray.read() + + # Compare them. They should be equal. 
def setup00_char(self):
    """Return ``self.tupleChar`` as a NumPy array of byte strings.

    An existing ``ndarray`` is handed back untouched; anything else is
    converted with ``dtype="S"``.
    """
    chars = self.tupleChar
    if isinstance(chars, np.ndarray):
        return chars
    return np.array(chars, dtype="S")
def test00b_char(self):
    """Data integrity during recovery (string objects)

    ``self.tupleChar`` is written as ``/somearray`` without converting it
    to a NumPy array first, then read back from a re-opened file.  A
    ``bytes`` input must come back as an equal ``bytes`` object; any other
    input must come back as a ``list`` or an ``ndarray``.
    """

    a = self.tupleChar

    # A private temporary directory replaces the deprecated, race-prone
    # tempfile.mktemp().  Its cleanup also does not fail when the HDF5
    # file was never created, so the original error is not masked.
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = str(Path(tmpdir) / "test.h5")

        # Create an instance of HDF5 file
        with tb.open_file(filename, mode="w") as fileh:
            fileh.create_array(fileh.root, "somearray", a, "Some array")

        # Re-open the file in read-only mode
        with tb.open_file(filename, mode="r") as fileh:
            # Read the saved array
            b = fileh.root.somearray.read()
            if isinstance(a, bytes):
                self.assertEqual(type(b), bytes)
                self.assertEqual(a, b)
            else:
                # If a is not a python string, then it should be a list
                # or ndarray
                self.assertIn(type(b), [list, np.ndarray])
def setup01_char_nc(self):
    """Build a strided (non-contiguous) byte-string array from ``tupleChar``.

    ``self.tupleChar`` is converted to an array with ``dtype="S"`` unless
    it already is an ``ndarray``.  A rank-0 array is returned as a copy;
    otherwise every second element along the first axis is selected, and
    the result is asserted to be non-contiguous when it holds more than
    one element.
    """
    source = self.tupleChar
    if not isinstance(source, np.ndarray):
        source = np.array(source, dtype="S")

    # Scalars cannot be sliced, so they are just duplicated.
    if source.ndim == 0:
        return source.copy()

    strided = source[::2]
    # Ensure that this numpy string is non-contiguous
    if len(strided) > 1:
        self.assertEqual(strided.flags.contiguous, False)
    return strided
"""Data integrity during recovery (non-contiguous numerical types)""" + + typecodes = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + "complex64", + "complex128", + ] + + for name in ( + "float16", + "float96", + "float128", + "complex192", + "complex256", + ): + atomname = name.capitalize() + "Atom" + if hasattr(tb, atomname): + typecodes.append(name) + + for typecode in typecodes: + a = np.array(self.tupleInt, typecode) + if a.ndim == 0: + b1 = a.copy() + b2 = a.copy() + b3 = a.copy() + else: + b1 = a[::2] + b2 = a[::2] + b3 = a[::2] + # Ensure that this array is non-contiguous + if len(b1) > 1: + self.assertEqual(b1.flags.contiguous, False) + if len(b2) > 1: + self.assertEqual(b2.flags.contiguous, False) + if len(b3) > 1: + self.assertEqual(b3.flags.contiguous, False) + self.write_read(b1) + self.write_read_out_arg(b2) + self.write_read_atom_shape_args(b3) + + +class Basic0DOneTestCase(BasicTestCase): + # Scalar case + title = "Rank-0 case 1" + tupleInt = 3 + tupleChar = b"3" + endiancheck = True + + +class Basic0DTwoTestCase(BasicTestCase): + # Scalar case + title = "Rank-0 case 2" + tupleInt = 33 + tupleChar = b"33" + endiancheck = True + + +class Basic1DZeroTestCase(BasicTestCase): + # This test case is not supported by PyTables (HDF5 limitations) + # 1D case + title = "Rank-1 case 0" + tupleInt = () + tupleChar = () + endiancheck = False + + +class Basic1DOneTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 1" + tupleInt = (3,) + tupleChar = (b"a",) + endiancheck = True + + +class Basic1DTwoTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 2" + tupleInt = (3, 4) + tupleChar = (b"aaa",) + endiancheck = True + + +class Basic1DThreeTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 3" + tupleInt = (3, 4, 5) + tupleChar = ( + b"aaa", + b"bbb", + ) + endiancheck = True + + +class Basic2DOneTestCase(BasicTestCase): + # 2D case + title = "Rank-2 case 1" + tupleInt = 
np.array(np.arange((4) ** 2)) + tupleInt.shape = (4,) * 2 + tupleChar = np.array(["abc"] * 3**2, dtype="S3") + tupleChar.shape = (3,) * 2 + endiancheck = True + + +class Basic2DTwoTestCase(BasicTestCase): + # 2D case, with a multidimensional dtype + title = "Rank-2 case 2" + tupleInt = np.tile(np.arange(4, dtype=np.int64), [4, 1]) + tupleChar = np.array(["abc"] * 3, dtype=("S3", (3,))) + endiancheck = True + + +class Basic10DTestCase(BasicTestCase): + # 10D case + title = "Rank-10 test" + tupleInt = np.array(np.arange((2) ** 10)) + tupleInt.shape = (2,) * 10 + tupleChar = np.array(["abc"] * 2**10, dtype="S3") + tupleChar.shape = (2,) * 10 + endiancheck = True + + +class Basic32DTestCase(BasicTestCase): + # 32D case (maximum) + title = "Rank-32 test" + tupleInt = np.array((32,)) + tupleInt.shape = (1,) * 32 + tupleChar = np.array(["121"], dtype="S3") + tupleChar.shape = (1,) * 32 + + +class ReadOutArgumentTests(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self.size = 1000 + + def create_array(self): + array = np.arange(self.size, dtype="f8") + disk_array = self.h5file.create_array("/", "array", array) + return array, disk_array + + def test_read_entire_array(self): + array, disk_array = self.create_array() + out_buffer = np.empty((self.size,), "f8") + disk_array.read(out=out_buffer) + np.testing.assert_equal(out_buffer, array) + + def test_read_contiguous_slice1(self): + array, disk_array = self.create_array() + out_buffer = np.arange(self.size, dtype="f8") + out_buffer = np.random.permutation(out_buffer) + out_buffer_orig = out_buffer.copy() + start = self.size // 2 + disk_array.read(start=start, stop=self.size, out=out_buffer[start:]) + np.testing.assert_equal(out_buffer[start:], array[start:]) + np.testing.assert_equal(out_buffer[:start], out_buffer_orig[:start]) + + def test_read_contiguous_slice2(self): + array, disk_array = self.create_array() + out_buffer = np.arange(self.size, dtype="f8") + out_buffer = 
def test_buffer_too_small(self):
    """Reading into an undersized ``out`` buffer must raise ``ValueError``.

    The buffer holds only half of the ``self.size`` elements requested.
    The error message is expected to contain
    ``"output array size invalid, got"``.
    """
    _, disk_array = self.create_array()
    out_buffer = np.empty((self.size // 2,), "f8")

    # One context manager checks both the exception type and its message.
    # The previous try/except form passed silently when the second call
    # did not raise, and issued the failing read twice.
    with self.assertRaisesRegex(
        ValueError, "output array size invalid, got"
    ):
        disk_array.read(0, self.size, 1, out_buffer)
test_all_zeros(self): + self.assertEqual(self.array.size_on_disk, 10 * 10 * 4) + self.assertEqual(self.array.size_in_memory, 10 * 10 * 4) + + +class UnalignedAndComplexTestCase( + common.TempFileMixin, common.PyTablesTestCase +): + """Basic test for all the supported typecodes present in numpy. + + Most of them are included on PyTables. + + """ + + def setUp(self): + super().setUp() + self.root = self.h5file.root + + def write_read(self, testArray): + if common.verbose: + print("\n", "-=" * 30) + print( + "\nRunning test for array with type '%s'" + % testArray.dtype.type + ) + + # Create the array under root and name 'somearray' + a = testArray + if self.endiancheck: + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + else: + byteorder = sys.byteorder + + self.h5file.create_array( + self.root, "somearray", a, "Some array", byteorder=byteorder + ) + + if self.reopen: + self._reopen() + self.root = self.h5file.root + + # Read the saved array + b = self.root.somearray.read() + + # Get an array to be compared in the correct byteorder + c = a.view(a.dtype.newbyteorder(byteorder)) + + # Compare them. They should be equal. 
def test04_short_offset(self):
    """Checking an offset unsigned short int precision array"""

    # The test name and docstring refer to an unsigned *short*, i.e. a
    # 16-bit integer; the array was previously created as uint32, which
    # left the 16-bit unsigned case untested.
    r = np.arange(100, dtype=np.uint16).reshape(10, 10)
    # Row 2 is a view that starts at a non-zero offset in r's buffer.
    a = r[2]
    self.write_read(a)
self.write_read(a) + + def test06_longlongint_offset(self): + """Checking an offset long long integer array""" + + r = np.arange(100, dtype=np.int64) + r.shape = (10, 10) + a = r[2] + self.write_read(a) + + def test07_float_offset(self): + """Checking an offset single precision array""" + + r = np.arange(100, dtype=np.float32) + r.shape = (10, 10) + a = r[2] + self.write_read(a) + + def test08_double_offset(self): + """Checking an offset double precision array""" + + r = np.arange(100, dtype=np.float64) + r.shape = (10, 10) + a = r[2] + self.write_read(a) + + def test09_float_offset_unaligned(self): + """Checking an unaligned and offset single precision array""" + + r = np.rec.array(b"a" * 200, formats="i1,3f4,i2", shape=10) + a = r["f1"][3] + # Ensure that this array is non-aligned + self.assertEqual(a.flags.aligned, False) + self.assertEqual(a.dtype.type, np.float32) + self.write_read(a) + + def test10_double_offset_unaligned(self): + """Checking an unaligned and offset double precision array""" + + r = np.rec.array(b"a" * 400, formats="i1,3f8,i2", shape=10) + a = r["f1"][3] + # Ensure that this array is non-aligned + self.assertEqual(a.flags.aligned, False) + self.assertEqual(a.dtype.type, np.float64) + self.write_read(a) + + def test11_int_byteorder(self): + """Checking setting data with different byteorder in a range + (integer)""" + + # Save an array with the reversed byteorder on it + a = np.arange(25, dtype=np.int32).reshape(5, 5) + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + array = self.h5file.create_array( + self.h5file.root, "array", a, "byteorder (int)" + ) + # Read a subarray (got an array with the machine byteorder) + b = array[2:4, 3:5] + b = b.byteswap() + b = b.view(b.dtype.newbyteorder()) + # Set this subarray back to the array + array[2:4, 3:5] = b + b = b.byteswap() + b = b.view(b.dtype.newbyteorder()) + # Set this subarray back to the array + array[2:4, 3:5] = b + # Check that the array is back in the correct byteorder + c = 
array[...] + if common.verbose: + print("byteorder of array on disk-->", array.byteorder) + print("byteorder of subarray-->", b.dtype.byteorder) + print("subarray-->", b) + print("retrieved array-->", c) + self.assertTrue(common.allequal(a, c)) + + def test12_float_byteorder(self): + """Checking setting data with different byteorder in a range (float)""" + + # Save an array with the reversed byteorder on it + a = np.arange(25, dtype=np.float64).reshape(5, 5) + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + array = self.h5file.create_array( + self.h5file.root, "array", a, "byteorder (float)" + ) + # Read a subarray (got an array with the machine byteorder) + b = array[2:4, 3:5] + b = b.byteswap() + b = b.view(b.dtype.newbyteorder()) + # Set this subarray back to the array + array[2:4, 3:5] = b + b = b.byteswap() + b = b.view(b.dtype.newbyteorder()) + # Set this subarray back to the array + array[2:4, 3:5] = b + # Check that the array is back in the correct byteorder + c = array[...] 
+ if common.verbose: + print("byteorder of array on disk-->", array.byteorder) + print("byteorder of subarray-->", b.dtype.byteorder) + print("subarray-->", b) + print("retrieved array-->", c) + self.assertTrue(common.allequal(a, c)) + + +class ComplexNotReopenNotEndianTestCase(UnalignedAndComplexTestCase): + endiancheck = False + reopen = False + + +class ComplexReopenNotEndianTestCase(UnalignedAndComplexTestCase): + endiancheck = False + reopen = True + + +class ComplexNotReopenEndianTestCase(UnalignedAndComplexTestCase): + endiancheck = True + reopen = False + + +class ComplexReopenEndianTestCase(UnalignedAndComplexTestCase): + endiancheck = True + reopen = True + + +class GroupsArrayTestCase(common.TempFileMixin, common.PyTablesTestCase): + """This test class checks combinations of arrays with groups.""" + + def test00_iterativeGroups(self): + """Checking combinations of arrays with groups.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test00_iterativeGroups..." 
+ % self.__class__.__name__ + ) + + # Get the root group + group = self.h5file.root + + # Set the type codes to test + # The typecodes below does expose an ambiguity that is reported in: + # http://projects.scipy.org/scipy/numpy/ticket/283 and + # http://projects.scipy.org/scipy/numpy/ticket/290 + typecodes = [ + "b", + "B", + "h", + "H", + "i", + "I", + "l", + "L", + "q", + "f", + "d", + "F", + "D", + ] + + if hasattr(tb, "Float16Atom"): + typecodes.append("e") + if hasattr(tb, "Float96Atom") or hasattr(tb, "Float128Atom"): + typecodes.append("g") + if hasattr(tb, "Complex192Atom") or hasattr(tb, "Complex256Atom"): + typecodes.append("G") + + for i, typecode in enumerate(typecodes): + a = np.ones((3,), typecode) + dsetname = "array_" + typecode + if common.verbose: + print("Creating dataset:", group._g_join(dsetname)) + self.h5file.create_array(group, dsetname, a, "Large array") + group = self.h5file.create_group(group, "group" + str(i)) + + # Reopen the file + self._reopen() + + # Get the root group + group = self.h5file.root + + # Get the metadata on the previosly saved arrays + for i, typecode in enumerate(typecodes): + # Create an array for later comparison + a = np.ones((3,), typecode) + # Get the dset object hanging from group + dset = getattr(group, "array_" + typecode) + # Get the actual array + b = dset.read() + if common.verbose: + print("Info from dataset:", dset._v_pathname) + print(" shape ==>", dset.shape, end=" ") + print(" type ==> %s" % dset.atom.dtype) + print("Array b read from file. Shape: ==>", b.shape, end=" ") + print(". Type ==> %s" % b.dtype) + self.assertEqual(a.shape, b.shape) + self.assertEqual(a.dtype, b.dtype) + self.assertTrue(common.allequal(a, b)) + + # Iterate over the next group + group = getattr(group, "group" + str(i)) + + def test01_largeRankArrays(self): + """Checking creation of large rank arrays (0 < rank <= 32) + It also uses arrays ranks which ranges until maxrank. 
+ """ + + # maximum level of recursivity (deepest group level) achieved: + # maxrank = 32 (for an effective maximum rank of 32) + # This limit is due to HDF5 library limitations. + minrank = 1 + maxrank = 32 + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_largeRankArrays..." + % self.__class__.__name__ + ) + print("Maximum rank for tested arrays:", maxrank) + + group = self.h5file.root + if common.verbose: + print("Rank array writing progress: ", end=" ") + for rank in range(minrank, maxrank + 1): + # Create an array of integers, with incrementally bigger ranges + a = np.ones((1,) * rank, np.int32) + if common.verbose: + print("%3d," % (rank), end=" ") + self.h5file.create_array(group, "array", a, "Rank: %s" % rank) + group = self.h5file.create_group(group, "group" + str(rank)) + + # Reopen the file + self._reopen() + + group = self.h5file.root + if common.verbose: + print() + print("Rank array reading progress: ") + # Get the metadata on the previously saved arrays + for rank in range(minrank, maxrank + 1): + # Create an array for later comparison + a = np.ones((1,) * rank, np.int32) + # Get the actual array + b = group.array.read() + if common.verbose: + print("%3d," % (rank), end=" ") + if common.verbose and not common.allequal(a, b): + print("Info from dataset:", group.array._v_pathname) + print(" Shape: ==>", group.array.shape, end=" ") + print(" typecode ==> %c" % group.array.typecode) + print("Array b read from file. Shape: ==>", b.shape, end=" ") + print(". 
Type ==> %c" % b.dtype) + + self.assertEqual(a.shape, b.shape) + self.assertEqual(a.dtype, b.dtype) + self.assertTrue(common.allequal(a, b)) + + # print(self.h5file) + # Iterate over the next group + group = self.h5file.get_node(group, "group" + str(rank)) + + if common.verbose: + print() # This flush the stdout buffer + + +class CopyTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01_copy(self): + """Checking Array.copy() method.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_copy..." % self.__class__.__name__) + + # Create an Array + arr = np.array([[456, 2], [3, 457]], dtype="int16") + array1 = self.h5file.create_array( + self.h5file.root, "array1", arr, "title array1" + ) + + # Copy to another Array + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.title, array2.title) + + def test02_copy(self): + """Checking Array.copy() method (where specified)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_copy..." 
% self.__class__.__name__) + + # Create an Array + arr = np.array([[456, 2], [3, 457]], dtype="int16") + array1 = self.h5file.create_array( + self.h5file.root, "array1", arr, "title array1" + ) + + # Copy to another Array + group1 = self.h5file.create_group("/", "group1") + array2 = array1.copy(group1, "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.group1.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.title, array2.title) + + def test03_copy(self): + """Checking Array.copy() method (checking title copying)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_copy..." 
def test04_copy(self):
    """Checking Array.copy() method (user attributes copied)

    Two user attributes are set on the source array, which is then
    copied with ``copyuserattrs`` enabled.  Both attributes must be
    present on the copy.  When ``self.close`` is true the file is
    re-opened before the check.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        # The banner used to announce "test05_copy", which does not match
        # this method and made verbose logs misleading.
        print("Running %s.test04_copy..." % self.__class__.__name__)

    # Create an Array
    arr = np.array([[456, 2], [3, 457]], dtype="int16")
    array1 = self.h5file.create_array(
        self.h5file.root, "array1", arr, "title array1"
    )
    # Append some user attrs
    array1.attrs.attr1 = "attr1"
    array1.attrs.attr2 = 2
    # Copy it to another Array
    array2 = array1.copy("/", "array2", copyuserattrs=1)

    if self.close:
        if common.verbose:
            print("(closing file version)")
        self._reopen()
        # Node handles obtained before the reopen are re-fetched here.
        array1 = self.h5file.root.array1
        array2 = self.h5file.root.array2

    if common.verbose:
        print("attrs array1-->", repr(array1.attrs))
        print("attrs array2-->", repr(array2.attrs))

    # Assert user attributes
    self.assertEqual(array2.attrs.attr1, "attr1")
    self.assertEqual(array2.attrs.attr2, 2)
% self.__class__.__name__) + + # Create an Array + arr = np.array([[456, 2], [3, 457]], dtype="int16") + array1 = self.h5file.create_array( + self.h5file.root, "array1", arr, "title array1" + ) + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=0) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(hasattr(array2.attrs, "attr1"), 0) + self.assertEqual(hasattr(array2.attrs, "attr2"), 0) + + +class CloseCopyTestCase(CopyTestCase): + close = 1 + + +class OpenCopyTestCase(CopyTestCase): + close = 0 + + +class CopyIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01_index(self): + """Checking Array.copy() method with indexes.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_index..." 
% self.__class__.__name__) + + # Create a numpy + r = np.arange(200, dtype="int32") + r.shape = (100, 2) + # Save it in an array: + array1 = self.h5file.create_array( + self.h5file.root, "array1", r, "title array1" + ) + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + self.assertTrue(common.allequal(r2, array2.read())) + + # Assert the number of rows in array + if common.verbose: + print("nrows in array2-->", array2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], array2.nrows) + + def test02_indexclosef(self): + """Checking Array.copy() method with indexes (close file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_indexclosef..." 
% self.__class__.__name__) + + # Create a numpy + r = np.arange(200, dtype="int32") + r.shape = (100, 2) + # Save it in an array: + array1 = self.h5file.create_array( + self.h5file.root, "array1", r, "title array1" + ) + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + # Close and reopen the file + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + self.assertTrue(common.allequal(r2, array2.read())) + + # Assert the number of rows in array + if common.verbose: + print("nrows in array2-->", array2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], array2.nrows) + + +class CopyIndex1TestCase(CopyIndexTestCase): + start = 0 + stop = 7 + step = 1 + + +class CopyIndex2TestCase(CopyIndexTestCase): + start = 0 + stop = -1 + step = 1 + + +class CopyIndex3TestCase(CopyIndexTestCase): + start = 1 + stop = 7 + step = 1 + + +class CopyIndex4TestCase(CopyIndexTestCase): + start = 0 + stop = 6 + step = 1 + + +class CopyIndex5TestCase(CopyIndexTestCase): + start = 3 + stop = 7 + step = 1 + + +class CopyIndex6TestCase(CopyIndexTestCase): + start = 3 + stop = 6 + step = 2 + + +class CopyIndex7TestCase(CopyIndexTestCase): + start = 0 + stop = 7 + step = 10 + + +class CopyIndex8TestCase(CopyIndexTestCase): + start = 6 + stop = -1 # Negative values means starting from the end + step = 1 + + +class CopyIndex9TestCase(CopyIndexTestCase): + start = 3 + stop = 4 + step = 1 + + +class CopyIndex10TestCase(CopyIndexTestCase): + start = 3 + stop = 4 + step = 2 + + +class CopyIndex11TestCase(CopyIndexTestCase): + start = -3 + stop = -1 + step = 2 + + +class 
CopyIndex12TestCase(CopyIndexTestCase): + start = -1 # Should point to the last element + stop = None # None should mean the last element (including it) + step = 1 + + +class GetItemTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_single(self): + """Single element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original first element:", a[0], type(a[0])) + print("Read first element:", arr[0], type(arr[0])) + self.assertTrue(common.allequal(a[0], arr[0])) + self.assertEqual(type(a[0]), type(arr[0])) + + def test01_single(self): + """Single element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original first element:", a[0], type(a[0])) + print("Read first element:", arr[0], type(arr[0])) + self.assertEqual(a[0], arr[0]) + self.assertEqual(type(a[0]), type(arr[0])) + + def test02_range(self): + """Range element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4]) + print("Read elements:", arr[1:4]) + self.assertTrue(common.allequal(a[1:4], arr[1:4])) + + def test03_range(self): + """Range element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = 
self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4]) + print("Read elements:", arr[1:4]) + self.assertTrue(common.allequal(a[1:4], arr[1:4])) + + def test04_range(self): + """Range element access, strided (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4:2]) + print("Read elements:", arr[1:4:2]) + self.assertTrue(common.allequal(a[1:4:2], arr[1:4:2])) + + def test05_range(self): + """Range element access, strided (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4:2]) + print("Read elements:", arr[1:4:2]) + self.assertTrue(common.allequal(a[1:4:2], arr[1:4:2])) + + def test06_negativeIndex(self): + """Negative Index element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original last element:", a[-1]) + print("Read last element:", arr[-1]) + self.assertTrue(common.allequal(a[-1], arr[-1])) + + def test07_negativeIndex(self): + """Negative Index element access (numerical types)""" + + # Create 
the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original before last element:", a[-2]) + print("Read before last element:", arr[-2]) + if isinstance(a[-2], np.ndarray): + self.assertTrue(common.allequal(a[-2], arr[-2])) + else: + self.assertEqual(a[-2], arr[-2]) + + def test08_negativeRange(self): + """Negative range element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertTrue(common.allequal(a[-4:-1], arr[-4:-1])) + + def test09_negativeRange(self): + """Negative range element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertTrue(common.allequal(a[-4:-1], arr[-4:-1])) + + +class GI1NATestCase(GetItemTestCase, common.PyTablesTestCase): + title = "Rank-1 case 1" + numericalList = np.array([3]) + numericalListME = np.array([3, 2, 1, 0, 4, 5, 6]) + charList = np.array(["3"], "S") + charListME = np.array( + ["321", "221", "121", "021", "421", "521", "621"], "S" + ) + + +class GI1NAOpenTestCase(GI1NATestCase): + close = 0 + + +class GI1NACloseTestCase(GI1NATestCase): + close = 1 + + +class 
GI2NATestCase(GetItemTestCase): + # A more complex example + title = "Rank-1,2 case 2" + numericalList = np.array([3, 4]) + numericalListME = np.array( + [ + [3, 2, 1, 0, 4, 5, 6], + [2, 1, 0, 4, 5, 6, 7], + [4, 3, 2, 1, 0, 4, 5], + [3, 2, 1, 0, 4, 5, 6], + [3, 2, 1, 0, 4, 5, 6], + ] + ) + + charList = np.array(["a", "b"], "S") + charListME = np.array( + [ + ["321", "221", "121", "021", "421", "521", "621"], + ["21", "21", "11", "02", "42", "21", "61"], + ["31", "21", "12", "21", "41", "51", "621"], + ["321", "221", "121", "021", "421", "521", "621"], + ["3241", "2321", "13216", "0621", "4421", "5421", "a621"], + ["a321", "s221", "d121", "g021", "b421", "5vvv21", "6zxzxs21"], + ], + "S", + ) + + +class GI2NAOpenTestCase(GI2NATestCase): + close = 0 + + +class GI2NACloseTestCase(GI2NATestCase): + close = 1 + + +class SetItemTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_single(self): + """Single element update (character types)""" + + # Create the array under root and name 'somearray' + a = self.charList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify a single element of a and arr: + a[0] = b"b" + arr[0] = b"b" + + # Get and compare an element + if common.verbose: + print("Original first element:", a[0]) + print("Read first element:", arr[0]) + self.assertTrue(common.allequal(a[0], arr[0])) + + def test01_single(self): + """Single element update (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + a[0] = 333 + arr[0] = 333 + + # Get and compare an element + if common.verbose: + print("Original first element:", a[0]) + print("Read first element:", arr[0]) + 
self.assertEqual(a[0], arr[0]) + + def test02_range(self): + """Range element update (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + a[1:3] = b"xXx" + arr[1:3] = b"xXx" + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4]) + print("Read elements:", arr[1:4]) + self.assertTrue(common.allequal(a[1:4], arr[1:4])) + + def test03_range(self): + """Range element update (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = slice(1, 3, None) + rng = np.arange(a[s].size) * 2 + 3 + rng.shape = a[s].shape + a[s] = rng + arr[s] = rng + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4]) + print("Read elements:", arr[1:4]) + self.assertTrue(common.allequal(a[1:4], arr[1:4])) + + def test04_range(self): + """Range element update, strided (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = slice(1, 4, 2) + a[s] = b"xXx" + arr[s] = b"xXx" + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4:2]) + print("Read elements:", arr[1:4:2]) + self.assertTrue(common.allequal(a[1:4:2], arr[1:4:2])) + + def test05_range(self): + """Range element update, strided (numerical types)""" + + # Create the array under root and name 'somearray' + a = 
self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = slice(1, 4, 2) + rng = np.arange(a[s].size) * 2 + 3 + rng.shape = a[s].shape + a[s] = rng + arr[s] = rng + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4:2]) + print("Read elements:", arr[1:4:2]) + self.assertTrue(common.allequal(a[1:4:2], arr[1:4:2])) + + def test06_negativeIndex(self): + """Negative Index element update (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = -1 + a[s] = b"xXx" + arr[s] = b"xXx" + + # Get and compare an element + if common.verbose: + print("Original last element:", a[-1]) + print("Read last element:", arr[-1]) + self.assertTrue(common.allequal(a[-1], arr[-1])) + + def test07_negativeIndex(self): + """Negative Index element update (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = -2 + a[s] = a[s] * 2 + 3 + arr[s] = arr[s] * 2 + 3 + + # Get and compare an element + if common.verbose: + print("Original before last element:", a[-2]) + print("Read before last element:", arr[-2]) + if isinstance(a[-2], np.ndarray): + self.assertTrue(common.allequal(a[-2], arr[-2])) + else: + self.assertEqual(a[-2], arr[-2]) + + def test08_negativeRange(self): + """Negative range element update (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = 
self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = slice(-4, -1, None) + a[s] = b"xXx" + arr[s] = b"xXx" + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertTrue(common.allequal(a[-4:-1], arr[-4:-1])) + + def test09_negativeRange(self): + """Negative range element update (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of a and arr: + s = slice(-3, -1, None) + rng = np.arange(a[s].size) * 2 + 3 + rng.shape = a[s].shape + a[s] = rng + arr[s] = rng + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertTrue(common.allequal(a[-4:-1], arr[-4:-1])) + + def test10_outOfRange(self): + """Out of range update (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen("a") + arr = self.h5file.root.somearray + + # Modify elements of arr that are out of range: + s = slice(1, a.shape[0] + 1, None) + s2 = slice(1, 1000, None) + rng = np.arange(a[s].size) * 2 + 3 + rng.shape = a[s].shape + a[s] = rng + rng2 = np.arange(a[s2].size) * 2 + 3 + rng2.shape = a[s2].shape + arr[s2] = rng2 + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertTrue(common.allequal(a[-4:-1], arr[-4:-1])) + + +class SI1NATestCase(SetItemTestCase, common.PyTablesTestCase): + 
title = "Rank-1 case 1" + numericalList = np.array([3]) + numericalListME = np.array([3, 2, 1, 0, 4, 5, 6]) + charList = np.array(["3"], "S") + charListME = np.array( + ["321", "221", "121", "021", "421", "521", "621"], "S" + ) + + +class SI1NAOpenTestCase(SI1NATestCase): + close = 0 + + +class SI1NACloseTestCase(SI1NATestCase): + close = 1 + + +class SI2NATestCase(SetItemTestCase): + # A more complex example + title = "Rank-1,2 case 2" + numericalList = np.array([3, 4]) + numericalListME = np.array( + [ + [3, 2, 1, 0, 4, 5, 6], + [2, 1, 0, 4, 5, 6, 7], + [4, 3, 2, 1, 0, 4, 5], + [3, 2, 1, 0, 4, 5, 6], + [3, 2, 1, 0, 4, 5, 6], + ] + ) + + charList = np.array(["a", "b"], "S") + charListME = np.array( + [ + ["321", "221", "121", "021", "421", "521", "621"], + ["21", "21", "11", "02", "42", "21", "61"], + ["31", "21", "12", "21", "41", "51", "621"], + ["321", "221", "121", "021", "421", "521", "621"], + ["3241", "2321", "13216", "0621", "4421", "5421", "a621"], + ["a321", "s221", "d121", "g021", "b421", "5vvv21", "6zxzxs21"], + ], + "S", + ) + + +class SI2NAOpenTestCase(SI2NATestCase): + close = 0 + + +class SI2NACloseTestCase(SI2NATestCase): + close = 1 + + +class GeneratorTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00a_single(self): + """Testing generator access to Arrays, single elements (char)""" + + # Create the array under root and name 'somearray' + a = self.charList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + ga = [i for i in a] + garr = [i for i in arr] + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + self.assertEqual(ga, garr) + + def test00b_me(self): + """Testing generator access to Arrays, multiple elements (char)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = 
self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + ga = list(a) + garr = list(arr) + + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + for x_ga, x_garr in zip(ga, garr): + self.assertTrue(common.allequal(x_ga, x_garr)) + + def test01a_single(self): + """Testing generator access to Arrays, single elements (numeric)""" + + # Create the array under root and name 'somearray' + a = self.numericalList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + ga = [i for i in a] + garr = [i for i in arr] + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + self.assertEqual(ga, garr) + + def test01b_me(self): + """Testing generator access to Arrays, multiple elements (numeric)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + if self.close: + self._reopen() + arr = self.h5file.root.somearray + + # Get and compare an element + ga = list(a) + garr = list(arr) + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + for x_ga, x_garr in zip(ga, garr): + self.assertTrue(common.allequal(x_ga, x_garr)) + + +class GE1NATestCase(GeneratorTestCase): + title = "Rank-1 case 1" + numericalList = np.array([3]) + numericalListME = np.array([3, 2, 1, 0, 4, 5, 6]) + charList = np.array(["3"], "S") + charListME = np.array( + ["321", "221", "121", "021", "421", "521", "621"], "S" + ) + + +class GE1NAOpenTestCase(GE1NATestCase): + close = 0 + + +class GE1NACloseTestCase(GE1NATestCase): + close = 1 + + +class 
GE2NATestCase(GeneratorTestCase): + # A more complex example + title = "Rank-1,2 case 2" + numericalList = np.array([3, 4]) + numericalListME = np.array( + [ + [3, 2, 1, 0, 4, 5, 6], + [2, 1, 0, 4, 5, 6, 7], + [4, 3, 2, 1, 0, 4, 5], + [3, 2, 1, 0, 4, 5, 6], + [3, 2, 1, 0, 4, 5, 6], + ] + ) + + charList = np.array(["a", "b"], "S") + charListME = np.array( + [ + ["321", "221", "121", "021", "421", "521", "621"], + ["21", "21", "11", "02", "42", "21", "61"], + ["31", "21", "12", "21", "41", "51", "621"], + ["321", "221", "121", "021", "421", "521", "621"], + ["3241", "2321", "13216", "0621", "4421", "5421", "a621"], + ["a321", "s221", "d121", "g021", "b421", "5vvv21", "6zxzxs21"], + ], + "S", + ) + + +class GE2NAOpenTestCase(GE2NATestCase): + close = 0 + + +class GE2NACloseTestCase(GE2NATestCase): + close = 1 + + +class NonHomogeneousTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test(self): + """Test for creation of non-homogeneous arrays.""" + + # This checks ticket #12. + self.assertRaises( + (ValueError, TypeError), + self.h5file.create_array, + "/", + "test", + [1, [2, 3]], + ) + self.assertRaises(tb.NoSuchNodeError, self.h5file.remove_node, "/test") + + +class TruncateTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test(self): + """Test for unability to truncate Array objects.""" + + array1 = self.h5file.create_array("/", "array1", [0, 2]) + self.assertRaises(TypeError, array1.truncate, 0) + + +class PointSelectionTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + # Limits for selections + self.limits = [ + (0, 1), # just one element + (20, -10), # no elements + (-10, 4), # several elements + (0, 10), # several elements (again) + ] + + # Create a sample array + size = np.prod(self.shape) + nparr = np.arange(size, dtype=np.int32).reshape(self.shape) + self.nparr = nparr + self.tbarr = self.h5file.create_array(self.h5file.root, "array", nparr) + + def test01a_read(self): + """Test for 
point-selections (read, boolean keys).""" + + nparr = self.nparr + tbarr = self.tbarr + for value1, value2 in self.limits: + key = (nparr >= value1) & (nparr < value2) + if common.verbose: + print("Selection to test:", key) + a = nparr[key] + b = tbarr[key] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables selections does not match.", + ) + + def test01b_read(self): + """Test for point-selections (read, integer keys).""" + + nparr = self.nparr + tbarr = self.tbarr + for value1, value2 in self.limits: + key = np.where((nparr >= value1) & (nparr < value2)) + if common.verbose: + print("Selection to test:", key) + a = nparr[key] + b = tbarr[key] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables selections does not match.", + ) + + def test01c_read(self): + """Test for point-selections (read, float keys).""" + + nparr = self.nparr + tbarr = self.tbarr + for value1, value2 in self.limits: + key = np.where((nparr >= value1) & (nparr < value2)) + if common.verbose: + print("Selection to test:", key) + # a = nparr[key] + fkey = np.array(key, "f4") + self.assertRaises((IndexError, TypeError), tbarr.__getitem__, fkey) + + def test01d_read(self): + nparr = self.nparr + tbarr = self.tbarr + + for key in self.working_keyset: + if nparr.ndim > 1: + key = tuple(key) + if common.verbose: + print("Selection to test:", key) + a = nparr[key] + b = tbarr[key] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables selections does not match." 
+ ) + + def test01e_read(self): + tbarr = self.tbarr + + for key in self.not_working_keyset: + if common.verbose: + print("Selection to test:", key) + + self.assertRaises(IndexError, tbarr.__getitem__, key) + + def test02a_write(self): + """Test for point-selections (write, boolean keys).""" + + nparr = self.nparr + tbarr = self.tbarr + for value1, value2 in self.limits: + key = (nparr >= value1) & (nparr < value2) + if common.verbose: + print("Selection to test:", key) + s = nparr[key] + nparr[key] = s * 2 + tbarr[key] = s * 2 + a = nparr[:] + b = tbarr[:] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables modifications does not match.", + ) + + def test02b_write(self): + """Test for point-selections (write, integer keys).""" + + nparr = self.nparr + tbarr = self.tbarr + for value1, value2 in self.limits: + key = np.where((nparr >= value1) & (nparr < value2)) + if common.verbose: + print("Selection to test:", key) + s = nparr[key] + nparr[key] = s * 2 + tbarr[key] = s * 2 + a = nparr[:] + b = tbarr[:] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables modifications does not match.", + ) + + def test02c_write(self): + """Test for point-selections (write, integer values, broadcast).""" + + nparr = self.nparr + tbarr = self.tbarr + for value1, value2 in self.limits: + key = np.where((nparr >= value1) & (nparr < value2)) + if common.verbose: + print("Selection to test:", key) + # s = nparr[key] + nparr[key] = 2 # force a broadcast + tbarr[key] = 2 # force a broadcast + a = nparr[:] + b = tbarr[:] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables modifications does not match.", + ) + + +class PointSelection0(PointSelectionTestCase): + shape = (3,) + working_keyset = [ + [0, 1], + [0, -1], + ] + not_working_keyset = [ + [0, 3], + [0, 4], + [0, -4], + ] + + +class PointSelection1(PointSelectionTestCase): + shape = (5, 3, 3) + working_keyset = [ + [(0, 0), (0, 1), (0, 0)], + [(0, 0), (0, -1), (0, 0)], + ] + not_working_keyset = [ 
+ [(0, 0), (0, 3), (0, 0)], + [(0, 0), (0, 4), (0, 0)], + [(0, 0), (0, -4), (0, 0)], + [(0, 0), (0, -5), (0, 0)], + ] + + +class PointSelection2(PointSelectionTestCase): + shape = (7, 3) + working_keyset = [ + [(0, 0), (0, 1)], + [(0, 0), (0, -1)], + [(0, 0), (0, -2)], + ] + not_working_keyset = [ + [(0, 0), (0, 3)], + [(0, 0), (0, 4)], + [(0, 0), (0, -4)], + [(0, 0), (0, -5)], + ] + + +class PointSelection3(PointSelectionTestCase): + shape = (4, 3, 2, 1) + working_keyset = [ + [(0, 0), (0, 1), (0, 0), (0, 0)], + [(0, 0), (0, -1), (0, 0), (0, 0)], + ] + not_working_keyset = [ + [(0, 0), (0, 3), (0, 0), (0, 0)], + [(0, 0), (0, 4), (0, 0), (0, 0)], + [(0, 0), (0, -4), (0, 0), (0, 0)], + ] + + +class PointSelection4(PointSelectionTestCase): + shape = (1, 3, 2, 5, 6) + working_keyset = [ + [(0, 0), (0, 1), (0, 0), (0, 0), (0, 0)], + [(0, 0), (0, -1), (0, 0), (0, 0), (0, 0)], + ] + not_working_keyset = [ + [(0, 0), (0, 3), (0, 0), (0, 0), (0, 0)], + [(0, 0), (0, 4), (0, 0), (0, 0), (0, 0)], + [(0, 0), (0, -4), (0, 0), (0, 0), (0, 0)], + ] + + +class FancySelectionTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + m, n, o = self.shape + + # The next are valid selections for both NumPy and PyTables + self.working_keyset = [ + ([1, 3], slice(1, n - 1), 2), + ([m - 1, 1, 3, 2], slice(None), 2), # unordered lists supported + (slice(m), [n - 1, 1, 0], slice(None)), + (slice(1, m, 3), slice(1, n), [o - 1, 1, 0]), + (m - 1, [2, 1], 1), + (1, 2, 1), # regular selection + ([1, 2], -2, -1), # negative indices + ([1, -2], 2, -1), # more negative indices + ([1, -2], 2, Ellipsis), # one ellipsis + (Ellipsis, [1, 2]), # one ellipsis + (np.array([1, -2], "i4"), 2, -1), # array 32-bit instead of list + (np.array([-1, 2], "i8"), 2, -1), # array 64-bit instead of list + ] + + # Using booleans instead of ints is deprecated since numpy 1.8 + # Tests for keys that have to support the __index__ attribute + # self.working_keyset.append( + # 
(False, True), # equivalent to (0,1) ;-) + # ) + + # Valid selections for NumPy, but not for PyTables (yet) + # The next should raise an IndexError + self.not_working_keyset = [ + np.array([False, True], dtype="b1"), # boolean arrays + ([1, 2, 1], 2, 1), # repeated values + ([1, 2], 2, [1, 2]), # several lists + ([], 2, 1), # empty selections + (Ellipsis, [1, 2], Ellipsis), # several ellipsis + # Using booleans instead of ints is deprecated since numpy 1.8 + ([False, True]), # boolean values with incompatible shape + ] + + # The next should raise an IndexError in both NumPy and PyTables + self.not_working_oob = [ + ([1, 2], 2, 1000), # out-of-bounds selections + ([1, 2], 2000, 1), # out-of-bounds selections + ] + + # The next should raise a IndexError in both NumPy and PyTables + self.not_working_too_many = [ + ([1, 2], 2, 1, 1), + ] + + # Create a sample array + nparr = np.empty(self.shape, dtype=np.int32) + data = np.arange(n * o, dtype=np.int32).reshape(n, o) + for i in range(m): + nparr[i] = data * i + self.nparr = nparr + self.tbarr = self.h5file.create_array(self.h5file.root, "array", nparr) + + def test01a_read(self): + """Test for fancy-selections (working selections, read).""" + + nparr = self.nparr + tbarr = self.tbarr + for key in self.working_keyset: + if common.verbose: + print("Selection to test:", key) + a = nparr[key] + b = tbarr[key] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables selections does not match.", + ) + + def test01b_read(self): + """Test for fancy-selections (not working selections, read).""" + + # nparr = self.nparr + tbarr = self.tbarr + for key in self.not_working_keyset: + if common.verbose: + print("Selection to test:", key) + # a = nparr[key] + self.assertRaises(IndexError, tbarr.__getitem__, key) + + def test01c_read(self): + """Test for fancy-selections (out-of-bound indexes, read).""" + + nparr = self.nparr + tbarr = self.tbarr + for key in self.not_working_oob: + if common.verbose: + print("Selection to 
test:", key) + self.assertRaises(IndexError, nparr.__getitem__, key) + self.assertRaises(IndexError, tbarr.__getitem__, key) + + def test01d_read(self): + """Test for fancy-selections (too many indexes, read).""" + + nparr = self.nparr + tbarr = self.tbarr + for key in self.not_working_too_many: + if common.verbose: + print("Selection to test:", key) + # ValueError for numpy 1.6.x and earlier + # IndexError in numpy > 1.8.0 + self.assertRaises((ValueError, IndexError), nparr.__getitem__, key) + self.assertRaises(IndexError, tbarr.__getitem__, key) + + def test02a_write(self): + """Test for fancy-selections (working selections, write).""" + + nparr = self.nparr + tbarr = self.tbarr + for key in self.working_keyset: + if common.verbose: + print("Selection to test:", key) + s = nparr[key] + nparr[key] = s * 2 + tbarr[key] = s * 2 + a = nparr[:] + b = tbarr[:] + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables modifications does not match.", + ) + + def test02b_write(self): + """Test for fancy-selections (working selections, write, broadcast).""" + + nparr = self.nparr + tbarr = self.tbarr + for key in self.working_keyset: + if common.verbose: + print("Selection to test:", key) + # s = nparr[key] + nparr[key] = 2 # broadcast value + tbarr[key] = 2 # broadcast value + a = nparr[:] + b = tbarr[:] + # if common.verbose: + # print("NumPy modified array:", a) + # print("PyTables modified array:", b) + self.assertTrue( + np.all(a == b), + "NumPy array and PyTables modifications does not match.", + ) + + +class FancySelection1(FancySelectionTestCase): + shape = (5, 3, 3) # Minimum values + + +class FancySelection2(FancySelectionTestCase): + # shape = (5, 3, 3) # Minimum values + shape = (7, 3, 3) + + +class FancySelection3(FancySelectionTestCase): + # shape = (5, 3, 3) # Minimum values + shape = (7, 4, 5) + + +class FancySelection4(FancySelectionTestCase): + # shape = (5, 3, 3) # Minimum values + shape = (5, 3, 10) + + +class 
CopyNativeHDF5MDAtom(common.PyTablesTestCase): + + def setUp(self): + super().setUp() + filename = common.test_filename("array_mdatom.h5") + self.h5file = tb.open_file(filename, "r") + self.arr = self.h5file.root.arr + self.copy = tempfile.mktemp(".h5") + self.copyh = tb.open_file(self.copy, mode="w") + self.arr2 = self.arr.copy(self.copyh.root, newname="arr2") + + def tearDown(self): + self.h5file.close() + self.copyh.close() + Path(self.copy).unlink() + super().tearDown() + + def test01_copy(self): + """Checking that native MD atoms are copied as-is""" + + self.assertEqual(self.arr.atom, self.arr2.atom) + self.assertEqual(self.arr.shape, self.arr2.shape) + + def test02_reopen(self): + """Checking that native MD atoms are copied as-is (re-open)""" + + self.copyh.close() + self.copyh = tb.open_file(self.copy, mode="r") + self.arr2 = self.copyh.root.arr2 + self.assertEqual(self.arr.atom, self.arr2.atom) + self.assertEqual(self.arr.shape, self.arr2.shape) + + +class AccessClosedTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + a = np.zeros((10, 10)) + self.array = self.h5file.create_array(self.h5file.root, "array", a) + + def test_read(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.read) + + def test_getitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__getitem__, 0) + + def test_setitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__setitem__, 0, 0) + + +class BroadcastTest(common.TempFileMixin, common.PyTablesTestCase): + + def test(self): + """Test correct broadcasting when the array atom is not scalar.""" + + array_shape = (2, 3) + element_shape = (3,) + + dtype = np.dtype((np.int64, element_shape)) + atom = tb.Atom.from_dtype(dtype) + h5arr = self.h5file.create_array( + self.h5file.root, "array", atom=atom, shape=array_shape + ) + + size = np.prod(element_shape) + nparr = np.arange(size).reshape(element_shape) 
+ + h5arr[0] = nparr + self.assertTrue(np.all(h5arr[0] == nparr)) + + +class TestCreateArrayArgs(common.TempFileMixin, common.PyTablesTestCase): + where = "/" + name = "array" + obj = np.array([[1, 2], [3, 4]]) + title = "title" + byteorder = None + createparents = False + atom = tb.Atom.from_dtype(obj.dtype) + shape = obj.shape + + def test_positional_args(self): + self.h5file.create_array(self.where, self.name, self.obj, self.title) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_positional_args_atom_shape(self): + self.h5file.create_array( + self.where, + self.name, + None, + self.title, + self.byteorder, + self.createparents, + self.atom, + self.shape, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(np.zeros_like(self.obj), nparr)) + + def test_kwargs_obj(self): + self.h5file.create_array( + self.where, self.name, title=self.title, obj=self.obj + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_shape_01(self): + ptarr = self.h5file.create_array( + 
self.where, + self.name, + title=self.title, + atom=self.atom, + shape=self.shape, + ) + ptarr[...] = self.obj + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_shape_02(self): + ptarr = self.h5file.create_array( + self.where, + self.name, + title=self.title, + atom=self.atom, + shape=self.shape, + ) + # ptarr[...] = self.obj + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(np.zeros_like(self.obj), nparr)) + + def test_kwargs_obj_atom(self): + ptarr = self.h5file.create_array( + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=self.atom, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_shape(self): + ptarr = self.h5file.create_array( + self.where, + self.name, + title=self.title, + obj=self.obj, + shape=self.shape, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + 
self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_shape(self): + ptarr = self.h5file.create_array( + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=self.atom, + shape=self.shape, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_error(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + # shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_array, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + ) + + def test_kwargs_obj_shape_error(self): + # atom = Atom.from_dtype(np.dtype('complex')) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_array, + self.where, + self.name, + title=self.title, + obj=self.obj, + shape=shape, + ) + + def test_kwargs_obj_atom_shape_error_01(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + # shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_array, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + shape=self.shape, + ) + + def test_kwargs_obj_atom_shape_error_02(self): + # atom = Atom.from_dtype(numpy.dtype('complex')) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_array, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=self.atom, + shape=shape, + ) + + def test_kwargs_obj_atom_shape_error_03(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + shape = 
self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_array, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + shape=shape, + ) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + for i in range(niter): + # The scalar case test should be refined in order to work + theSuite.addTest(common.make_suite(Basic0DOneTestCase)) + theSuite.addTest(common.make_suite(Basic0DTwoTestCase)) + # theSuite.addTest(make_suite(Basic1DZeroTestCase)) + theSuite.addTest(common.make_suite(Basic1DOneTestCase)) + theSuite.addTest(common.make_suite(Basic1DTwoTestCase)) + theSuite.addTest(common.make_suite(Basic1DThreeTestCase)) + theSuite.addTest(common.make_suite(Basic2DOneTestCase)) + theSuite.addTest(common.make_suite(Basic2DTwoTestCase)) + theSuite.addTest(common.make_suite(Basic10DTestCase)) + # The 32 dimensions case is tested on GroupsArray + # theSuite.addTest(make_suite(Basic32DTestCase)) + theSuite.addTest(common.make_suite(ReadOutArgumentTests)) + theSuite.addTest(common.make_suite(SizeOnDiskInMemoryPropertyTestCase)) + theSuite.addTest(common.make_suite(GroupsArrayTestCase)) + theSuite.addTest(common.make_suite(ComplexNotReopenNotEndianTestCase)) + theSuite.addTest(common.make_suite(ComplexReopenNotEndianTestCase)) + theSuite.addTest(common.make_suite(ComplexNotReopenEndianTestCase)) + theSuite.addTest(common.make_suite(ComplexReopenEndianTestCase)) + theSuite.addTest(common.make_suite(CloseCopyTestCase)) + theSuite.addTest(common.make_suite(OpenCopyTestCase)) + theSuite.addTest(common.make_suite(CopyIndex1TestCase)) + theSuite.addTest(common.make_suite(CopyIndex2TestCase)) + theSuite.addTest(common.make_suite(CopyIndex3TestCase)) + theSuite.addTest(common.make_suite(CopyIndex4TestCase)) + theSuite.addTest(common.make_suite(CopyIndex5TestCase)) + theSuite.addTest(common.make_suite(CopyIndex6TestCase)) + theSuite.addTest(common.make_suite(CopyIndex7TestCase)) + 
theSuite.addTest(common.make_suite(CopyIndex8TestCase)) + theSuite.addTest(common.make_suite(CopyIndex9TestCase)) + theSuite.addTest(common.make_suite(CopyIndex10TestCase)) + theSuite.addTest(common.make_suite(CopyIndex11TestCase)) + theSuite.addTest(common.make_suite(CopyIndex12TestCase)) + theSuite.addTest(common.make_suite(GI1NAOpenTestCase)) + theSuite.addTest(common.make_suite(GI1NACloseTestCase)) + theSuite.addTest(common.make_suite(GI2NAOpenTestCase)) + theSuite.addTest(common.make_suite(GI2NACloseTestCase)) + theSuite.addTest(common.make_suite(SI1NAOpenTestCase)) + theSuite.addTest(common.make_suite(SI1NACloseTestCase)) + theSuite.addTest(common.make_suite(SI2NAOpenTestCase)) + theSuite.addTest(common.make_suite(SI2NACloseTestCase)) + theSuite.addTest(common.make_suite(GE1NAOpenTestCase)) + theSuite.addTest(common.make_suite(GE1NACloseTestCase)) + theSuite.addTest(common.make_suite(GE2NAOpenTestCase)) + theSuite.addTest(common.make_suite(GE2NACloseTestCase)) + theSuite.addTest(common.make_suite(NonHomogeneousTestCase)) + theSuite.addTest(common.make_suite(TruncateTestCase)) + theSuite.addTest(common.make_suite(FancySelection1)) + theSuite.addTest(common.make_suite(FancySelection2)) + theSuite.addTest(common.make_suite(FancySelection3)) + theSuite.addTest(common.make_suite(FancySelection4)) + theSuite.addTest(common.make_suite(PointSelection0)) + theSuite.addTest(common.make_suite(PointSelection1)) + theSuite.addTest(common.make_suite(PointSelection2)) + theSuite.addTest(common.make_suite(PointSelection3)) + theSuite.addTest(common.make_suite(PointSelection4)) + theSuite.addTest(common.make_suite(CopyNativeHDF5MDAtom)) + theSuite.addTest(common.make_suite(AccessClosedTestCase)) + theSuite.addTest(common.make_suite(TestCreateArrayArgs)) + theSuite.addTest(common.make_suite(BroadcastTest)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git 
a/venv/Lib/site-packages/tables/tests/test_attributes.py b/venv/Lib/site-packages/tables/tests/test_attributes.py new file mode 100644 index 0000000..c16e921 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_attributes.py @@ -0,0 +1,2049 @@ +"""This test unit checks node attributes that are persistent (AttributeSet).""" + +import sys +import datetime +import warnings + +import numpy as np +from packaging.version import Version + +import tables as tb +from tables.tests import common + + +class Record(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) # 4-character String + var2 = tb.IntCol() # integer + var3 = tb.Int16Col() # short integer + var4 = tb.FloatCol() # double (double-precision) + var5 = tb.Float32Col() # float (single-precision) + + +class CreateTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.root = self.h5file.root + + # Create a table object + self.table = self.h5file.create_table( + self.root, "atable", Record, "Table title" + ) + # Create an array object + self.array = self.h5file.create_array( + self.root, "anarray", [1], "Array title" + ) + # Create a group object + self.group = self.h5file.create_group( + self.root, "agroup", "Group title" + ) + + def test01_setAttributes(self): + """Checking setting large string attributes (File methods)""" + + attrlength = 2048 + # Try to put a long string attribute on a group object + self.h5file.set_node_attr(self.root.agroup, "attr1", "p" * attrlength) + + # Now, try with a Table object + self.h5file.set_node_attr(self.root.atable, "attr1", "a" * attrlength) + + # Finally, try with an Array object + self.h5file.set_node_attr(self.root.anarray, "attr1", "n" * attrlength) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + self.assertEqual( + self.h5file.get_node_attr(self.root.agroup, "attr1"), + "p" * attrlength, + ) + 
self.assertEqual( + self.h5file.get_node_attr(self.root.atable, "attr1"), + "a" * attrlength, + ) + self.assertEqual( + self.h5file.get_node_attr(self.root.anarray, "attr1"), + "n" * attrlength, + ) + + def reopen(self): + # Reopen + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + def check_missing(self, name): + self.reopen() + self.assertNotIn(name, self.root.agroup._v_attrs) + self.assertNotIn(name, self.root.atable.attrs) + self.assertNotIn(name, self.root.anarray.attrs) + + def check_name(self, name, val=""): + """Check validity of attribute name filtering""" + self.check_missing(name) + # Using File methods + self.h5file.set_node_attr(self.root.agroup, name, val) + self.h5file.set_node_attr(self.root.atable, name, val) + self.h5file.set_node_attr(self.root.anarray, name, val) + # Check File methods + self.reopen() + self.assertEqual( + self.h5file.get_node_attr(self.root.agroup, name), val + ) + self.assertEqual( + self.h5file.get_node_attr(self.root.atable, name), val + ) + self.assertEqual( + self.h5file.get_node_attr(self.root.anarray, name), val + ) + # Remove, file methods + self.h5file.del_node_attr(self.root.agroup, name) + self.h5file.del_node_attr(self.root.atable, name) + self.h5file.del_node_attr(self.root.anarray, name) + self.check_missing(name) + + # Using Node methods + self.root.agroup._f_setattr(name, val) + self.root.atable.set_attr(name, val) + self.root.anarray.set_attr(name, val) + # Check Node methods + self.reopen() + self.assertEqual(self.root.agroup._f_getattr(name), val) + self.assertEqual(self.root.atable.get_attr(name), val) + self.assertEqual(self.root.anarray.get_attr(name), val) + self.root.agroup._f_delattr(name) + self.root.atable.del_attr(name) + self.root.anarray.del_attr(name) + self.check_missing(name) + + # Using AttributeSet methods + setattr(self.root.agroup._v_attrs, name, val) + 
setattr(self.root.atable.attrs, name, val) + setattr(self.root.anarray.attrs, name, val) + # Check AttributeSet methods + self.reopen() + self.assertEqual(getattr(self.root.agroup._v_attrs, name), val) + self.assertEqual(getattr(self.root.atable.attrs, name), val) + self.assertEqual(getattr(self.root.anarray.attrs, name), val) + delattr(self.root.agroup._v_attrs, name) + delattr(self.root.atable.attrs, name) + delattr(self.root.anarray.attrs, name) + self.check_missing(name) + + # Using dict [] + self.root.agroup._v_attrs[name] = val + self.root.atable.attrs[name] = val + self.root.anarray.attrs[name] = val + # Check dict [] + self.reopen() + self.assertEqual(self.root.agroup._v_attrs[name], val) + self.assertEqual(self.root.atable.attrs[name], val) + self.assertEqual(self.root.anarray.attrs[name], val) + del self.root.agroup._v_attrs[name] + del self.root.atable.attrs[name] + del self.root.anarray.attrs[name] + self.check_missing(name) + + def test01a_setAttributes(self): + """Checking attribute names validity""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", tb.NaturalNameWarning) + self.check_name("a") + self.check_name("a:b") + self.check_name("/a/b") + self.check_name(".") + self.assertRaises(ValueError, self.check_name, "") + self.assertRaises(ValueError, self.check_name, "__members__") + self.assertRaises(TypeError, self.check_name, 0) + + def test02_setAttributes(self): + """Checking setting large string attributes (Node methods)""" + + attrlength = 2048 + # Try to put a long string attribute on a group object + self.root.agroup._f_setattr("attr1", "p" * attrlength) + # Now, try with a Table object + self.root.atable.set_attr("attr1", "a" * attrlength) + + # Finally, try with an Array object + self.root.anarray.set_attr("attr1", "n" * attrlength) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + self.assertEqual( 
+ self.root.agroup._f_getattr("attr1"), "p" * attrlength + ) + self.assertEqual(self.root.atable.get_attr("attr1"), "a" * attrlength) + self.assertEqual(self.root.anarray.get_attr("attr1"), "n" * attrlength) + + def test03_setAttributes(self): + """Checking setting large string attributes (AttributeSet methods)""" + + attrlength = 2048 + # Try to put a long string attribute on a group object + self.group._v_attrs.attr1 = "p" * attrlength + # Now, try with a Table object + self.table.attrs.attr1 = "a" * attrlength + # Finally, try with an Array object + self.array.attrs.attr1 = "n" * attrlength + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + # This should work even when the node cache is disabled + self.assertEqual(self.root.agroup._v_attrs.attr1, "p" * attrlength) + self.assertEqual(self.root.atable.attrs.attr1, "a" * attrlength) + self.assertEqual(self.root.anarray.attrs.attr1, "n" * attrlength) + + def test04_listAttributes(self): + """Checking listing attributes.""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = "3" + if common.verbose: + print("Attribute list:", self.group._v_attrs._f_list()) + + # Now, try with a Table object + self.table.attrs.a = "1" + self.table.attrs.c = "2" + self.table.attrs.b = "3" + if common.verbose: + print("Attribute list:", self.table.attrs._f_list()) + + # Finally, try with an Array object + self.array.attrs.k = "1" + self.array.attrs.j = "2" + self.array.attrs.i = "3" + if common.verbose: + print("Attribute list:", self.array.attrs._f_list()) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + agroup = self.root.agroup + self.assertEqual(agroup._v_attrs._f_list("user"), ["pq", "qr", "rs"]) + self.assertEqual( + 
agroup._v_attrs._f_list("sys"), ["CLASS", "TITLE", "VERSION"] + ) + self.assertEqual( + agroup._v_attrs._f_list("all"), + ["CLASS", "TITLE", "VERSION", "pq", "qr", "rs"], + ) + + atable = self.root.atable + self.assertEqual(atable.attrs._f_list(), ["a", "b", "c"]) + self.assertEqual( + atable.attrs._f_list("sys"), + [ + "CLASS", + "FIELD_0_FILL", + "FIELD_0_NAME", + "FIELD_1_FILL", + "FIELD_1_NAME", + "FIELD_2_FILL", + "FIELD_2_NAME", + "FIELD_3_FILL", + "FIELD_3_NAME", + "FIELD_4_FILL", + "FIELD_4_NAME", + "NROWS", + "TITLE", + "VERSION", + ], + ) + self.assertEqual( + atable.attrs._f_list("all"), + [ + "CLASS", + "FIELD_0_FILL", + "FIELD_0_NAME", + "FIELD_1_FILL", + "FIELD_1_NAME", + "FIELD_2_FILL", + "FIELD_2_NAME", + "FIELD_3_FILL", + "FIELD_3_NAME", + "FIELD_4_FILL", + "FIELD_4_NAME", + "NROWS", + "TITLE", + "VERSION", + "a", + "b", + "c", + ], + ) + + anarray = self.root.anarray + self.assertEqual(anarray.attrs._f_list(), ["i", "j", "k"]) + self.assertEqual( + anarray.attrs._f_list("sys"), + ["CLASS", "FLAVOR", "TITLE", "VERSION"], + ) + self.assertEqual( + anarray.attrs._f_list("all"), + ["CLASS", "FLAVOR", "TITLE", "VERSION", "i", "j", "k"], + ) + + def test05_removeAttributes(self): + """Checking removing attributes.""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = "3" + # delete an attribute + del self.group._v_attrs.pq + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + agroup = self.root.agroup + if common.verbose: + print("Attribute list:", agroup._v_attrs._f_list()) + # Check the local attributes names + self.assertEqual(agroup._v_attrs._f_list(), ["qr", "rs"]) + if common.verbose: + print("Attribute list in disk:", agroup._v_attrs._f_list("all")) + # Check the disk attribute names + self.assertEqual( + agroup._v_attrs._f_list("all"), + ["CLASS", "TITLE", 
"VERSION", "qr", "rs"], + ) + + # delete an attribute (__delattr__ method) + del agroup._v_attrs.qr + if common.verbose: + print("Attribute list:", agroup._v_attrs._f_list()) + # Check the local attributes names + self.assertEqual(agroup._v_attrs._f_list(), ["rs"]) + if common.verbose: + print("Attribute list in disk:", agroup._v_attrs._f_list()) + # Check the disk attribute names + self.assertEqual( + agroup._v_attrs._f_list("all"), ["CLASS", "TITLE", "VERSION", "rs"] + ) + + def test05b_removeAttributes(self): + """Checking removing attributes (using File.del_node_attr())""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = "3" + # delete an attribute + self.h5file.del_node_attr(self.group, "pq") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + agroup = self.root.agroup + if common.verbose: + print("Attribute list:", agroup._v_attrs._f_list()) + # Check the local attributes names + self.assertEqual(agroup._v_attrs._f_list(), ["qr", "rs"]) + if common.verbose: + print("Attribute list in disk:", agroup._v_attrs._f_list("all")) + # Check the disk attribute names + self.assertEqual( + agroup._v_attrs._f_list("all"), + ["CLASS", "TITLE", "VERSION", "qr", "rs"], + ) + + # delete an attribute (File.del_node_attr method) + self.h5file.del_node_attr(self.root, "qr", "agroup") + if common.verbose: + print("Attribute list:", agroup._v_attrs._f_list()) + # Check the local attributes names + self.assertEqual(agroup._v_attrs._f_list(), ["rs"]) + if common.verbose: + print("Attribute list in disk:", agroup._v_attrs._f_list()) + # Check the disk attribute names + self.assertEqual( + agroup._v_attrs._f_list("all"), ["CLASS", "TITLE", "VERSION", "rs"] + ) + + def test06_removeAttributes(self): + """Checking removing system attributes.""" + + # remove a system attribute + if common.verbose: + 
print("Before removing CLASS attribute") + print("System attrs:", self.group._v_attrs._v_attrnamessys) + del self.group._v_attrs.CLASS + self.assertEqual( + self.group._v_attrs._f_list("sys"), ["TITLE", "VERSION"] + ) + if common.verbose: + print("After removing CLASS attribute") + print("System attrs:", self.group._v_attrs._v_attrnamessys) + + def test07_renameAttributes(self): + """Checking renaming attributes.""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = "3" + # rename an attribute + self.group._v_attrs._f_rename("pq", "op") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + agroup = self.root.agroup + if common.verbose: + print("Attribute list:", agroup._v_attrs._f_list()) + # Check the local attributes names (alphabetically sorted) + self.assertEqual(agroup._v_attrs._f_list(), ["op", "qr", "rs"]) + if common.verbose: + print("Attribute list in disk:", agroup._v_attrs._f_list("all")) + # Check the disk attribute names (not sorted) + self.assertEqual( + agroup._v_attrs._f_list("all"), + ["CLASS", "TITLE", "VERSION", "op", "qr", "rs"], + ) + + def test08_renameAttributes(self): + """Checking renaming system attributes.""" + + if common.verbose: + print("Before renaming CLASS attribute") + print("All attrs:", self.group._v_attrs._v_attrnames) + # rename a system attribute + self.group._v_attrs._f_rename("CLASS", "op") + if common.verbose: + print("After renaming CLASS attribute") + print("All attrs:", self.group._v_attrs._v_attrnames) + + # Check the disk attribute names (not sorted) + agroup = self.root.agroup + self.assertEqual( + agroup._v_attrs._f_list("all"), ["TITLE", "VERSION", "op"] + ) + + def test09_overwriteAttributes(self): + """Checking overwriting attributes.""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + 
self.group._v_attrs.rs = "3" + # overwrite attributes + self.group._v_attrs.pq = "4" + self.group._v_attrs.qr = 2 + self.group._v_attrs.rs = [1, 2, 3] + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + agroup = self.root.agroup + if common.verbose: + print("Value of Attribute pq:", agroup._v_attrs.pq) + # Check the local attributes names (alphabetically sorted) + self.assertEqual(agroup._v_attrs.pq, "4") + self.assertEqual(agroup._v_attrs.qr, 2) + self.assertEqual(agroup._v_attrs.rs, [1, 2, 3]) + if common.verbose: + print("Attribute list in disk:", agroup._v_attrs._f_list("all")) + # Check the disk attribute names (not sorted) + self.assertEqual( + agroup._v_attrs._f_list("all"), + ["CLASS", "TITLE", "VERSION", "pq", "qr", "rs"], + ) + + def test10a_copyAttributes(self): + """Checking copying attributes.""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = "3" + # copy all attributes from "/agroup" to "/atable" + self.group._v_attrs._f_copy(self.root.atable) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + atable = self.root.atable + if common.verbose: + print("Attribute list:", atable._v_attrs._f_list()) + # Check the local attributes names (alphabetically sorted) + self.assertEqual(atable._v_attrs._f_list(), ["pq", "qr", "rs"]) + if common.verbose: + print("Complete attribute list:", atable._v_attrs._f_list("all")) + # Check the disk attribute names (not sorted) + self.assertEqual( + atable._v_attrs._f_list("all"), + [ + "CLASS", + "FIELD_0_FILL", + "FIELD_0_NAME", + "FIELD_1_FILL", + "FIELD_1_NAME", + "FIELD_2_FILL", + "FIELD_2_NAME", + "FIELD_3_FILL", + "FIELD_3_NAME", + "FIELD_4_FILL", + "FIELD_4_NAME", + "NROWS", + "TITLE", + "VERSION", + "pq", + 
"qr", + "rs", + ], + ) + + def test10b_copyAttributes(self): + """Checking copying attributes (copy_node_attrs)""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = "3" + # copy all attributes from "/agroup" to "/atable" + self.h5file.copy_node_attrs(self.group, self.root.atable) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + atable = self.root.atable + if common.verbose: + print("Attribute list:", atable._v_attrs._f_list()) + # Check the local attributes names (alphabetically sorted) + self.assertEqual(atable._v_attrs._f_list(), ["pq", "qr", "rs"]) + if common.verbose: + print("Complete attribute list:", atable._v_attrs._f_list("all")) + # Check the disk attribute names (not sorted) + self.assertEqual( + atable._v_attrs._f_list("all"), + [ + "CLASS", + "FIELD_0_FILL", + "FIELD_0_NAME", + "FIELD_1_FILL", + "FIELD_1_NAME", + "FIELD_2_FILL", + "FIELD_2_NAME", + "FIELD_3_FILL", + "FIELD_3_NAME", + "FIELD_4_FILL", + "FIELD_4_NAME", + "NROWS", + "TITLE", + "VERSION", + "pq", + "qr", + "rs", + ], + ) + + def test10c_copyAttributes(self): + """Checking copying attributes during group copies.""" + + # With a Group object + self.group._v_attrs["CLASS"] = "GROUP2" + self.group._v_attrs["VERSION"] = "1.3" + # copy "/agroup" to "/agroup2" + self.h5file.copy_node(self.group, self.root, "agroup2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + agroup2 = self.root.agroup2 + if common.verbose: + print("Complete attribute list:", agroup2._v_attrs._f_list("all")) + self.assertEqual(agroup2._v_attrs["CLASS"], "GROUP2") + self.assertEqual(agroup2._v_attrs["VERSION"], "1.3") + + def test10d_copyAttributes(self): + """Checking copying attributes during leaf copies.""" + + 
# With a Group object + atable = self.root.atable + atable._v_attrs["CLASS"] = "TABLE2" + atable._v_attrs["VERSION"] = "1.3" + # copy "/agroup" to "/agroup2" + self.h5file.copy_node(atable, self.root, "atable2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.root = self.h5file.root + + atable2 = self.root.atable2 + if common.verbose: + print("Complete attribute list:", atable2._v_attrs._f_list("all")) + self.assertEqual(atable2._v_attrs["CLASS"], "TABLE2") + self.assertEqual(atable2._v_attrs["VERSION"], "1.3") + + def test11a_getitem(self): + """Checking the __getitem__ interface.""" + + attrs = self.group._v_attrs + attrs.pq = "1" + self.assertEqual(attrs["pq"], "1") + + def test11b_setitem(self): + """Checking the __setitem__ interface.""" + + attrs = self.group._v_attrs + attrs["pq"] = "2" + self.assertEqual(attrs["pq"], "2") + + def test11c_delitem(self): + """Checking the __delitem__ interface.""" + + attrs = self.group._v_attrs + attrs.pq = "1" + del attrs["pq"] + self.assertNotIn("pq", attrs._f_list()) + + def test11d_KeyError(self): + """Checking that KeyError is raised in __getitem__/__delitem__.""" + + attrs = self.group._v_attrs + self.assertRaises(KeyError, attrs.__getitem__, "pq") + self.assertRaises(KeyError, attrs.__delitem__, "pq") + + def test_2d_non_contiguous(self): + """Checking setting 2D and non-contiguous NumPy attributes""" + + # Regression for gh-176 numpy. + # In the views old implementation PyTAbles performs a copy of the + # array: + # + # value = np.array(value) + # + # in order to get a contiguous array. + # Unfortunately array with swapped axis are copied as they are so + # they are stored in to HDF5 attributes without being actually + # contiguous and ths causes an error whn they are restored. 
+ + data = np.array([[0, 1], [2, 3]]) + + self.array.attrs["a"] = data + self.array.attrs["b"] = data.T.copy() + self.array.attrs["c"] = data.T + + np.testing.assert_array_equal(self.array.attrs["a"], data) + np.testing.assert_array_equal(self.array.attrs["b"], data.T) + # AssertionError: + np.testing.assert_array_equal(self.array.attrs["c"], data.T) + + def test12_dir(self): + """Checking AttributeSet.__dir__""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test12_dir..." % self.__class__.__name__) + + attrset = self.group._v_attrs + + user_attr = "good_attr" + sys_attr = "BETTER_ATTR" + for a in [user_attr, sys_attr]: + attrset[a] = 1 + + bad_user = "5bad" + bad_sys = "SYS%" + for a in [bad_user, bad_sys]: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", tb.NaturalNameWarning) + attrset[a] = 1 + + completions = dir(attrset) + + # Check some regular attributes. + self.assertIn("__class__", completions) + self.assertIn("_f_copy", completions) + self.assertEqual(completions.count("_f_copy"), 1) + + # Check SYS attrs. + self.assertNotIn(bad_sys, completions) + self.assertIn(sys_attr, completions) + self.assertEqual(completions.count(sys_attr), 1) + + # Check USER attrs. + self.assertIn(user_attr, completions) + self.assertNotIn(bad_user, completions) + self.assertEqual(completions.count(user_attr), 1) + + # Now check all for no duplicates. 
+ self.assertSequenceEqual(sorted(set(completions)), sorted(completions)) + + +class NotCloseCreate(CreateTestCase): + close = False + node_cache_slots = tb.parameters.NODE_CACHE_SLOTS + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class CloseCreate(CreateTestCase): + close = True + node_cache_slots = tb.parameters.NODE_CACHE_SLOTS + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class NoCacheNotCloseCreate(CreateTestCase): + close = False + node_cache_slots = 0 + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class NoCacheCloseCreate(CreateTestCase): + close = True + node_cache_slots = 0 + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class DictCacheNotCloseCreate(CreateTestCase): + close = False + node_cache_slots = -tb.parameters.NODE_CACHE_SLOTS + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class DictCacheCloseCreate(CreateTestCase): + close = True + node_cache_slots = -tb.parameters.NODE_CACHE_SLOTS + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class TypesTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + self.open_kwargs = {"allow_padding": self.allow_padding} + super().setUp() + self.root = self.h5file.root + + # Create an array object + self.array = self.h5file.create_array( + self.root, "anarray", [1], "Array title" + ) + # Create a group object + self.group = self.h5file.create_group( + self.root, "agroup", "Group title" + ) + + def test00a_setBoolAttributes(self): + """Checking setting Bool attributes (scalar, Python case)""" + + self.array.attrs.pq = True + self.array.attrs.qr = False + self.array.attrs.rs = True + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + 
self.assertEqual(self.root.anarray.attrs.pq, True) + self.assertEqual(self.root.anarray.attrs.qr, False) + self.assertEqual(self.root.anarray.attrs.rs, True) + + def test00b_setBoolAttributes(self): + """Checking setting Bool attributes (scalar, NumPy case)""" + + self.array.attrs.pq = np.bool_(True) + self.array.attrs.qr = np.bool_(False) + self.array.attrs.rs = np.bool_(True) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.root.anarray.attrs.pq, np.bool_) + self.assertIsInstance(self.root.anarray.attrs.qr, np.bool_) + self.assertIsInstance(self.root.anarray.attrs.rs, np.bool_) + self.assertEqual(self.root.anarray.attrs.pq, True) + self.assertEqual(self.root.anarray.attrs.qr, False) + self.assertEqual(self.root.anarray.attrs.rs, True) + + def test00c_setBoolAttributes(self): + """Checking setting Bool attributes (NumPy, 0-dim case)""" + + self.array.attrs.pq = np.array(True) + self.array.attrs.qr = np.array(False) + self.array.attrs.rs = np.array(True) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertEqual(self.root.anarray.attrs.pq, True) + self.assertEqual(self.root.anarray.attrs.qr, False) + self.assertEqual(self.root.anarray.attrs.rs, True) + + def test00d_setBoolAttributes(self): + """Checking setting Bool attributes (NumPy, multidim case)""" + + self.array.attrs.pq = np.array([True]) + self.array.attrs.qr = np.array([[False]]) + self.array.attrs.rs = 
np.array([[True, False], [True, False]]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.root.anarray.attrs.pq, np.array([True]) + ) + np.testing.assert_array_equal( + self.root.anarray.attrs.qr, np.array([[False]]) + ) + np.testing.assert_array_equal( + self.root.anarray.attrs.rs, + np.array([[True, False], [True, False]]), + ) + + def test01a_setIntAttributes(self): + """Checking setting Int attributes (scalar, Python case)""" + + self.array.attrs.pq = 1 + self.array.attrs.qr = 2 + self.array.attrs.rs = 3 + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.root.anarray.attrs.pq, np.int_) + self.assertIsInstance(self.root.anarray.attrs.qr, np.int_) + self.assertIsInstance(self.root.anarray.attrs.rs, np.int_) + self.assertEqual(self.root.anarray.attrs.pq, 1) + self.assertEqual(self.root.anarray.attrs.qr, 2) + self.assertEqual(self.root.anarray.attrs.rs, 3) + + def test01b_setIntAttributes(self): + """Checking setting Int attributes (scalar, NumPy case)""" + + # 'UInt64' not supported on Win + checktypes = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + ] + + for dtype in checktypes: + setattr(self.array.attrs, dtype, np.array(1, dtype=dtype)) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + 
if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), np.array(1, dtype=dtype) + ) + + def test01c_setIntAttributes(self): + """Checking setting Int attributes (unidimensional NumPy case)""" + + # 'UInt64' not supported on Win + checktypes = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + ] + + for dtype in checktypes: + setattr(self.array.attrs, dtype, np.array([1, 2], dtype=dtype)) + + # Check the results + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + if common.verbose: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), np.array([1, 2], dtype=dtype) + ) + + def test01d_setIntAttributes(self): + """Checking setting Int attributes (unidimensional, non-contiguous)""" + + # 'UInt64' not supported on Win + checktypes = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + ] + + for dtype in checktypes: + arr = np.array([1, 2, 3, 4], dtype=dtype)[::2] + setattr(self.array.attrs, dtype, arr) + + # Check the results + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + arr = np.array([1, 2, 3, 4], dtype=dtype)[::2] + if common.verbose: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), arr + ) + + def test01e_setIntAttributes(self): + """Checking setting Int attributes (bidimensional NumPy case)""" + + # 'UInt64' not supported on Win + checktypes = 
[ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + ] + + for dtype in checktypes: + setattr( + self.array.attrs, + dtype, + np.array([[1, 2], [2, 3]], dtype=dtype), + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + # Check the results + for dtype in checktypes: + if common.verbose: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), + np.array([[1, 2], [2, 3]], dtype=dtype), + ) + + def test02a_setFloatAttributes(self): + """Checking setting Float (double) attributes.""" + + # Set some attrs + self.array.attrs.pq = 1.0 + self.array.attrs.qr = 2.0 + self.array.attrs.rs = 3.0 + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.root.anarray.attrs.pq, np.float64) + self.assertIsInstance(self.root.anarray.attrs.qr, np.float64) + self.assertIsInstance(self.root.anarray.attrs.rs, np.float64) + self.assertEqual(self.root.anarray.attrs.pq, 1.0) + self.assertEqual(self.root.anarray.attrs.qr, 2.0) + self.assertEqual(self.root.anarray.attrs.rs, 3.0) + + def test02b_setFloatAttributes(self): + """Checking setting Float attributes (scalar, NumPy case)""" + + checktypes = ["float32", "float64"] + + for dtype in checktypes: + setattr(self.array.attrs, dtype, np.array(1.1, dtype=dtype)) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + 
self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + # assert getattr(self.array.attrs, dtype) == 1.1 + # In order to make Float32 tests pass. This is legal, not a trick. + np.testing.assert_almost_equal( + getattr(self.array.attrs, dtype), 1.1 + ) + + def test02c_setFloatAttributes(self): + """Checking setting Float attributes (unidimensional NumPy case)""" + + checktypes = ["float32", "float64"] + + for dtype in checktypes: + setattr(self.array.attrs, dtype, np.array([1.1, 2.1], dtype=dtype)) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), + np.array([1.1, 2.1], dtype=dtype), + ) + + def test02d_setFloatAttributes(self): + """Checking setting Float attributes (unidimensional, + non-contiguous)""" + + checktypes = ["float32", "float64"] + + for dtype in checktypes: + arr = np.array([1.1, 2.1, 3.1, 4.1], dtype=dtype)[1::2] + setattr(self.array.attrs, dtype, arr) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + arr = np.array([1.1, 2.1, 3.1, 4.1], dtype=dtype)[1::2] + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), arr + ) + + def test02e_setFloatAttributes(self): + """Checking setting Int attributes (bidimensional NumPy case)""" + + checktypes = ["float32", "float64"] + + for dtype in checktypes: + setattr( + self.array.attrs, + dtype, + 
np.array([[1.1, 2.1], [2.1, 3.1]], dtype=dtype), + ) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), + np.array([[1.1, 2.1], [2.1, 3.1]], dtype=dtype), + ) + + def test03_setObjectAttributes(self): + """Checking setting Object attributes.""" + + # Set some attrs + self.array.attrs.pq = [1.0, 2] + self.array.attrs.qr = (1, 2) + self.array.attrs.rs = {"ddf": 32.1, "dsd": 1} + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertEqual(self.root.anarray.attrs.pq, [1.0, 2]) + self.assertEqual(self.root.anarray.attrs.qr, (1, 2)) + self.assertEqual(self.root.anarray.attrs.rs, {"ddf": 32.1, "dsd": 1}) + + def test04a_setStringAttributes(self): + """Checking setting string attributes (scalar case)""" + + self.array.attrs.pq = "foo" + self.array.attrs.qr = "bar" + self.array.attrs.rs = "baz" + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.root.anarray.attrs.pq, np.str_) + self.assertIsInstance(self.root.anarray.attrs.qr, np.str_) + self.assertIsInstance(self.root.anarray.attrs.rs, np.str_) + 
self.assertEqual(self.root.anarray.attrs.pq, "foo") + self.assertEqual(self.root.anarray.attrs.qr, "bar") + self.assertEqual(self.root.anarray.attrs.rs, "baz") + + def test04b_setStringAttributes(self): + """Checking setting string attributes (unidimensional 1-elem case)""" + + self.array.attrs.pq = np.array(["foo"]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.root.anarray.attrs.pq, np.array(["foo"]) + ) + + def test04c_setStringAttributes(self): + """Checking setting string attributes (empty unidimensional + 1-elem case)""" + + self.array.attrs.pq = np.array([""]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + if common.verbose: + print("pq -->", self.array.attrs.pq) + + np.testing.assert_array_equal( + self.root.anarray.attrs.pq, np.array([""]) + ) + + def test04d_setStringAttributes(self): + """Checking setting string attributes (unidimensional 2-elem case)""" + + self.array.attrs.pq = np.array(["foo", "bar3"]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.root.anarray.attrs.pq, np.array(["foo", "bar3"]) + ) + + def test04e_setStringAttributes(self): + """Checking setting string attributes (empty unidimensional + 2-elem case)""" + + self.array.attrs.pq = np.array(["", ""]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if 
self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.root.anarray.attrs.pq, np.array(["", ""]) + ) + + def test04f_setStringAttributes(self): + """Checking setting string attributes (bidimensional 4-elem case)""" + + self.array.attrs.pq = np.array([["foo", "foo2"], ["foo3", "foo4"]]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.root.anarray.attrs.pq, + np.array([["foo", "foo2"], ["foo3", "foo4"]]), + ) + + def test05a_setComplexAttributes(self): + """Checking setting Complex (python) attributes.""" + + # Set some attrs + self.array.attrs.pq = 1.0 + 2j + self.array.attrs.qr = 2.0 + 3j + self.array.attrs.rs = 3.0 + 4j + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.root.anarray.attrs.pq, np.complex128) + self.assertIsInstance(self.root.anarray.attrs.qr, np.complex128) + self.assertIsInstance(self.root.anarray.attrs.rs, np.complex128) + self.assertEqual(self.root.anarray.attrs.pq, 1.0 + 2j) + self.assertEqual(self.root.anarray.attrs.qr, 2.0 + 3j) + self.assertEqual(self.root.anarray.attrs.rs, 3.0 + 4j) + + def test05b_setComplexAttributes(self): + """Checking setting Complex attributes (scalar, NumPy case)""" + + checktypes = ["complex64", "complex128"] + + for dtype in checktypes: + setattr(self.array.attrs, dtype, np.array(1.1 + 2j, dtype=dtype)) + + # Check 
the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + # assert getattr(self.array.attrs, dtype) == 1.1 + 2j + # In order to make Complex32 tests pass. + np.testing.assert_almost_equal( + getattr(self.array.attrs, dtype), 1.1 + 2j + ) + + def test05c_setComplexAttributes(self): + """Checking setting Complex attributes (unidimensional NumPy case)""" + + checktypes = ["complex64", "complex128"] + + for dtype in checktypes: + setattr(self.array.attrs, dtype, np.array([1.1, 2.1], dtype=dtype)) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), + np.array([1.1, 2.1], dtype=dtype), + ) + + def test05d_setComplexAttributes(self): + """Checking setting Int attributes (bidimensional NumPy case)""" + + checktypes = ["complex64", "complex128"] + + for dtype in checktypes: + setattr( + self.array.attrs, + dtype, + np.array([[1.1, 2.1], [2.1, 3.1]], dtype=dtype), + ) + + # Check the results + if common.verbose: + for dtype in checktypes: + print( + "type, value-->", dtype, getattr(self.array.attrs, dtype) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + for dtype in checktypes: + np.testing.assert_array_equal( + getattr(self.array.attrs, dtype), + np.array([[1.1, 2.1], [2.1, 3.1]], dtype=dtype), + ) + + def 
test06a_setUnicodeAttributes(self): + """Checking setting unicode attributes (scalar case)""" + + self.array.attrs.pq = "para\u0140lel" + self.array.attrs.qr = "" # check #213 or gh-64 + self.array.attrs.rs = "baz" + + # Check the results + if common.verbose: + if sys.platform != "win32": + # It seems that Windows cannot print this + print("pq -->", repr(self.array.attrs.pq)) + # XXX: try to use repr instead + # print("pq -->", repr(self.array.attrs.pq)) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.array.attrs.pq, np.str_) + self.assertIsInstance(self.array.attrs.qr, np.str_) + self.assertIsInstance(self.array.attrs.rs, np.str_) + self.assertEqual(self.array.attrs.pq, "para\u0140lel") + self.assertEqual(self.array.attrs.qr, "") + self.assertEqual(self.array.attrs.rs, "baz") + + def test06b_setUnicodeAttributes(self): + """Checking setting unicode attributes (unidimensional 1-elem case)""" + + self.array.attrs.pq = np.array(["para\u0140lel"]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.array.attrs.pq, np.array(["para\u0140lel"]) + ) + + def test06c_setUnicodeAttributes(self): + """Checking setting unicode attributes (empty unidimensional + 1-elem case)""" + + # The next raises a `TypeError` when unpickled. 
See: + # http://projects.scipy.org/numpy/ticket/1037 + # self.array.attrs.pq = np.array(['']) + self.array.attrs.pq = np.array([""], dtype="U1") + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + if common.verbose: + print("pq -->", repr(self.array.attrs.pq)) + + np.testing.assert_array_equal( + self.array.attrs.pq, np.array([""], dtype="U1") + ) + + def test06d_setUnicodeAttributes(self): + """Checking setting unicode attributes (unidimensional 2-elem case)""" + + self.array.attrs.pq = np.array(["para\u0140lel", "bar3"]) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.array.attrs.pq, np.array(["para\u0140lel", "bar3"]) + ) + + def test06e_setUnicodeAttributes(self): + """Checking setting unicode attributes (empty unidimensional + 2-elem case)""" + + self.array.attrs.pq = np.array(["", ""], dtype="U1") + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.array.attrs.pq, np.array(["", ""], dtype="U1") + ) + + def test06f_setUnicodeAttributes(self): + """Checking setting unicode attributes (bidimensional 4-elem case)""" + + self.array.attrs.pq = np.array( + [["para\u0140lel", "foo2"], ["foo3", "para\u0140lel4"]] + ) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + + if self.close: + if common.verbose: + print("(closing file version)") + 
self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + np.testing.assert_array_equal( + self.array.attrs.pq, + np.array([["para\u0140lel", "foo2"], ["foo3", "para\u0140lel4"]]), + ) + + def test07a_setRecArrayAttributes(self): + """Checking setting RecArray (NumPy) attributes.""" + + dt = np.dtype("i4,f8", align=self.aligned) + # Set some attrs + self.array.attrs.pq = np.zeros(2, dt) + self.array.attrs.qr = np.ones((2, 2), dt) + self.array.attrs.rs = np.array([(1, 2.0)], dt) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.array.attrs.pq, np.ndarray) + self.assertIsInstance(self.array.attrs.qr, np.ndarray) + self.assertIsInstance(self.array.attrs.rs, np.ndarray) + np.testing.assert_array_equal(self.array.attrs.pq, np.zeros(2, dt)) + np.testing.assert_array_equal(self.array.attrs.qr, np.ones((2, 2), dt)) + np.testing.assert_array_equal( + self.array.attrs.rs, np.array([(1, 2.0)], dt) + ) + + def test07b_setRecArrayAttributes(self): + """Checking setting nested RecArray (NumPy) attributes.""" + + # Build a nested dtype + dt = np.dtype([("f1", [("f1", "i2"), ("f2", "f8")])]) + # Set some attrs + self.array.attrs.pq = np.zeros(2, dt) + self.array.attrs.qr = np.ones((2, 2), dt) + self.array.attrs.rs = np.array([((1, 2.0),)], dt) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.array.attrs.pq, np.ndarray) + 
self.assertIsInstance(self.array.attrs.qr, np.ndarray) + self.assertIsInstance(self.array.attrs.rs, np.ndarray) + np.testing.assert_array_equal(self.array.attrs.pq, np.zeros(2, dt)) + np.testing.assert_array_equal(self.array.attrs.qr, np.ones((2, 2), dt)) + np.testing.assert_array_equal( + self.array.attrs.rs, np.array([((1, 2),)], dt) + ) + + def test07c_setRecArrayAttributes(self): + """Checking setting multidim nested RecArray (NumPy) attributes.""" + + # Build a nested dtype + dt = np.dtype([("f1", [("f1", "i2", (2,)), ("f2", "f8")])], align=True) + + # Set some attrs + self.array.attrs.pq = np.zeros(2, dt) + self.array.attrs.qr = np.ones((2, 2), dt) + self.array.attrs.rs = np.array([(([1, 3], 2.0),)], dt) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.array.attrs.pq, np.ndarray) + self.assertIsInstance(self.array.attrs.qr, np.ndarray) + self.assertIsInstance(self.array.attrs.rs, np.ndarray) + np.testing.assert_array_equal(self.array.attrs.pq, np.zeros(2, dt)) + np.testing.assert_array_equal(self.array.attrs.qr, np.ones((2, 2), dt)) + np.testing.assert_array_equal( + self.array.attrs.rs, np.array([(([1, 3], 2),)], dt) + ) + + def test08_setRecArrayNotAllowPadding(self): + """Checking setting aligned RecArray (NumPy) attributes with + `allow_aligned` param set to False when reopened.""" + + dt = np.dtype("i4,f8", align=self.aligned) + # Set some attrs + self.array.attrs.pq = np.zeros(2, dt) + self.array.attrs.qr = np.ones((2, 2), dt) + self.array.attrs.rs = np.array([(1, 2.0)], dt) + + # Check the results + if common.verbose: + print("pq -->", self.array.attrs.pq) + print("qr -->", self.array.attrs.qr) + print("rs -->", self.array.attrs.rs) + + if 
self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+", allow_align=False) + self.root = self.h5file.root + self.array = self.h5file.root.anarray + + self.assertIsInstance(self.array.attrs.pq, np.ndarray) + self.assertIsInstance(self.array.attrs.qr, np.ndarray) + self.assertIsInstance(self.array.attrs.rs, np.ndarray) + np.testing.assert_array_equal(self.array.attrs.pq, np.zeros(2, dt)) + np.testing.assert_array_equal(self.array.attrs.qr, np.ones((2, 2), dt)) + np.testing.assert_array_equal( + self.array.attrs.rs, np.array([(1, 2.0)], dt) + ) + + +class NotCloseTypesTestCase(TypesTestCase): + allow_padding = False + aligned = False + close = False + + +class NoCloseAlignedTypesTestCase(TypesTestCase): + allow_padding = True + aligned = True + close = False + + +class CloseNotAlignedPaddedTypesTestCase(TypesTestCase): + allow_padding = False + aligned = False + close = True + + +class CloseTypesTestCase(TypesTestCase): + allow_padding = True + aligned = False + close = True + + +class CloseAlignedTypesTestCase(TypesTestCase): + allow_padding = False + aligned = True + close = True + + +class CloseAlignedPaddedTypesTestCase(TypesTestCase): + allow_padding = True + aligned = True + close = True + + +class NoSysAttrsTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_kwargs = dict(pytables_sys_attrs=False) + + def setUp(self): + super().setUp() + self.root = self.h5file.root + + # Create a table object + self.table = self.h5file.create_table( + self.root, "atable", Record, "Table title" + ) + # Create an array object + self.array = self.h5file.create_array( + self.root, "anarray", [1], "Array title" + ) + # Create a group object + self.group = self.h5file.create_group( + self.root, "agroup", "Group title" + ) + + def test00_listAttributes(self): + """Checking listing attributes (no system attrs version).""" + + # With a Group object + self.group._v_attrs.pq = "1" + self.group._v_attrs.qr = "2" + self.group._v_attrs.rs = 
"3" + if common.verbose: + print("Attribute list:", self.group._v_attrs._f_list()) + + # Now, try with a Table object + self.table.attrs.a = "1" + self.table.attrs.c = "2" + self.table.attrs.b = "3" + if common.verbose: + print("Attribute list:", self.table.attrs._f_list()) + + # Finally, try with an Array object + self.array.attrs.k = "1" + self.array.attrs.j = "2" + self.array.attrs.i = "3" + if common.verbose: + print("Attribute list:", self.array.attrs._f_list()) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="r+") + self.root = self.h5file.root + + agroup = self.root.agroup + self.assertEqual(agroup._v_attrs._f_list("user"), ["pq", "qr", "rs"]) + self.assertEqual(agroup._v_attrs._f_list("sys"), []) + self.assertEqual(agroup._v_attrs._f_list("all"), ["pq", "qr", "rs"]) + + atable = self.root.atable + self.assertEqual(atable.attrs._f_list(), ["a", "b", "c"]) + self.assertEqual(atable.attrs._f_list("sys"), []) + self.assertEqual(atable.attrs._f_list("all"), ["a", "b", "c"]) + + anarray = self.root.anarray + self.assertEqual(anarray.attrs._f_list(), ["i", "j", "k"]) + self.assertEqual(anarray.attrs._f_list("sys"), []) + self.assertEqual(anarray.attrs._f_list("all"), ["i", "j", "k"]) + + +class NoSysAttrsNotClose(NoSysAttrsTestCase): + close = False + + +class NoSysAttrsClose(NoSysAttrsTestCase): + close = True + + +class CompatibilityTestCase(common.TestFileMixin, common.PyTablesTestCase): + h5fname = common.test_filename("issue_368.h5") + + @common.unittest.skipIf( + Version(np.__version__) < Version("1.9.0"), "requires numpy >= 1.9" + ) + def test_pickled_unicode_attrs(self): + # See also gh-368 and https://github.com/numpy/numpy/issues/4879. + # + # This is a compatibility test. In PyTables < 3.0 unicode + # attributes were stored as pickled unicode strings. + # In PyTables >= 3.0 unicode strings are stored as encoded utf-8 + # strings (the utf-8 marker is set at HDF5 level). 
+ # + # In any case PyTables (>= 3.0) should be able to handle correctly + # also data files generated with older versions of PyTables. + # Unfortunately a bug in numpy < 1.9 + # (https://github.com/numpy/numpy/issues/4879) makes it impossible to + # unpickle numpy arrays with dtype "U" resulting in an incorrect + # behaviour of PyTables. + + self.assertEqual( + self.h5file.get_node_attr("/", "py2_pickled_unicode"), "abc" + ) + + +class PicklePy2UnpicklePy3TestCase( + common.TestFileMixin, common.PyTablesTestCase +): + h5fname = common.test_filename("issue_560.h5") + + def test_pickled_datetime_object(self): + # See also gh-560 + # + # Objects (classes) that are pickled using python 2 may contain + # non-ascii characters in the pickled string. This will cause + # a UnicodeDecodeError when unpickling on python 3. + # Python 3.4 adds encoding='bytes' to fix this + # http://bugs.python.org/issue6784 + # Objects pickled in the testfile have non-ascii chars in the + # picklestring and will throw UnicodeDecodeError when unpickled + # on python 3. + + # datetime will be unpickled with encoding='bytes' + self.assertIsInstance( + self.h5file.get_node_attr("/", "py2_pickled_datetime"), + datetime.datetime, + ) + # dict will be unpickled with encoding='latin1' + d = self.h5file.get_node_attr("/", "py2_pickled_dict") + self.assertIsInstance(d, dict) + self.assertEqual(d["s"], "just a string") + + +class SegFaultPythonTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_segfault(self): + """Checking workaround for Python unpickle problem (see #253).""" + + self.h5file.root._v_attrs.trouble1 = "0" + self.assertEqual(self.h5file.root._v_attrs.trouble1, "0") + self.h5file.root._v_attrs.trouble2 = "0." 
+ self.assertEqual(self.h5file.root._v_attrs.trouble2, "0.") + # Problem happens after reopening + self._reopen() + self.assertEqual(self.h5file.root._v_attrs.trouble1, "0") + self.assertEqual(self.h5file.root._v_attrs.trouble2, "0.") + if common.verbose: + print("Great! '0' and '0.' values can be safely retrieved.") + + +class EmbeddedNullsTestCase(common.TempFileMixin, common.PyTablesTestCase): + # See laso gh-371 (https://github.com/PyTables/PyTables/issues/371) + + def test_unicode(self): + value = "string with a null byte \x00 in it" + + self.h5file.root._v_attrs.name = value + self.assertEqual(self.h5file.root._v_attrs.name, value) + + self._reopen() + + self.assertEqual(self.h5file.root._v_attrs.name, value) + + def test_bytes(self): + value = b"string with a null byte \x00 in it" + + self.h5file.root._v_attrs.name = value + self.assertEqual(self.h5file.root._v_attrs.name, value) + + self._reopen() + + self.assertEqual(self.h5file.root._v_attrs.name, value) + + +class VlenStrAttrTestCase(common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5fname = common.test_filename("vlstr_attr.h5") + self.h5file = tb.open_file(self.h5fname) + + def tearDown(self): + self.h5file.close() + super().tearDown() + + def test01_vlen_str_scalar(self): + """Checking file with variable length string attributes.""" + + attr = "vlen_str_scalar" + self.assertEqual( + self.h5file.get_node_attr("/", attr), attr.encode("ascii") + ) + + def test02_vlen_str_array(self): + """Checking file with variable length string attributes (1d).""" + + attr = "vlen_str_array" + v = self.h5file.get_node_attr("/", attr) + self.assertEqual(v.ndim, 1) + for idx, item in enumerate(v): + value = "%s_%d" % (attr, idx) + self.assertEqual(item, value.encode("ascii")) + + def test03_vlen_str_matrix(self): + """Checking file with variable length string attributes (2d).""" + + attr = "vlen_str_matrix" + m = self.h5file.get_node_attr("/", attr) + self.assertEqual(m.ndim, 2) + for row, rowdata in 
enumerate(m): + for col, item in enumerate(rowdata): + value = "%s_%d%d" % (attr, row, col) + self.assertEqual(item, value.encode("ascii")) + + +class UnsupportedAttrTypeTestCase( + common.TestFileMixin, common.PyTablesTestCase +): + h5fname = common.test_filename("attr-u16.h5") + + def test00_unsupportedType(self): + """Checking file with unsupported type.""" + + self.assertWarns(tb.exceptions.DataTypeWarning, repr, self.h5file) + + +# Test for specific system attributes +class SpecificAttrsTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_earray(self): + """Testing EArray specific attrs (create).""" + + ea = self.h5file.create_earray("/", "ea", tb.Int32Atom(), (2, 0, 4)) + if common.verbose: + print("EXTDIM-->", ea.attrs.EXTDIM) + self.assertEqual(ea.attrs.EXTDIM, 1) + + def test01_earray(self): + """Testing EArray specific attrs (open).""" + + ea = self.h5file.create_earray("/", "ea", tb.Int32Atom(), (0, 1, 4)) + self._reopen("r") + ea = self.h5file.root.ea + if common.verbose: + print("EXTDIM-->", ea.attrs.EXTDIM) + self.assertEqual(ea.attrs.EXTDIM, 0) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + for i in range(niter): + theSuite.addTest(common.make_suite(NotCloseCreate)) + theSuite.addTest(common.make_suite(CloseCreate)) + theSuite.addTest(common.make_suite(NoCacheNotCloseCreate)) + theSuite.addTest(common.make_suite(NoCacheCloseCreate)) + theSuite.addTest(common.make_suite(DictCacheNotCloseCreate)) + theSuite.addTest(common.make_suite(DictCacheCloseCreate)) + theSuite.addTest(common.make_suite(NotCloseTypesTestCase)) + theSuite.addTest(common.make_suite(CloseTypesTestCase)) + theSuite.addTest(common.make_suite(CloseNotAlignedPaddedTypesTestCase)) + theSuite.addTest(common.make_suite(NoCloseAlignedTypesTestCase)) + theSuite.addTest(common.make_suite(CloseAlignedTypesTestCase)) + theSuite.addTest(common.make_suite(CloseAlignedPaddedTypesTestCase)) + theSuite.addTest(common.make_suite(NoSysAttrsNotClose)) + 
theSuite.addTest(common.make_suite(NoSysAttrsClose)) + theSuite.addTest(common.make_suite(CompatibilityTestCase)) + theSuite.addTest(common.make_suite(PicklePy2UnpicklePy3TestCase)) + theSuite.addTest(common.make_suite(SegFaultPythonTestCase)) + theSuite.addTest(common.make_suite(EmbeddedNullsTestCase)) + theSuite.addTest(common.make_suite(VlenStrAttrTestCase)) + theSuite.addTest(common.make_suite(UnsupportedAttrTypeTestCase)) + theSuite.addTest(common.make_suite(SpecificAttrsTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_aux.py b/venv/Lib/site-packages/tables/tests/test_aux.py new file mode 100644 index 0000000..542e65b --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_aux.py @@ -0,0 +1,35 @@ +import unittest + +import numpy as np + +import tables as tb +from tables.tests import common + + +class TestAuxiliaryFunctions(unittest.TestCase): + def test_keysort(self): + N = 1000 + rnd = np.random.randint(N, size=N) + for dtype1 in ("S6", "b1", "i1", "i8", "u4", "u8", "f4", "f8"): + for dtype2 in ("u4", "i8"): + a = np.array(rnd, dtype1) + b = np.array(rnd, dtype2) + + c = a.copy() + d = c.argsort() + e = c[d] + f = b[d] + + tb.indexesextension.keysort(a, b) + self.assertTrue((a == e).all()) + self.assertTrue((b == f).all()) + + +def suite(): + theSuite = unittest.TestSuite() + theSuite.addTest(common.make_suite(TestAuxiliaryFunctions)) + return theSuite + + +if __name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_backcompat.py b/venv/Lib/site-packages/tables/tests/test_backcompat.py new file mode 100644 index 0000000..3adf16a --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_backcompat.py @@ -0,0 +1,228 @@ +import shutil +import tempfile +import warnings +from pathlib import Path + +import numpy as np + +import tables 
as tb +from tables.tests import common + + +# Check read Tables from pytables version 0.8 +class BackCompatTablesTestCase(common.PyTablesTestCase): + def test01_readTable(self): + """Checking backward compatibility of old formats of tables.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_readTable..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + h5file = tb.open_file(common.test_filename(self.h5fname), "r") + + try: + table = h5file.get_node("/tuple0") + + # Read the 100 records + result = [rec["var2"] for rec in table] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", table[-1]) + print("Total selected records in table ==> ", len(result)) + + self.assertEqual(len(result), 100) + finally: + h5file.close() + + +@common.unittest.skipIf(not common.lzo_avail, "lzo not available") +class Table2_1LZO(BackCompatTablesTestCase): + # pytables 0.8.x versions and after + h5fname = "Table2_1_lzo_nrv2e_shuffle.h5" + + +@common.unittest.skipIf(not common.lzo_avail, "lzo not available") +class Tables_LZO1(BackCompatTablesTestCase): + h5fname = "Tables_lzo1.h5" # files compressed with LZO1 + + +@common.unittest.skipIf(not common.lzo_avail, "lzo not available") +class Tables_LZO1_shuffle(BackCompatTablesTestCase): + # files compressed with LZO1 and shuffle + h5fname = "Tables_lzo1_shuffle.h5" + + +@common.unittest.skipIf(not common.lzo_avail, "lzo not available") +class Tables_LZO2(BackCompatTablesTestCase): + h5fname = "Tables_lzo2.h5" # files compressed with LZO2 + + +@common.unittest.skipIf(not common.lzo_avail, "lzo not available") +class Tables_LZO2_shuffle(BackCompatTablesTestCase): + # files compressed with LZO2 and shuffle + h5fname = "Tables_lzo2_shuffle.h5" + + +# Check read attributes from PyTables >= 1.0 properly +class 
BackCompatAttrsTestCase(common.TestFileMixin, common.PyTablesTestCase): + FILENAME = "zerodim-attrs-%s.h5" + + def setUp(self): + self.h5fname = common.test_filename(self.FILENAME % self.format) + super().setUp() + + def test01_readAttr(self): + """Checking backward compatibility of old formats for attributes.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_readAttr..." % self.__class__.__name__) + + # Read old formats + a = self.h5file.get_node("/a") + scalar = np.array(1, dtype="int32") + vector = np.array([1], dtype="int32") + if self.format == "1.3": + self.assertTrue(common.allequal(a.attrs.arrdim1, vector)) + self.assertTrue(common.allequal(a.attrs.arrscalar, scalar)) + self.assertEqual(a.attrs.pythonscalar, 1) + elif self.format == "1.4": + self.assertTrue(common.allequal(a.attrs.arrdim1, vector)) + self.assertTrue(common.allequal(a.attrs.arrscalar, scalar)) + self.assertTrue(common.allequal(a.attrs.pythonscalar, scalar)) + + +class Attrs_1_3(BackCompatAttrsTestCase): + format = "1.3" # pytables 1.0.x versions and earlier + + +class Attrs_1_4(BackCompatAttrsTestCase): + format = "1.4" # pytables 1.1.x versions and later + + +class VLArrayTestCase(common.TestFileMixin, common.PyTablesTestCase): + h5fname = common.test_filename("flavored_vlarrays-format1.6.h5") + + def test01_backCompat(self): + """Checking backward compatibility with old flavors of VLArray.""" + + # Check that we can read the contents without problems (nor warnings!) + vlarray1 = self.h5file.root.vlarray1 + self.assertEqual(vlarray1.flavor, "numeric") + vlarray2 = self.h5file.root.vlarray2 + self.assertEqual(vlarray2.flavor, "python") + self.assertEqual(vlarray2[1], [b"5", b"6", b"77"]) + + +# Make sure that 1.x files with TimeXX types continue to be readable +# and that its byteorder is correctly retrieved. 
+class TimeTestCase(common.TestFileMixin, common.PyTablesTestCase): + # Open a PYTABLES_FORMAT_VERSION=1.x file + h5fname = common.test_filename("time-table-vlarray-1_x.h5") + + def test00_table(self): + """Checking backward compatibility with old TimeXX types (tables).""" + + # Check that we can read the contents without problems (nor warnings!) + table = self.h5file.root.table + self.assertEqual(table.byteorder, "little") + + def test01_vlarray(self): + """Checking backward compatibility with old TimeXX types (vlarrays).""" + + # Check that we can read the contents without problems (nor warnings!) + vlarray4 = self.h5file.root.vlarray4 + self.assertEqual(vlarray4.byteorder, "little") + vlarray8 = self.h5file.root.vlarray4 + self.assertEqual(vlarray8.byteorder, "little") + + +class OldFlavorsTestCase01(common.PyTablesTestCase): + close = False + + # numeric + def test01_open(self): + """Checking opening of (X)Array (old 'numeric' flavor)""" + + # Open the HDF5 with old numeric flavor + h5fname = common.test_filename("oldflavor_numeric.h5") + with tb.open_file(h5fname) as h5file: + + # Assert other properties in array + self.assertEqual(h5file.root.array1.flavor, "numeric") + self.assertEqual(h5file.root.array2.flavor, "python") + self.assertEqual(h5file.root.carray1.flavor, "numeric") + self.assertEqual(h5file.root.carray2.flavor, "python") + self.assertEqual(h5file.root.vlarray1.flavor, "numeric") + self.assertEqual(h5file.root.vlarray2.flavor, "python") + + def test02_copy(self): + """Checking (X)Array.copy() method ('numetic' flavor)""" + + srcfile = common.test_filename("oldflavor_numeric.h5") + tmpfile = tempfile.mktemp(".h5") + shutil.copy(srcfile, tmpfile) + try: + # Open the HDF5 with old numeric flavor + with tb.open_file(tmpfile, "r+") as h5file: + # Copy to another location + self.assertWarns( + tb.exceptions.FlavorWarning, + h5file.root.array1.copy, + "/", + "array1copy", + ) + h5file.root.array2.copy("/", "array2copy") + h5file.root.carray1.copy("/", 
"carray1copy") + h5file.root.carray2.copy("/", "carray2copy") + h5file.root.vlarray1.copy("/", "vlarray1copy") + h5file.root.vlarray2.copy("/", "vlarray2copy") + + if self.close: + h5file.close() + h5file = tb.open_file(tmpfile) + else: + h5file.flush() + + # Assert other properties in array + self.assertEqual(h5file.root.array1copy.flavor, "numeric") + self.assertEqual(h5file.root.array2copy.flavor, "python") + self.assertEqual(h5file.root.carray1copy.flavor, "numeric") + self.assertEqual(h5file.root.carray2copy.flavor, "python") + self.assertEqual(h5file.root.vlarray1copy.flavor, "numeric") + self.assertEqual(h5file.root.vlarray2copy.flavor, "python") + finally: + Path(tmpfile).unlink() + + +class OldFlavorsTestCase02(common.PyTablesTestCase): + close = True + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + for n in range(niter): + theSuite.addTest(common.make_suite(VLArrayTestCase)) + theSuite.addTest(common.make_suite(TimeTestCase)) + theSuite.addTest(common.make_suite(OldFlavorsTestCase01)) + theSuite.addTest(common.make_suite(OldFlavorsTestCase02)) + theSuite.addTest(common.make_suite(Table2_1LZO)) + theSuite.addTest(common.make_suite(Tables_LZO1)) + theSuite.addTest(common.make_suite(Tables_LZO1_shuffle)) + theSuite.addTest(common.make_suite(Tables_LZO2)) + theSuite.addTest(common.make_suite(Tables_LZO2_shuffle)) + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_basics.py b/venv/Lib/site-packages/tables/tests/test_basics.py new file mode 100644 index 0000000..ed81bd5 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_basics.py @@ -0,0 +1,2675 @@ +import os +import sys +import queue +import shutil +import platform +import tempfile +import warnings +import threading +import subprocess +from pathlib import Path + +import tables + +try: + import 
multiprocessing as mp + + multiprocessing_imported = True +except ImportError: + multiprocessing_imported = False + +import numpy as np + +import tables as tb +from tables.tests import common + + +class OpenFileFailureTestCase(common.PyTablesTestCase): + def setUp(self): + super().setUp() + + self.N = len(tb.file._open_files) + self.open_files = tb.file._open_files + + def test01_open_file(self): + """Checking opening of a non existing file.""" + + h5fname = tempfile.mktemp(".h5") + with self.assertRaises(IOError): + h5file = tb.open_file(h5fname) + h5file.close() + + self.assertEqual(self.N, len(self.open_files)) + + def test02_open_file(self): + """Checking opening of an existing non HDF5 file.""" + + # create a dummy file + h5fname = tempfile.mktemp(".h5") + Path(h5fname).write_text("") + + # Try to open the dummy file + try: + with self.assertRaises(tb.HDF5ExtError): + h5file = tb.open_file(h5fname) + h5file.close() + + self.assertEqual(self.N, len(self.open_files)) + finally: + Path(h5fname).unlink() + + def test03_open_file(self): + """Checking opening of an existing file with invalid mode.""" + + # See gh-318 + + # create a dummy file + h5fname = tempfile.mktemp(".h5") + h5file = tb.open_file(h5fname, "w") + h5file.close() + + try: + # Try to open the dummy file + self.assertRaises(ValueError, tb.open_file, h5fname, "ab") + finally: + Path(h5fname).unlink() + + +class OpenFileTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self.populateFile() + + def populateFile(self): + root = self.h5file.root + + # Create an array + self.h5file.create_array(root, "array", [1, 2], title="Array example") + self.h5file.create_table( + root, "table", {"var1": tb.IntCol()}, "Table example" + ) + root._v_attrs.testattr = 41 + + # Create another array object + self.h5file.create_array(root, "anarray", [1], "Array title") + self.h5file.create_table( + root, "atable", {"var1": tb.IntCol()}, "Table title" + ) + + # Create a group 
object + group = self.h5file.create_group(root, "agroup", "Group title") + group._v_attrs.testattr = 42 + + # Create a some objects there + array1 = self.h5file.create_array( + group, "anarray1", [1, 2, 3, 4, 5, 6, 7], "Array title 1" + ) + array1.attrs.testattr = 42 + self.h5file.create_array(group, "anarray2", [2], "Array title 2") + self.h5file.create_table( + group, "atable1", {"var1": tb.IntCol()}, "Table title 1" + ) + ra = np.rec.array([(1, 11, "a")], formats="u1,f4,S1") + self.h5file.create_table(group, "atable2", ra, "Table title 2") + + # Create a lonely group in first level + self.h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + group3 = self.h5file.create_group(group, "agroup3", "Group title 3") + + # Create a new group in the third level + self.h5file.create_group(group3, "agroup4", "Group title 4") + + # Create an array in the root with the same name as one in 'agroup' + self.h5file.create_array( + root, "anarray1", [1, 2], title="Array example" + ) + + def test00_newFile(self): + """Checking creation of a new file.""" + + self.h5file.create_array( + self.h5file.root, "array_new", [1, 2], title="Array example" + ) + + # Get the CLASS attribute of the arr object + class_ = self.h5file.root.array.attrs.CLASS + + self.assertEqual(class_.capitalize(), "Array") + + def test00_newFile_unicode_filename(self): + temp_dir = tempfile.mkdtemp() + try: + h5fname = str(Path(temp_dir) / "test.h5") + with tb.open_file(h5fname, "w") as h5file: + self.assertTrue(h5file, tb.File) + finally: + shutil.rmtree(temp_dir) + + def test00_newFile_numpy_str_filename(self): + temp_dir = tempfile.mkdtemp() + try: + h5fname = np.str_(Path(temp_dir) / "test.h5") + with tb.open_file(h5fname, "w") as h5file: + self.assertTrue(h5file, tb.File) + finally: + shutil.rmtree(temp_dir) + + def test00_newFile_numpy_unicode_filename(self): + temp_dir = tempfile.mkdtemp() + try: + h5fname = np.str_(Path(temp_dir) / "test.h5") + with 
tb.open_file(h5fname, "w") as h5file: + self.assertTrue(h5file, tb.File) + finally: + shutil.rmtree(temp_dir) + + def test01_openFile(self): + """Checking opening of an existing file.""" + + # Open the old HDF5 file + self._reopen(node_cache_slots=self.node_cache_slots) + + # Get the CLASS attribute of the arr object + title = self.h5file.root.array.get_attr("TITLE") + + self.assertEqual(title, "Array example") + + def test01_open_file_pathlib(self): + """Checking opening of an existing file.""" + self.h5file.close() + h5fname = Path(self.h5fname) + with tables.open_file(h5fname) as h5file: + title = h5file.root.array.get_attr("TITLE") + self.assertEqual(title, "Array example") + + def test02_appendFile(self): + """Checking appending objects to an existing file.""" + + # Append a new array to the existing file + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.create_array( + self.h5file.root, "array2", [3, 4], title="Title example 2" + ) + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Get the CLASS attribute of the arr object + title = self.h5file.root.array2.get_attr("TITLE") + + self.assertEqual(title, "Title example 2") + + def test02b_appendFile2(self): + """Checking appending objects to an existing file ("a" version)""" + + # Append a new array to the existing file + self._reopen(mode="a", node_cache_slots=self.node_cache_slots) + self.h5file.create_array( + self.h5file.root, "array2", [3, 4], title="Title example 2" + ) + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Get the CLASS attribute of the arr object + title = self.h5file.root.array2.get_attr("TITLE") + + self.assertEqual(title, "Title example 2") + + # Begin to raise errors... 
+ + def test03_appendErrorFile(self): + """Checking appending objects to an existing file in "w" mode.""" + + # Append a new array to the existing file but in write mode + # so, the existing file should be deleted! + self._reopen(mode="w", node_cache_slots=self.node_cache_slots) + self.h5file.create_array( + self.h5file.root, "array2", [3, 4], title="Title example 2" + ) + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + with self.assertRaises(LookupError): + # Try to get the 'array' object in the old existing file + self.h5file.root.array + + def test04a_openErrorFile(self): + """Checking opening a non-existing file for reading""" + + with self.assertRaises(IOError): + tb.open_file( + "nonexistent.h5", + mode="r", + node_cache_slots=self.node_cache_slots, + ) + + def test04b_alternateRootFile(self): + """Checking alternate root access to the object tree.""" + + # Open the existent HDF5 file + self._reopen( + root_uep="/agroup", node_cache_slots=self.node_cache_slots + ) + + # Get the CLASS attribute of the arr object + if common.verbose: + print("\nFile tree dump:", self.h5file) + title = self.h5file.root.anarray1.get_attr("TITLE") + + # Get the node again, as this can trigger errors in some situations + anarray1 = self.h5file.root.anarray1 + self.assertIsNotNone(anarray1) + + self.assertEqual(title, "Array title 1") + + # This test works well, but HDF5 emits a series of messages that + # may lose the user. It is better to deactivate it. 
+ def notest04c_alternateRootFile(self): + """Checking non-existent alternate root access to the object tree""" + + with self.assertRaises(RuntimeError): + self._reopen( + root_uep="/nonexistent", node_cache_slots=self.node_cache_slots + ) + + def test05a_removeGroupRecursively(self): + """Checking removing a group recursively.""" + + # Delete a group with leafs + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(tb.NodeError): + self.h5file.remove_node(self.h5file.root.agroup) + + # This should work now + self.h5file.remove_node(self.h5file.root, "agroup", recursive=1) + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Try to get the removed object + with self.assertRaises(LookupError): + self.h5file.root.agroup + + # Try to get a child of the removed object + with self.assertRaises(LookupError): + self.h5file.get_node("/agroup/agroup3") + + def test05b_removeGroupRecursively(self): + """Checking removing a group recursively and access to it + immediately.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test05b_removeGroupRecursively..." 
+ % self.__class__.__name__ + ) + + # Delete a group with leafs + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(tb.NodeError): + self.h5file.remove_node(self.h5file.root, "agroup") + + # This should work now + self.h5file.remove_node(self.h5file.root, "agroup", recursive=1) + + # Try to get the removed object + with self.assertRaises(LookupError): + self.h5file.root.agroup + + # Try to get a child of the removed object + with self.assertRaises(LookupError): + self.h5file.get_node("/agroup/agroup3") + + def test06_removeNodeWithDel(self): + """Checking removing a node using ``__delattr__()``""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(AttributeError): + # This should fail because there is no *Python attribute* + # called ``agroup``. + del self.h5file.root.agroup + + def test06a_removeGroup(self): + """Checking removing a lonely group from an existing file.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + self.h5file.remove_node(self.h5file.root, "agroup2") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Try to get the removed object + with self.assertRaises(LookupError): + self.h5file.root.agroup2 + + def test06b_removeLeaf(self): + """Checking removing Leaves from an existing file.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.remove_node(self.h5file.root, "anarray") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Try to get the removed object + with self.assertRaises(LookupError): + self.h5file.root.anarray + + def test06c_removeLeaf(self): + """Checking removing Leaves and access it immediately.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.remove_node(self.h5file.root, "anarray") + + # Try to get the removed object + with self.assertRaises(LookupError): + 
self.h5file.root.anarray + + def test06d_removeLeaf(self): + """Checking removing a non-existent node""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # Try to get the removed object + with self.assertRaises(LookupError): + self.h5file.remove_node(self.h5file.root, "nonexistent") + + def test06e_removeTable(self): + """Checking removing Tables from an existing file.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.remove_node(self.h5file.root, "atable") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Try to get the removed object + with self.assertRaises(LookupError): + self.h5file.root.atable + + def test07_renameLeaf(self): + """Checking renaming a leave and access it after a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.anarray, "anarray2") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Ensure that the new name exists + array_ = self.h5file.root.anarray2 + self.assertEqual(array_.name, "anarray2") + self.assertEqual(array_._v_pathname, "/anarray2") + self.assertEqual(array_._v_depth, 1) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.anarray + + def test07b_renameLeaf(self): + """Checking renaming Leaves and accesing them immediately.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.anarray, "anarray2") + + # Ensure that the new name exists + array_ = self.h5file.root.anarray2 + self.assertEqual(array_.name, "anarray2") + self.assertEqual(array_._v_pathname, "/anarray2") + self.assertEqual(array_._v_depth, 1) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.anarray + + def test07c_renameLeaf(self): + """Checking renaming Leaves 
and modify attributes after that.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.anarray, "anarray2") + array_ = self.h5file.root.anarray2 + array_.attrs.TITLE = "hello" + + # Ensure that the new attribute has been written correctly + self.assertEqual(array_.title, "hello") + self.assertEqual(array_.attrs.TITLE, "hello") + + def test07d_renameLeaf(self): + """Checking renaming a Group under a nested group.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.agroup.anarray2, "anarray3") + + # Ensure that we can access n attributes in the new group + node = self.h5file.root.agroup.anarray3 + self.assertEqual(node._v_title, "Array title 2") + + def test08_renameToExistingLeaf(self): + """Checking renaming a node to an existing name.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # Try to get the previous object with the old name + with self.assertRaises(tb.NodeError): + self.h5file.rename_node(self.h5file.root.anarray, "array") + + # Now overwrite the destination node. 
+ anarray = self.h5file.root.anarray + self.h5file.rename_node(anarray, "array", overwrite=True) + self.assertNotIn("/anarray", self.h5file) + self.assertIs(self.h5file.root.array, anarray) + + def test08b_renameToNotValidNaturalName(self): + """Checking renaming a node to a non-valid natural name""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with warnings.catch_warnings(): + warnings.filterwarnings("error", category=tb.NaturalNameWarning) + + # Try to get the previous object with the old name + with self.assertRaises(tb.NaturalNameWarning): + self.h5file.rename_node(self.h5file.root.anarray, "array 2") + + def test09_renameGroup(self): + """Checking renaming a Group and access it after a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.agroup, "agroup3") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Ensure that the new name exists + group = self.h5file.root.agroup3 + self.assertEqual(group._v_name, "agroup3") + self.assertEqual(group._v_pathname, "/agroup3") + + # The children of this group also must be accessible through the + # new name path + group2 = self.h5file.get_node("/agroup3/agroup3") + self.assertEqual(group2._v_name, "agroup3") + self.assertEqual(group2._v_pathname, "/agroup3/agroup3") + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.agroup + + # Try to get a child with the old pathname + with self.assertRaises(LookupError): + self.h5file.get_node("/agroup/agroup3") + + def test09b_renameGroup(self): + """Checking renaming a Group and access it immediately.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.agroup, "agroup3") + + # Ensure that the new name exists + group = self.h5file.root.agroup3 + self.assertEqual(group._v_name, "agroup3") + 
self.assertEqual(group._v_pathname, "/agroup3") + + # The children of this group also must be accessible through the + # new name path + group2 = self.h5file.get_node("/agroup3/agroup3") + self.assertEqual(group2._v_name, "agroup3") + self.assertEqual(group2._v_pathname, "/agroup3/agroup3") + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.agroup + + # Try to get a child with the old pathname + with self.assertRaises(LookupError): + self.h5file.get_node("/agroup/agroup3") + + def test09c_renameGroup(self): + """Checking renaming a Group and modify attributes afterwards.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.agroup, "agroup3") + + # Ensure that we can modify attributes in the new group + group = self.h5file.root.agroup3 + group._v_attrs.TITLE = "Hello" + self.assertEqual(group._v_title, "Hello") + self.assertEqual(group._v_attrs.TITLE, "Hello") + + def test09d_renameGroup(self): + """Checking renaming a Group under a nested group.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + self.h5file.rename_node(self.h5file.root.agroup.agroup3, "agroup4") + + # Ensure that we can access n attributes in the new group + group = self.h5file.root.agroup.agroup4 + self.assertEqual(group._v_title, "Group title 3") + + def test09e_renameGroup(self): + """Checking renaming a Group with nested groups in the LRU cache.""" + # This checks for ticket #126. + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # Load intermediate groups and keep a nested one alive. 
+ g = self.h5file.root.agroup.agroup3.agroup4 + self.assertIsNotNone(g) + self.h5file.rename_node("/", name="agroup", newname="agroup_") + + # see ticket #126 + self.assertNotIn("/agroup_/agroup4", self.h5file) + + self.assertNotIn("/agroup", self.h5file) + for newpath in [ + "/agroup_", + "/agroup_/agroup3", + "/agroup_/agroup3/agroup4", + ]: + self.assertIn(newpath, self.h5file) + self.assertEqual( + newpath, self.h5file.get_node(newpath)._v_pathname + ) + + def test10_moveLeaf(self): + """Checking moving a leave and access it after a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + self.h5file.move_node(self.h5file.root.anarray, newgroup, "anarray2") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Ensure that the new name exists + array_ = self.h5file.root.newgroup.anarray2 + self.assertEqual(array_.name, "anarray2") + self.assertEqual(array_._v_pathname, "/newgroup/anarray2") + self.assertEqual(array_._v_depth, 2) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.anarray + + def test10b_moveLeaf(self): + """Checking moving a leave and access it without a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + self.h5file.move_node(self.h5file.root.anarray, newgroup, "anarray2") + + # Ensure that the new name exists + array_ = self.h5file.root.newgroup.anarray2 + self.assertEqual(array_.name, "anarray2") + self.assertEqual(array_._v_pathname, "/newgroup/anarray2") + self.assertEqual(array_._v_depth, 2) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.anarray + + def test10c_moveLeaf(self): + """Checking moving Leaves and modify attributes after that.""" + + self._reopen(mode="r+", 
node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + self.h5file.move_node(self.h5file.root.anarray, newgroup, "anarray2") + array_ = self.h5file.root.newgroup.anarray2 + array_.attrs.TITLE = "hello" + + # Ensure that the new attribute has been written correctly + self.assertEqual(array_.title, "hello") + self.assertEqual(array_.attrs.TITLE, "hello") + + def test10d_moveToExistingLeaf(self): + """Checking moving a leaf to an existing name.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # Try to get the previous object with the old name + with self.assertRaises(tb.NodeError): + self.h5file.move_node( + self.h5file.root.anarray, self.h5file.root, "array" + ) + + def test10_2_moveTable(self): + """Checking moving a table and access it after a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + self.h5file.move_node(self.h5file.root.atable, newgroup, "atable2") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Ensure that the new name exists + table_ = self.h5file.root.newgroup.atable2 + self.assertEqual(table_.name, "atable2") + self.assertEqual(table_._v_pathname, "/newgroup/atable2") + self.assertEqual(table_._v_depth, 2) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.atable + + def test10_2b_moveTable(self): + """Checking moving a table and access it without a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + self.h5file.move_node(self.h5file.root.atable, newgroup, "atable2") + + # Ensure that the new name exists + table_ = self.h5file.root.newgroup.atable2 + self.assertEqual(table_.name, "atable2") + self.assertEqual(table_._v_pathname, "/newgroup/atable2") + self.assertEqual(table_._v_depth, 2) + + # Try 
to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.atable + + def test10_2b_bis_moveTable(self): + """Checking moving a table and use cached row without a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + + # Cache the Row attribute prior to the move + row = self.h5file.root.atable.row + self.h5file.move_node(self.h5file.root.atable, newgroup, "atable2") + + # Ensure that the new name exists + table_ = self.h5file.root.newgroup.atable2 + self.assertEqual(table_.name, "atable2") + self.assertEqual(table_._v_pathname, "/newgroup/atable2") + self.assertEqual(table_._v_depth, 2) + + # Ensure that cache Row attribute has been updated + row = table_.row + self.assertEqual(table_._v_pathname, row.table._v_pathname) + nrows = table_.nrows + + # Add a new row just to make sure that this works + row.append() + table_.flush() + self.assertEqual(table_.nrows, nrows + 1) + + def test10_2c_moveTable(self): + """Checking moving tables and modify attributes after that.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group("/", "newgroup") + self.h5file.move_node(self.h5file.root.atable, newgroup, "atable2") + table_ = self.h5file.root.newgroup.atable2 + table_.attrs.TITLE = "hello" + + # Ensure that the new attribute has been written correctly + self.assertEqual(table_.title, "hello") + self.assertEqual(table_.attrs.TITLE, "hello") + + def test10_2d_moveToExistingTable(self): + """Checking moving a table to an existing name.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # Try to get the previous object with the old name + with self.assertRaises(tb.NodeError): + self.h5file.move_node( + self.h5file.root.atable, self.h5file.root, "table" + ) + + def test10_2e_moveToExistingTableOverwrite(self): + """Checking moving a table to an existing name, overwriting 
it.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + srcNode = self.h5file.root.atable + self.h5file.move_node( + srcNode, self.h5file.root, "table", overwrite=True + ) + dstNode = self.h5file.root.table + + self.assertIs(srcNode, dstNode) + + def test11_moveGroup(self): + """Checking moving a Group and access it after a close/open.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group(self.h5file.root, "newgroup") + self.h5file.move_node(self.h5file.root.agroup, newgroup, "agroup3") + + # Open this file in read-only mode + self._reopen(node_cache_slots=self.node_cache_slots) + + # Ensure that the new name exists + group = self.h5file.root.newgroup.agroup3 + self.assertEqual(group._v_name, "agroup3") + self.assertEqual(group._v_pathname, "/newgroup/agroup3") + self.assertEqual(group._v_depth, 2) + + # The children of this group must also be accessible through the + # new name path + group2 = self.h5file.get_node("/newgroup/agroup3/agroup3") + self.assertEqual(group2._v_name, "agroup3") + self.assertEqual(group2._v_pathname, "/newgroup/agroup3/agroup3") + self.assertEqual(group2._v_depth, 3) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.agroup + + # Try to get a child with the old pathname + with self.assertRaises(LookupError): + self.h5file.get_node("/agroup/agroup3") + + def test11b_moveGroup(self): + """Checking moving a Group and access it immediately.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group(self.h5file.root, "newgroup") + self.h5file.move_node(self.h5file.root.agroup, newgroup, "agroup3") + + # Ensure that the new name exists + group = self.h5file.root.newgroup.agroup3 + self.assertEqual(group._v_name, "agroup3") + self.assertEqual(group._v_pathname, "/newgroup/agroup3") + self.assertEqual(group._v_depth, 2) + + # The children of this group must 
also be accessible through the + # new name path + group2 = self.h5file.get_node("/newgroup/agroup3/agroup3") + self.assertEqual(group2._v_name, "agroup3") + self.assertEqual(group2._v_pathname, "/newgroup/agroup3/agroup3") + self.assertEqual(group2._v_depth, 3) + + # Try to get the previous object with the old name + with self.assertRaises(LookupError): + self.h5file.root.agroup + + # Try to get a child with the old pathname + with self.assertRaises(LookupError): + self.h5file.get_node("/agroup/agroup3") + + def test11c_moveGroup(self): + """Checking moving a Group and modify attributes afterwards.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + newgroup = self.h5file.create_group(self.h5file.root, "newgroup") + self.h5file.move_node(self.h5file.root.agroup, newgroup, "agroup3") + + # Ensure that we can modify attributes in the new group + group = self.h5file.root.newgroup.agroup3 + group._v_attrs.TITLE = "Hello" + group._v_attrs.hola = "Hello" + self.assertEqual(group._v_title, "Hello") + self.assertEqual(group._v_attrs.TITLE, "Hello") + self.assertEqual(group._v_attrs.hola, "Hello") + + def test11d_moveToExistingGroup(self): + """Checking moving a group to an existing name.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # Try to get the previous object with the old name + with self.assertRaises(tb.NodeError): + self.h5file.move_node( + self.h5file.root.agroup, self.h5file.root, "agroup2" + ) + + def test11e_moveToExistingGroupOverwrite(self): + """Checking moving a group to an existing name, overwriting it.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup2 -> agroup + srcNode = self.h5file.root.agroup2 + self.h5file.move_node( + srcNode, self.h5file.root, "agroup", overwrite=True + ) + dstNode = self.h5file.root.agroup + + self.assertIs(srcNode, dstNode) + + def test12a_moveNodeOverItself(self): + """Checking moving a node over itself.""" + + self._reopen(mode="r+", 
node_cache_slots=self.node_cache_slots) + + # array -> array + srcNode = self.h5file.root.array + self.h5file.move_node(srcNode, self.h5file.root, "array") + dstNode = self.h5file.root.array + + self.assertIs(srcNode, dstNode) + + def test12b_moveGroupIntoItself(self): + """Checking moving a group into itself.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + with self.assertRaises(tb.NodeError): + # agroup2 -> agroup2/ + self.h5file.move_node( + self.h5file.root.agroup2, self.h5file.root.agroup2 + ) + + def test13a_copyLeaf(self): + """Copying a leaf.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # array => agroup2/ + new_node = self.h5file.copy_node( + self.h5file.root.array, self.h5file.root.agroup2 + ) + dstNode = self.h5file.root.agroup2.array + + self.assertIs(new_node, dstNode) + + def test13b_copyGroup(self): + """Copying a group.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup2 => agroup/ + new_node = self.h5file.copy_node( + self.h5file.root.agroup2, self.h5file.root.agroup + ) + dstNode = self.h5file.root.agroup.agroup2 + + self.assertIs(new_node, dstNode) + + def test13c_copyGroupSelf(self): + """Copying a group into itself.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup2 => agroup2/ + new_node = self.h5file.copy_node( + self.h5file.root.agroup2, self.h5file.root.agroup2 + ) + dstNode = self.h5file.root.agroup2.agroup2 + + self.assertIs(new_node, dstNode) + + def test13d_copyGroupRecursive(self): + """Recursively copying a group.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup => agroup2/ + new_node = self.h5file.copy_node( + self.h5file.root.agroup, self.h5file.root.agroup2, recursive=True + ) + dstNode = self.h5file.root.agroup2.agroup + + self.assertIs(new_node, dstNode) + dstChild1 = dstNode.anarray1 + self.assertIsNotNone(dstChild1) + dstChild2 = dstNode.anarray2 + 
self.assertIsNotNone(dstChild2) + dstChild3 = dstNode.agroup3 + self.assertIsNotNone(dstChild3) + + def test13e_copyRootRecursive(self): + """Recursively copying the root group into the root of another file.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + h5fname2 = tempfile.mktemp(".h5") + h5file2 = tb.open_file( + h5fname2, mode="w", node_cache_slots=self.node_cache_slots + ) + try: + # h5file.root => h5file2.root + new_node = self.h5file.copy_node( + self.h5file.root, h5file2.root, recursive=True + ) + dstNode = h5file2.root + + self.assertIs(new_node, dstNode) + self.assertIn("/agroup", h5file2) + self.assertIn("/agroup/anarray1", h5file2) + self.assertIn("/agroup/agroup3", h5file2) + + finally: + h5file2.close() + Path(h5fname2).unlink() + + def test13f_copyRootRecursive(self): + """Recursively copying the root group into a group in another file.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + h5fname2 = tempfile.mktemp(".h5") + h5file2 = tb.open_file( + h5fname2, mode="w", node_cache_slots=self.node_cache_slots + ) + try: + h5file2.create_group("/", "agroup2") + + # fileh.root => h5file2.root.agroup2 + new_node = self.h5file.copy_node( + self.h5file.root, h5file2.root.agroup2, recursive=True + ) + dstNode = h5file2.root.agroup2 + + self.assertIs(new_node, dstNode) + self.assertIn("/agroup2/agroup", h5file2) + self.assertIn("/agroup2/agroup/anarray1", h5file2) + self.assertIn("/agroup2/agroup/agroup3", h5file2) + + finally: + h5file2.close() + Path(h5fname2).unlink() + + def test13g_copyRootItself(self): + """Recursively copying the root group into itself.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + agroup2 = self.h5file.root + self.assertIsNotNone(agroup2) + + # h5file.root => h5file.root + self.assertRaises( + IOError, + self.h5file.copy_node, + self.h5file.root, + self.h5file.root, + recursive=True, + ) + + def test14a_copyNodeExisting(self): + """Copying over an existing 
node.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(tb.NodeError): + # agroup2 => agroup + self.h5file.copy_node(self.h5file.root.agroup2, newname="agroup") + + def test14b_copyNodeExistingOverwrite(self): + """Copying over an existing node, overwriting it.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup2 => agroup + new_node = self.h5file.copy_node( + self.h5file.root.agroup2, newname="agroup", overwrite=True + ) + dstNode = self.h5file.root.agroup + + self.assertIs(new_node, dstNode) + + def test14b2_copyNodeExistingOverwrite(self): + """Copying over an existing node in other file, overwriting it.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + h5fname2 = tempfile.mktemp(".h5") + h5file2 = tb.open_file( + h5fname2, mode="w", node_cache_slots=self.node_cache_slots + ) + + try: + # file1:/anarray1 => h5fname2:/anarray1 + new_node = self.h5file.copy_node( + self.h5file.root.agroup.anarray1, newparent=h5file2.root + ) + # file1:/ => h5fname2:/ + new_node = self.h5file.copy_node( + self.h5file.root, h5file2.root, overwrite=True, recursive=True + ) + dstNode = h5file2.root + + self.assertIs(new_node, dstNode) + finally: + h5file2.close() + Path(h5fname2).unlink() + + def test14c_copyNodeExistingSelf(self): + """Copying over self.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(tb.NodeError): + # agroup => agroup + self.h5file.copy_node(self.h5file.root.agroup, newname="agroup") + + def test14d_copyNodeExistingOverwriteSelf(self): + """Copying over self, trying to overwrite.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(tb.NodeError): + # agroup => agroup + self.h5file.copy_node( + self.h5file.root.agroup, newname="agroup", overwrite=True + ) + + def test14e_copyGroupSelfRecursive(self): + """Recursively copying a group into itself.""" + + 
self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + with self.assertRaises(tb.NodeError): + # agroup => agroup/ + self.h5file.copy_node( + self.h5file.root.agroup, + self.h5file.root.agroup, + recursive=True, + ) + + def test15a_oneStepMove(self): + """Moving and renaming a node in a single action.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # anarray1 -> agroup/array + srcNode = self.h5file.root.anarray1 + self.h5file.move_node(srcNode, self.h5file.root.agroup, "array") + dstNode = self.h5file.root.agroup.array + + self.assertIs(srcNode, dstNode) + + def test15b_oneStepCopy(self): + """Copying and renaming a node in a single action.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # anarray1 => agroup/array + new_node = self.h5file.copy_node( + self.h5file.root.anarray1, self.h5file.root.agroup, "array" + ) + dstNode = self.h5file.root.agroup.array + + self.assertIs(new_node, dstNode) + + def test16a_fullCopy(self): + """Copying full data and user attributes.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup => groupcopy + srcNode = self.h5file.root.agroup + new_node = self.h5file.copy_node( + srcNode, newname="groupcopy", recursive=True + ) + dstNode = self.h5file.root.groupcopy + + self.assertIs(new_node, dstNode) + self.assertEqual(srcNode._v_attrs.testattr, dstNode._v_attrs.testattr) + self.assertEqual( + srcNode.anarray1.attrs.testattr, dstNode.anarray1.attrs.testattr + ) + self.assertEqual(srcNode.anarray1.read(), dstNode.anarray1.read()) + + def test16b_partialCopy(self): + """Copying partial data and no user attributes.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + # agroup => groupcopy + srcNode = self.h5file.root.agroup + new_node = self.h5file.copy_node( + srcNode, + newname="groupcopy", + recursive=True, + copyuserattrs=False, + start=0, + stop=5, + step=2, + ) + dstNode = self.h5file.root.groupcopy + + 
self.assertIs(new_node, dstNode) + self.assertFalse(hasattr(dstNode._v_attrs, "testattr")) + self.assertFalse(hasattr(dstNode.anarray1.attrs, "testattr")) + self.assertEqual( + srcNode.anarray1.read()[0:5:2], dstNode.anarray1.read() + ) + + def test16c_fullCopy(self): + """Copying full data and user attributes (from file to file).""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + + h5fname2 = tempfile.mktemp(".h5") + h5file2 = tb.open_file( + h5fname2, mode="w", node_cache_slots=self.node_cache_slots + ) + + try: + # file1:/ => h5fname2:groupcopy + srcNode = self.h5file.root + new_node = self.h5file.copy_node( + srcNode, h5file2.root, newname="groupcopy", recursive=True + ) + dstNode = h5file2.root.groupcopy + + self.assertIs(new_node, dstNode) + self.assertEqual( + srcNode._v_attrs.testattr, dstNode._v_attrs.testattr + ) + self.assertEqual( + srcNode.agroup.anarray1.attrs.testattr, + dstNode.agroup.anarray1.attrs.testattr, + ) + self.assertEqual( + srcNode.agroup.anarray1.read(), dstNode.agroup.anarray1.read() + ) + finally: + h5file2.close() + Path(h5fname2).unlink() + + def test17a_CopyChunkshape(self): + """Copying dataset with a chunkshape.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + srcTable = self.h5file.root.table + newTable = self.h5file.copy_node( + srcTable, newname="tablecopy", chunkshape=11 + ) + + self.assertEqual(newTable.chunkshape, (11,)) + self.assertNotEqual(srcTable.chunkshape, newTable.chunkshape) + + def test17b_CopyChunkshape(self): + """Copying dataset with a chunkshape with 'keep' value.""" + + self._reopen(mode="r+", node_cache_slots=self.node_cache_slots) + srcTable = self.h5file.root.table + newTable = self.h5file.copy_node( + srcTable, newname="tablecopy", chunkshape="keep" + ) + + self.assertEqual(srcTable.chunkshape, newTable.chunkshape) + + def test17c_CopyChunkshape(self): + """Copying dataset with a chunkshape with 'auto' value.""" + + self._reopen(mode="r+", 
node_cache_slots=self.node_cache_slots) + srcTable = self.h5file.root.table + newTable = self.h5file.copy_node( + srcTable, newname="tablecopy", chunkshape=11 + ) + newTable2 = self.h5file.copy_node( + newTable, newname="tablecopy2", chunkshape="auto" + ) + + self.assertEqual(srcTable.chunkshape, newTable2.chunkshape) + + def test18_closedRepr(self): + """Representing a closed node as a string.""" + + self._reopen(node_cache_slots=self.node_cache_slots) + + for node in [self.h5file.root.agroup, self.h5file.root.anarray]: + node._f_close() + self.assertIn("closed", str(node)) + self.assertIn("closed", repr(node)) + + def test19_fileno(self): + """Checking that the 'fileno()' method works.""" + + # Open the old HDF5 file + self._reopen(mode="r", node_cache_slots=self.node_cache_slots) + + # Get the file descriptor for this file + fd = self.h5file.fileno() + if common.verbose: + print("Value of fileno():", fd) + self.assertGreaterEqual(fd, 0) + + +class NodeCacheOpenFile(OpenFileTestCase): + node_cache_slots = tb.parameters.NODE_CACHE_SLOTS + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class NoNodeCacheOpenFile(OpenFileTestCase): + node_cache_slots = 0 + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class DictNodeCacheOpenFile(OpenFileTestCase): + node_cache_slots = -tb.parameters.NODE_CACHE_SLOTS + open_kwargs = dict(node_cache_slots=node_cache_slots) + + +class CheckFileTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + # Create a regular (text) file + self.txtfile = tempfile.mktemp(".h5") + self.fileh = open(self.txtfile, "w") + self.fileh.write("Hello!") + self.fileh.close() + + def tearDown(self): + self.fileh.close() + Path(self.txtfile).unlink() + super().tearDown() + + def test00_isHDF5File(self): + """Checking tables.is_hdf5_file function (TRUE case)""" + + # Create a PyTables file (and by so, an HDF5 file) + self.h5file.create_array( + self.h5file.root, "array", [1, 2], title="Title 
example" + ) + + # For this method to run, it needs a closed file + self.h5file.close() + + # When file has an HDF5 format, always returns 1 + if common.verbose: + print( + "\nisHDF5File(%s) ==> %d" + % (self.h5fname, tb.is_hdf5_file(self.h5fname)) + ) + self.assertEqual(tb.is_hdf5_file(self.h5fname), 1) + + def test01_isHDF5File(self): + """Checking tables.is_hdf5_file function (FALSE case)""" + + version = tb.is_hdf5_file(self.txtfile) + + # When file is not an HDF5 format, always returns 0 or + # negative value + self.assertLessEqual(version, 0) + + def test01x_isHDF5File_nonexistent(self): + """Identifying a nonexistent HDF5 file.""" + self.assertRaises(IOError, tb.is_hdf5_file, "nonexistent") + + @common.unittest.skipUnless( + hasattr(os, "getuid") and os.getuid() != 0, "no UID" + ) + def test01x_isHDF5File_unreadable(self): + """Identifying an unreadable HDF5 file.""" + + self.h5file.close() + Path(self.h5fname).chmod(0) # no permissions at all + self.assertRaises(IOError, tb.is_hdf5_file, self.h5fname) + + def test02_isPyTablesFile(self): + """Checking is_pytables_file function (TRUE case)""" + + # Create a PyTables h5fname + self.h5file.create_array( + self.h5file.root, "array", [1, 2], title="Title example" + ) + + # For this method to run, it needs a closed h5fname + self.h5file.close() + + version = tb.is_pytables_file(self.h5fname) + + # When h5fname has a PyTables format, always returns "1.0" string or + # greater + if common.verbose: + print() + print("\nPyTables format version number ==> %s" % version) + self.assertGreaterEqual(version, "1.0") + + def test03_isPyTablesFile(self): + """Checking is_pytables_file function (FALSE case)""" + + version = tb.is_pytables_file(self.txtfile) + + # When file is not a PyTables format, always returns 0 or + # negative value + if common.verbose: + print() + print("\nPyTables format version number ==> %s" % version) + self.assertIsNone(version) + + def test04_openGenericHDF5File(self): + """Checking opening of a 
generic HDF5 file.""" + + # Open an existing generic HDF5 file + h5fname = common.test_filename("ex-noattr.h5") + with tb.open_file(h5fname, mode="r") as h5file: + # Check for some objects inside + + # A group + columns = h5file.get_node("/columns", classname="Group") + self.assertEqual(columns._v_name, "columns") + + # An Array + array_ = h5file.get_node(columns, "TDC", classname="Array") + self.assertEqual(array_._v_name, "TDC") + + # The new LRU code defers the appearance of a warning to this point + + # Here comes an Array of H5T_ARRAY type + ui = h5file.get_node(columns, "pressure", classname="Array") + self.assertEqual(ui._v_name, "pressure") + if common.verbose: + print("Array object with type H5T_ARRAY -->", repr(ui)) + print("Array contents -->", ui[:]) + + # A Table + table = h5file.get_node("/detector", "table", classname="Table") + self.assertEqual(table._v_name, "table") + + def test04b_UnImplementedOnLoading(self): + """Checking failure loading resulting in an ``UnImplemented`` node.""" + + # ############## Note for developers ############################## + # This test fails if you have the line: # + # ##return ChildClass(self, childname) # uncomment for debugging # + # uncommented in Group.py! 
# + # ################################################################# + + h5fname = common.test_filename("smpl_unsupptype.h5") + with tb.open_file(h5fname) as h5file: + with self.assertWarns(UserWarning): + node = h5file.get_node("/CompoundChunked") + self.assertIsInstance(node, tb.UnImplemented) + + def test04c_UnImplementedScalar(self): + """Checking opening of HDF5 files containing scalar dataset of + UnImplemented type.""" + + with tb.open_file(common.test_filename("scalar.h5")) as h5file: + with self.assertWarns(UserWarning): + node = h5file.get_node("/variable length string") + self.assertIsInstance(node, tb.UnImplemented) + + def test05_copyUnimplemented(self): + """Checking that an UnImplemented object cannot be copied.""" + + # Open an existing generic HDF5 file + h5fname = common.test_filename("smpl_unsupptype.h5") + with tb.open_file(h5fname, mode="r") as h5file: + self.assertWarns(UserWarning, h5file.get_node, "/CompoundChunked") + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + ui = h5file.get_node("/CompoundChunked") + self.assertEqual(ui._v_name, "CompoundChunked") + if common.verbose: + print("UnImplement object -->", repr(ui)) + + # Check that it cannot be copied to another file: + self.assertWarns(UserWarning, ui.copy, self.h5file.root, "newui") + + # The next can be used to check the copy of Array objects with H5T_ARRAY + # in the future + def _test05_copyUnimplemented(self): + """Checking that an UnImplemented object cannot be copied.""" + + # Open an existing generic HDF5 file + # We don't need to wrap this in a try clause because + # it has already been tried and the warning will not happen again + h5fname2 = common.test_filename("ex-noattr.h5") + with tb.open_file(h5fname2, mode="r") as h5file2: + # An unsupported object (the deprecated H5T_ARRAY type in + # Array, from pytables 0.8 on) + ui = h5file2.get_node(h5file2.root.columns, "pressure") + self.assertEqual(ui._v_name, "pressure") + if common.verbose: + 
print("UnImplement object -->", repr(ui)) + + # Check that it cannot be copied to another file + with warnings.catch_warnings(): + # Force the userwarning to issue an error + warnings.filterwarnings("error", category=UserWarning) + with self.assertRaises(UserWarning): + ui.copy(self.h5file.root, "newui") + + +@common.unittest.skipIf( + tb.file._FILE_OPEN_POLICY == "strict", 'FILE_OPEN_POLICY = "strict"' +) +class ThreadingTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5file.create_carray( + "/", "test_array", tb.Int64Atom(), (200, 300) + ) + self.h5file.close() + + def test(self): + lock = threading.Lock() + + def syncronized_open_file(*args, **kwargs): + with lock: + return tb.open_file(*args, **kwargs) + + def syncronized_close_file(self, *args, **kwargs): + with lock: + return self.close(*args, **kwargs) + + filename = self.h5fname + + def run(filename, q): + try: + f = syncronized_open_file(filename, mode="r") + arr = f.root.test_array[8:12, 18:22] + assert arr.max() == arr.min() == 0 + syncronized_close_file(f) + except Exception: + q.put(sys.exc_info()) + else: + q.put("OK") + + threads = [] + q = queue.Queue() + for i in range(10): + t = threading.Thread(target=run, args=(filename, q)) + t.start() + threads.append(t) + + for i in range(10): + self.assertEqual(q.get(), "OK") + + for t in threads: + t.join() + + +class PythonAttrsTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test interactions of Python attributes and child nodes.""" + + def test00_attrOverChild(self): + """Setting a Python attribute over a child node.""" + + root = self.h5file.root + + # Create ``/test`` and overshadow it with ``root.test``. + child = self.h5file.create_array(root, "test", [1]) + attr = "foobar" + self.assertWarns(tb.NaturalNameWarning, setattr, root, "test", attr) + + self.assertIs(root.test, attr) + self.assertIs(root._f_get_child("test"), child) + + # Now bring ``/test`` again to light. 
+ del root.test + + self.assertIs(root.test, child) + + # Now there is no *attribute* named ``test``. + self.assertRaises(AttributeError, delattr, root, "test") + + def test01_childUnderAttr(self): + """Creating a child node under a Python attribute.""" + + h5file = self.h5file + root = h5file.root + + # Create ``root.test`` and an overshadowed ``/test``. + attr = "foobar" + root.test = attr + self.assertWarns( + tb.NaturalNameWarning, h5file.create_array, root, "test", [1] + ) + child = h5file.get_node("/test") + + self.assertIs(root.test, attr) + self.assertIs(root._f_get_child("test"), child) + + # Now bring ``/test`` again to light. + del root.test + + self.assertIs(root.test, child) + + # Now there is no *attribute* named ``test``. + self.assertRaises(AttributeError, delattr, root, "test") + + def test02_nodeAttrInLeaf(self): + """Assigning a ``Node`` value as an attribute to a ``Leaf``.""" + + h5file = self.h5file + + array1 = h5file.create_array("/", "array1", [1]) + array2 = h5file.create_array("/", "array2", [1]) + + # This may make the garbage collector work a little. + array1.array2 = array2 + array2.array1 = array1 + + # Check the assignments. + self.assertIs(array1.array2, array2) + self.assertIs(array2.array1, array1) + self.assertRaises( + tb.NoSuchNodeError, # ``/array1`` is not a group + h5file.get_node, + "/array1/array2", + ) + self.assertRaises( + tb.NoSuchNodeError, # ``/array2`` is not a group + h5file.get_node, + "/array2/array3", + ) + + def test03_nodeAttrInGroup(self): + """Assigning a ``Node`` value as an attribute to a ``Group``.""" + + h5file = self.h5file + root = h5file.root + + array = h5file.create_array("/", "array", [1]) + + # Assign the array to a pair of attributes, + # one of them overshadowing the original. + root.arrayAlias = array + self.assertWarns(tb.NaturalNameWarning, setattr, root, "array", array) + + # Check the assignments. 
+ self.assertIs(root.arrayAlias, array) + self.assertIs(root.array, array) + self.assertRaises(tb.NoSuchNodeError, h5file.get_node, "/arrayAlias") + self.assertIs(h5file.get_node("/array"), array) + + # Remove the attribute overshadowing the child. + del root.array + + # Now there is no *attribute* named ``array``. + self.assertRaises(AttributeError, delattr, root, "array") + + +class StateTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test that ``File`` and ``Node`` operations check their state (open or + closed, readable or writable) before proceeding.""" + + def test00_fileCopyFileClosed(self): + """Test copying a closed file.""" + + self.h5file.close() + h5cfname = tempfile.mktemp(suffix=".h5") + + try: + self.assertRaises( + tb.ClosedFileError, self.h5file.copy_file, h5cfname + ) + finally: + if Path(h5cfname).is_file(): + Path(h5cfname).unlink() + + def test01_fileCloseClosed(self): + """Test closing an already closed file.""" + + self.h5file.close() + + try: + self.h5file.close() + except tb.ClosedFileError: + self.fail("could not close an already closed file") + + def test02_fileFlushClosed(self): + """Test flushing a closed file.""" + + self.h5file.close() + self.assertRaises(tb.ClosedFileError, self.h5file.flush) + + def test03_fileFlushRO(self): + """Flushing a read-only file.""" + + self._reopen("r") + + try: + self.h5file.flush() + except tb.FileModeError: + self.fail("could not flush a read-only file") + + def test04_fileCreateNodeClosed(self): + """Test creating a node in a closed file.""" + + self.h5file.close() + self.assertRaises( + tb.ClosedFileError, self.h5file.create_group, "/", "test" + ) + + def test05_fileCreateNodeRO(self): + """Test creating a node in a read-only file.""" + + self._reopen("r") + self.assertRaises( + tb.FileModeError, self.h5file.create_group, "/", "test" + ) + + def test06_fileRemoveNodeClosed(self): + """Test removing a node from a closed file.""" + + self.h5file.create_group("/", "test") + 
self.h5file.close() + self.assertRaises( + tb.ClosedFileError, self.h5file.remove_node, "/", "test" + ) + + def test07_fileRemoveNodeRO(self): + """Test removing a node from a read-only file.""" + + self.h5file.create_group("/", "test") + self._reopen("r") + self.assertRaises( + tb.FileModeError, self.h5file.remove_node, "/", "test" + ) + + def test08_fileMoveNodeClosed(self): + """Test moving a node in a closed file.""" + + self.h5file.create_group("/", "test1") + self.h5file.create_group("/", "test2") + self.h5file.close() + self.assertRaises( + tb.ClosedFileError, self.h5file.move_node, "/test1", "/", "test2" + ) + + def test09_fileMoveNodeRO(self): + """Test moving a node in a read-only file.""" + + self.h5file.create_group("/", "test1") + self.h5file.create_group("/", "test2") + self._reopen("r") + self.assertRaises( + tb.FileModeError, self.h5file.move_node, "/test1", "/", "test2" + ) + + def test10_fileCopyNodeClosed(self): + """Test copying a node in a closed file.""" + + self.h5file.create_group("/", "test1") + self.h5file.create_group("/", "test2") + self.h5file.close() + self.assertRaises( + tb.ClosedFileError, self.h5file.copy_node, "/test1", "/", "test2" + ) + + def test11_fileCopyNodeRO(self): + """Test copying a node in a read-only file.""" + + self.h5file.create_group("/", "test1") + self._reopen("r") + self.assertRaises( + tb.FileModeError, self.h5file.copy_node, "/test1", "/", "test2" + ) + + def test13_fileGetNodeClosed(self): + """Test getting a node from a closed file.""" + + self.h5file.create_group("/", "test") + self.h5file.close() + self.assertRaises(tb.ClosedFileError, self.h5file.get_node, "/test") + + def test14_fileWalkNodesClosed(self): + """Test walking a closed file.""" + + self.h5file.create_group("/", "test1") + self.h5file.create_group("/", "test2") + self.h5file.close() + self.assertRaises(tb.ClosedFileError, next, self.h5file.walk_nodes()) + + def test15_fileAttrClosed(self): + """Test setting and deleting a node attribute in a 
closed file.""" + + self.h5file.create_group("/", "test") + self.h5file.close() + self.assertRaises( + tb.ClosedFileError, + self.h5file.set_node_attr, + "/test", + "foo", + "bar", + ) + self.assertRaises( + tb.ClosedFileError, self.h5file.del_node_attr, "/test", "foo" + ) + + def test16_fileAttrRO(self): + """Test setting and deleting a node attribute in a read-only file.""" + + self.h5file.create_group("/", "test") + self.h5file.set_node_attr("/test", "foo", "foo") + self._reopen("r") + self.assertRaises( + tb.FileModeError, self.h5file.set_node_attr, "/test", "foo", "bar" + ) + self.assertRaises( + tb.FileModeError, self.h5file.del_node_attr, "/test", "foo" + ) + + def test17_fileUndoClosed(self): + """Test undo operations in a closed file.""" + + self.h5file.enable_undo() + self.h5file.create_group("/", "test2") + self.h5file.close() + self.assertRaises(tb.ClosedFileError, self.h5file.is_undo_enabled) + self.assertRaises(tb.ClosedFileError, self.h5file.get_current_mark) + self.assertRaises(tb.ClosedFileError, self.h5file.undo) + self.assertRaises(tb.ClosedFileError, self.h5file.disable_undo) + + def test18_fileUndoRO(self): + """Test undo operations in a read-only file.""" + + self.h5file.enable_undo() + self.h5file.create_group("/", "test") + self._reopen("r") + self.assertEqual(self.h5file._undoEnabled, False) + # self.assertRaises(FileModeError, self.h5file.undo) + # self.assertRaises(FileModeError, self.h5file.disable_undo) + + def test19a_getNode(self): + """Test getting a child of a closed node.""" + + g1 = self.h5file.create_group("/", "g1") + g2 = self.h5file.create_group("/g1", "g2") + + # Close this *object* so that it should not be used. + g1._f_close() + self.assertRaises(tb.ClosedNodeError, g1._f_get_child, "g2") + + # Getting a node by its closed object is not allowed. + self.assertRaises(tb.ClosedNodeError, self.h5file.get_node, g1) + + # Going through that *node* should reopen it automatically. 
+ try: + g2_ = self.h5file.get_node("/g1/g2") + except tb.ClosedNodeError: + self.fail("closed parent group has not been reopened") + + # Already open nodes should be closed now, but not the new ones. + self.assertIs( + g2._v_isopen, + False, + "open child of closed group has not been closed", + ) + self.assertIs( + g2_._v_isopen, + True, + "open child of closed group has not been closed", + ) + + # And existing closed ones should remain closed, but not the new ones. + g1_ = self.h5file.get_node("/g1") + self.assertIs( + g1._v_isopen, False, "already closed group is not closed anymore" + ) + self.assertIs( + g1_._v_isopen, True, "newly opened group is still closed" + ) + + def test19b_getNode(self): + """Test getting a node that does not start with a slash ('/').""" + + # Create an array in the root + self.h5file.create_array("/", "array", [1, 2], title="Title example") + + # Get the array without specifying a leading slash + self.assertRaises(NameError, self.h5file.get_node, "array") + + def test20_removeNode(self): + """Test removing a closed node.""" + + # This test is a little redundant once we know that ``File.get_node()`` + # will reload a closed node, but anyway... + + group = self.h5file.create_group("/", "group") + array = self.h5file.create_array("/group", "array", [1]) + + # The closed *object* can not be used. + group._f_close() + self.assertRaises(tb.ClosedNodeError, group._f_remove) + self.assertRaises(tb.ClosedNodeError, self.h5file.remove_node, group) + + # Still, the *node* is reloaded when necessary. + try: + self.h5file.remove_node("/group", recursive=True) + except tb.ClosedNodeError: + self.fail("closed node has not been reloaded") + + # Objects of descendent removed nodes + # should have been automatically closed when removed. 
+ self.assertRaises(tb.ClosedNodeError, array._f_remove) + + self.assertNotIn("/group/array", self.h5file) # just in case + self.assertNotIn("/group", self.h5file) # just in case + + def test21_attrsOfNode(self): + """Test manipulating the attributes of a closed node.""" + + node = self.h5file.create_group("/", "test") + nodeAttrs = node._v_attrs + + nodeAttrs.test = attr = "foo" + + node._f_close() + self.assertRaises(tb.ClosedNodeError, getattr, node, "_v_attrs") + # The design of ``AttributeSet`` does not yet allow this test. + # self.assertRaises(ClosedNodeError, getattr, nodeAttrs, 'test') + + self.assertEqual(self.h5file.get_node_attr("/test", "test"), attr) + + def test21b_attrsOfNode(self): + """Test manipulating the attributes of a node in a read-only file.""" + + self.h5file.create_group("/", "test") + self.h5file.set_node_attr("/test", "test", "foo") + + self._reopen("r") + self.assertRaises( + tb.FileModeError, self.h5file.set_node_attr, "/test", "test", "bar" + ) + + def test22_fileClosesNode(self): + """Test node closing because of file closing.""" + + node = self.h5file.create_group("/", "test") + + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, getattr, node, "_v_attrs") + + def test23_reopenFile(self): + """Testing reopening a file and closing it several times.""" + + self.h5file.create_array("/", "test", [1, 2, 3]) + self.h5file.close() + + with tb.open_file(self.h5fname, "r") as h5file1: + if tb.file._FILE_OPEN_POLICY == "strict": + self.assertRaises(ValueError, tb.open_file, self.h5fname, "r") + else: + with tb.open_file(self.h5fname, "r") as h5file2: + if common.verbose: + print("(h5file1) test[1]:", h5file1.root.test[1]) + self.assertEqual(h5file1.root.test[1], 2) + h5file1.close() + + if common.verbose: + print("(h5file2) test[1]:", h5file2.root.test[1]) + self.assertEqual(h5file2.root.test[1], 2) + + +class FlavorTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test that setting, getting and changing the ``flavor`` 
attribute of a + leaf works as expected.""" + + array_data = np.arange(10) + scalar_data = np.int32(10) + + def _reopen(self, mode="r"): + super()._reopen(mode) + self.array = self.h5file.get_node("/array") + self.scalar = self.h5file.get_node("/scalar") + return True + + def setUp(self): + super().setUp() + self.array = self.h5file.create_array("/", "array", self.array_data) + self.scalar = self.h5file.create_array("/", "scalar", self.scalar_data) + + def test00_invalid(self): + """Setting an invalid flavor.""" + + self.assertRaises(tb.FlavorError, setattr, self.array, "flavor", "foo") + + def test01_readonly(self): + """Setting a flavor in a read-only file.""" + + self._reopen(mode="r") + self.assertRaises( + tb.FileModeError, + setattr, + self.array, + "flavor", + tb.flavor.internal_flavor, + ) + + def test02_change(self): + """Changing the flavor and reading data.""" + + for flavor in tb.flavor.all_flavors: + self.array.flavor = flavor + self.assertEqual(self.array.flavor, flavor) + idata = tb.flavor.array_of_flavor(self.array_data, flavor) + odata = self.array[:] + self.assertTrue(common.allequal(odata, idata, flavor)) + + def test03_store(self): + """Storing a changed flavor.""" + + for flavor in tb.flavor.all_flavors: + self.array.flavor = flavor + self.assertEqual(self.array.flavor, flavor) + self._reopen(mode="r+") + self.assertEqual(self.array.flavor, flavor) + + def test04_missing(self): + """Reading a dataset of a missing flavor.""" + + flavor = self.array.flavor # default is internal + self.array._v_attrs.FLAVOR = "foobar" # breaks flavor + self._reopen(mode="r") + idata = tb.flavor.array_of_flavor(self.array_data, flavor) + with self.assertWarns(tb.FlavorWarning): + odata = self.array.read() + self.assertTrue(common.allequal(odata, idata, flavor)) + + def test05_delete(self): + """Deleting the flavor of a dataset.""" + + self.array.flavor = "python" # non-default + self.assertEqual(self.array.flavor, "python") + 
self.assertEqual(self.array.attrs.FLAVOR, "python") + del self.array.flavor + self.assertEqual(self.array.flavor, tb.flavor.internal_flavor) + self.assertRaises(AttributeError, getattr, self.array.attrs, "FLAVOR") + + def test06_copyDeleted(self): + """Copying a node with a deleted flavor (see #100).""" + + snames = [node._v_name for node in [self.array, self.scalar]] + dnames = ["%s_copy" % name for name in snames] + for name in snames: + node = self.h5file.get_node("/", name) + del node.flavor + # Check the copied flavors right after copying and after reopening. + for fmode in ["r+", "r"]: + self._reopen(fmode) + for sname, dname in zip(snames, dnames): + if fmode == "r+": + snode = self.h5file.get_node("/", sname) + node = snode.copy("/", dname) + elif fmode == "r": + node = self.h5file.get_node("/", dname) + self.assertEqual( + node.flavor, + tb.flavor.internal_flavor, + "flavor of node ``%s`` is not internal: %r" + % (node._v_pathname, node.flavor), + ) + + def test07_restrict_flavors(self): + # regression test for gh-163 + + all_flavors = list(tb.flavor.all_flavors) + alias_map = tb.flavor.alias_map.copy() + converter_map = tb.flavor.converter_map.copy() + identifier_map = tb.flavor.identifier_map.copy() + description_map = tb.flavor.description_map.copy() + + try: + tb.flavor.restrict_flavors(keep=[]) + self.assertLess(len(tb.flavor.alias_map), len(alias_map)) + self.assertLess(len(tb.flavor.converter_map), len(converter_map)) + finally: + tb.flavor.all_flavors[:] = all_flavors[:] + tb.flavor.alias_map.update(alias_map) + tb.flavor.converter_map.update(converter_map) + tb.flavor.identifier_map.update(identifier_map) + tb.flavor.description_map.update(description_map) + + +# @common.unittest.skipIf( +# sys.getfilesystemencoding() != "utf-8", +# "need utf-8 file-system encoding", +# ) +@common.unittest.skipIf( + sys.platform == "win32", "no unicode filenames on windows" +) +class UnicodeFilename(common.TempFileMixin, common.PyTablesTestCase): + unicode_prefix 
= "para\u0140lel" + + def _getTempFileName(self): + return tempfile.mktemp(prefix=self.unicode_prefix, suffix=".h5") + + def setUp(self): + super().setUp() + + self.test = self.h5file.create_array("/", "test", [1, 2]) + + # So as to check the reading + self._reopen() + + def test01(self): + """Checking creating a filename with Unicode chars.""" + + test = self.h5file.root.test + if common.verbose: + print("Filename:", self.h5fname) + print("Array:", test[:]) + print("Should look like:", [1, 2]) + self.assertEqual(test[:], [1, 2], "Values does not match.") + + def test02(self): + """Checking tables.is_hdf5_file with a Unicode filename.""" + + self.h5file.close() + if common.verbose: + print("Filename:", self.h5fname) + print(" tables.is_hdf5_file?:", tb.is_hdf5_file(self.h5fname)) + self.assertTrue(tb.is_hdf5_file(self.h5fname)) + + def test03(self): + """Checking is_pytables_file with a Unicode filename.""" + + self.h5file.close() + if common.verbose: + print("Filename:", self.h5fname) + print("is_pytables_file?:", tb.is_pytables_file(self.h5fname)) + self.assertNotEqual(tb.is_pytables_file(self.h5fname), False) + + @staticmethod + def _store_carray(name, data, group): + atom = tb.Atom.from_dtype(data.dtype) + node = tb.CArray(group, name, shape=data.shape, atom=atom) + node[:] = data + + def test_store_and_load_with_non_ascii_attributes(self): + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, "a") + root = self.h5file.root + group = self.h5file.create_group(root, "face_data") + array_name = "data at 40\N{DEGREE SIGN}C" + data = np.sinh(np.linspace(-1.4, 1.4, 500)) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", tb.NaturalNameWarning) + self._store_carray(array_name, data, group) + group = self.h5file.create_group(root, "vertex_data") + + +@common.unittest.skipIf( + sys.version_info < (3, 6), "PEP 519 was implemented in Python 3.6" +) +class PathLikeFilename(common.TempFileMixin, common.PyTablesTestCase): + + def 
_getTempFileName(self): + from pathlib import Path + + return Path(tempfile.mktemp(suffix=".h5")) + + def setUp(self): + super().setUp() + + self.test = self.h5file.create_array("/", "test", [1, 2]) + + # So as to check the reading + self._reopen() + + def test01(self): + """Checking creating a file with a PathLike object as the filename.""" + + test = self.h5file.root.test + if common.verbose: + print("Filename:", self.h5fname) + print("Array:", test[:]) + print("Should look like:", [1, 2]) + self.assertEqual(test[:], [1, 2], "Values does not match.") + + def test02(self): + """Checking tables.is_hdf5_file with a PathLike object as the + filename.""" + + self.h5file.close() + if common.verbose: + print("Filename:", self.h5fname) + print(" tables.is_hdf5_file?:", tb.is_hdf5_file(self.h5fname)) + self.assertTrue(tb.is_hdf5_file(self.h5fname)) + + def test03(self): + """Checking is_pytables_file with a PathLike object as the filename.""" + + self.h5file.close() + if common.verbose: + print("Filename:", self.h5fname) + print("is_pytables_file?:", tb.is_pytables_file(self.h5fname)) + self.assertNotEqual(tb.is_pytables_file(self.h5fname), False) + + def test04_str(self): + str(self.h5file) + + +class FilePropertyTestCase(common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5fname = tempfile.mktemp(".h5") + self.h5file = None + + def tearDown(self): + if self.h5file: + self.h5file.close() + + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + super().tearDown() + + def test_get_filesize(self): + data = np.zeros((2000, 2000)) + datasize = np.prod(data.shape) * data.dtype.itemsize + + self.h5file = tb.open_file(self.h5fname, mode="w") + self.h5file.create_array(self.h5file.root, "array", data) + h5_filesize = self.h5file.get_filesize() + self.h5file.close() + + fs_filesize = Path(self.h5fname).stat().st_size + + self.assertGreaterEqual(h5_filesize, datasize) + self.assertEqual(h5_filesize, fs_filesize) + + def 
test01_null_userblock_size(self): + self.h5file = tb.open_file(self.h5fname, mode="w") + self.h5file.create_array(self.h5file.root, "array", [1, 2]) + self.assertEqual(self.h5file.get_userblock_size(), 0) + + def test02_null_userblock_size(self): + self.h5file = tb.open_file(self.h5fname, mode="w") + self.h5file.create_array(self.h5file.root, "array", [1, 2]) + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + self.assertEqual(self.h5file.get_userblock_size(), 0) + + def test03_null_userblock_size(self): + USER_BLOCK_SIZE = 0 + self.h5file = tb.open_file( + self.h5fname, mode="w", user_block_size=USER_BLOCK_SIZE + ) + self.h5file.create_array(self.h5file.root, "array", [1, 2]) + self.assertEqual(self.h5file.get_userblock_size(), 0) + + def test01_userblock_size(self): + USER_BLOCK_SIZE = 512 + self.h5file = tb.open_file( + self.h5fname, mode="w", user_block_size=USER_BLOCK_SIZE + ) + self.h5file.create_array(self.h5file.root, "array", [1, 2]) + self.assertEqual(self.h5file.get_userblock_size(), USER_BLOCK_SIZE) + + def test02_userblock_size(self): + USER_BLOCK_SIZE = 512 + self.h5file = tb.open_file( + self.h5fname, mode="w", user_block_size=USER_BLOCK_SIZE + ) + self.h5file.create_array(self.h5file.root, "array", [1, 2]) + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + self.assertEqual(self.h5file.get_userblock_size(), USER_BLOCK_SIZE) + + def test_small_userblock_size(self): + USER_BLOCK_SIZE = 12 + self.assertRaises( + ValueError, + tb.open_file, + self.h5fname, + mode="w", + user_block_size=USER_BLOCK_SIZE, + ) + + def test_invalid_userblock_size(self): + USER_BLOCK_SIZE = 1025 + self.assertRaises( + ValueError, + tb.open_file, + self.h5fname, + mode="w", + user_block_size=USER_BLOCK_SIZE, + ) + + +# Test for reading a file that uses Blosc and created on a big-endian platform +@common.unittest.skipIf(not common.blosc_avail, "Blosc not available") +class BloscBigEndian(common.TestFileMixin, 
common.PyTablesTestCase): + h5fname = common.test_filename("blosc_bigendian.h5") + + def test00_bigendian(self): + """Checking compatibility with Blosc on big-endian machines.""" + + # Check that we can read the contents without problems (nor warnings!) + for dset_name in ("i1", "i2", "i4", "i8"): + a = np.arange(10, dtype=dset_name) + dset = self.h5file.get_node("/" + dset_name) + self.assertTrue( + common.allequal(a, dset[:]), "Error in big-endian data!" + ) + + +# Case test for Blosc and subprocesses (via multiprocessing module) + + +# The worker function for the subprocess (needs to be here because Windows +# has problems pickling nested functions with the multiprocess module :-/) +def _worker(fn, qout=None): + fp = tb.open_file(fn) + if common.verbose: + print("About to load: ", fn) + rows = fp.root.table.where("(f0 < 10)") + if common.verbose: + print("Got the iterator, about to iterate") + next(rows) + if common.verbose: + print("Succeeded in one iteration\n") + fp.close() + + if qout is not None: + qout.put("Done") + + +# From: Yaroslav Halchenko +# Subject: Skip the unittest on kFreeBSD and Hurd -- locking seems to +# be N/A +# +# on kfreebsd /dev/shm is N/A +# on Hurd -- inter-process semaphore locking is N/A +@common.unittest.skipIf( + not multiprocessing_imported, "multiprocessing module not available" +) +@common.unittest.skipIf( + platform.system().lower() in ("gnu", "gnu/kfreebsd"), + "multiprocessing module is not " "supported on Hurd/kFreeBSD", +) +@common.unittest.skipIf(not common.blosc_avail, "Blosc not available") +class BloscSubprocess(common.PyTablesTestCase): + def test_multiprocess(self): + # Create a relatively large table with Blosc level 9 (large blocks) + h5fname = tempfile.mktemp(prefix="multiproc-blosc9-", suffix=".h5") + try: + size = 300_000 + sa = np.fromiter( + ((i, i**2, i // 3) for i in range(size)), "i4,i8,f8" + ) + with tb.open_file(h5fname, "w") as h5file: + h5file.create_table( + h5file.root, + "table", + sa, + 
filters=tb.Filters(complevel=9, complib="blosc"), + chunkshape=(size // 3,), + ) + + if common.verbose: + print("**** Running from main process:") + _worker(h5fname) + + if common.verbose: + print("**** Running from subprocess:") + + try: + qout = mp.Queue() + except OSError: + print("Permission denied due to /dev/shm settings") + else: + ps = mp.Process( + target=_worker, + args=( + h5fname, + qout, + ), + ) + ps.daemon = True + ps.start() + + result = qout.get() + if common.verbose: + print(result) + + ps.join() + # Avoid warnings with later tests forking subprocesses. + ps.terminate() + finally: + Path(h5fname).unlink() + + +class HDF5ErrorHandling(common.PyTablesTestCase): + def setUp(self): + super().setUp() + self._old_policy = tb.HDF5ExtError.DEFAULT_H5_BACKTRACE_POLICY + + def tearDown(self): + tb.HDF5ExtError.DEFAULT_H5_BACKTRACE_POLICY = self._old_policy + super().tearDown() + + def test_silence_messages(self): + code = """ +import tables as tb +tb.silence_hdf5_messages(False) +tb.silence_hdf5_messages() +try: + tb.open_file(r'%s') +except tb.HDF5ExtError as e: + pass +""" + + filename = tempfile.mktemp(prefix="hdf5-error-handling-", suffix=".py") + try: + with open(filename, "w") as fp: + fp.write(code % filename) + + p = subprocess.Popen( + [sys.executable, filename], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = p.communicate() + + self.assertNotIn("HDF5-DIAG", stderr.decode("ascii")) + finally: + Path(filename).unlink() + + # This test is a bit flaky and in some situations it fails + # E.g. 
on Mac OSX (arm64), I am getting this: + # FAIL: None (tables.tests.test_basics.HDF5ErrorHandling) + # ---------------------------------------------------------------------- + # Traceback (most recent call last): + # File "/Users/faltet/software/PyTables-upstream/tables/tests/test_basics.py", + # line 2231, in test_enable_messages + # self.assertIn("HDF5-DIAG", stderr.decode('ascii')) + # AssertionError: 'HDF5-DIAG' not found in 'Traceback (most recent call last):\n + # symbol not found in flat namespace \'_blosc2_cbuffer_sizes\'\n' + # As the fix is not clear to me, I prefer to disable it until a more robust + # path is found. + def _test_enable_messages(self): + code = """ +import tables as tb +tb.silence_hdf5_messages() +tb.silence_hdf5_messages(False) +try: + tb.open_file(r'%s') +except tb.HDF5ExtError as e: + pass +""" + + filename = tempfile.mktemp(prefix="hdf5-error-handling-", suffix=".py") + try: + Path(filename).write_text(code % filename) + + p = subprocess.Popen( + [sys.executable, filename], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = p.communicate() + + self.assertIn("HDF5-DIAG", stderr.decode("ascii")) + finally: + Path(filename).unlink() + + def _raise_exterror(self): + h5fname = tempfile.mktemp(".h5") + Path(h5fname).write_text("") + + try: + h5file = tb.open_file(h5fname) + h5file.close() + finally: + Path(h5fname).unlink() + + def test_h5_backtrace_quiet(self): + tb.HDF5ExtError.DEFAULT_H5_BACKTRACE_POLICY = True + + with self.assertRaises(tb.HDF5ExtError) as cm: + self._raise_exterror() + + self.assertIsNotNone(cm.exception.h5backtrace) + + def test_h5_backtrace_verbose(self): + tb.HDF5ExtError.DEFAULT_H5_BACKTRACE_POLICY = "VERBOSE" + + with self.assertRaises(tb.HDF5ExtError) as cm: + self._raise_exterror() + + self.assertIsNotNone(cm.exception.h5backtrace) + msg = str(cm.exception) + self.assertIn(cm.exception.h5backtrace[-1][-1], msg) + + def test_h5_backtrace_ignore(self): + 
tb.HDF5ExtError.DEFAULT_H5_BACKTRACE_POLICY = False + + with self.assertRaises(tb.HDF5ExtError) as cm: + self._raise_exterror() + + self.assertIsNone(cm.exception.h5backtrace) + + +class TestDescription(common.PyTablesTestCase): + def test_isdescription_inheritance(self): + # Regression test for gh-65 + class TestDescParent(tb.IsDescription): + c = tb.Int32Col() + + class TestDesc(TestDescParent): + pass + + self.assertIn("c", TestDesc.columns) + + def test_descr_from_dtype(self): + t = np.dtype([("col1", "int16"), ("col2", float)]) + descr, byteorder = tb.description.descr_from_dtype(t) + + self.assertIn("col1", descr._v_colobjects) + self.assertIn("col2", descr._v_colobjects) + self.assertEqual(len(descr._v_colobjects), 2) + self.assertIsInstance(descr._v_colobjects["col1"], tb.Col) + self.assertIsInstance(descr._v_colobjects["col2"], tb.Col) + self.assertEqual(descr._v_colobjects["col1"].dtype, np.int16) + self.assertEqual(descr._v_colobjects["col2"].dtype, float) + + def test_descr_from_dtype_rich_dtype(self): + header = [ + (("timestamp", "t"), "u4"), + (("unit (cluster) id", "unit"), "u2"), + ] + t = np.dtype(header) + + descr, byteorder = tb.description.descr_from_dtype(t) + self.assertEqual(len(descr._v_names), 2) + self.assertEqual(sorted(descr._v_names), ["t", "unit"]) + + def test_descr_from_dtype_comp_01(self): + d1 = np.dtype([("x", "int16"), ("y", "int16")]) + d_comp = np.dtype([("time", "float64"), ("value", d1)]) + + descr, byteorder = tb.description.descr_from_dtype(d_comp) + + self.assertTrue(descr._v_is_nested) + self.assertIn("time", descr._v_colobjects) + self.assertIn("value", descr._v_colobjects) + self.assertEqual(len(descr._v_colobjects), 2) + self.assertIsInstance(descr._v_colobjects["time"], tb.Col) + self.assertTrue( + isinstance(descr._v_colobjects["value"], tb.Description) + ) + self.assertEqual(descr._v_colobjects["time"].dtype, np.float64) + + def test_descr_from_dtype_comp_02(self): + d1 = np.dtype([("x", "int16"), ("y", "int16")]) 
+ + d_comp = np.dtype([("time", "float64"), ("value", (d1, (1,)))]) + + with self.assertWarns(UserWarning): + descr, byteorder = tb.description.descr_from_dtype(d_comp) + + self.assertTrue(descr._v_is_nested) + self.assertIn("time", descr._v_colobjects) + self.assertIn("value", descr._v_colobjects) + self.assertEqual(len(descr._v_colobjects), 2) + self.assertIsInstance(descr._v_colobjects["time"], tb.Col) + self.assertTrue( + isinstance(descr._v_colobjects["value"], tb.Description) + ) + self.assertEqual(descr._v_colobjects["time"].dtype, np.float64) + + def test_dtype_from_descr_is_description(self): + # See gh-152 + class TestDescParent(tb.IsDescription): + col1 = tb.Int16Col() + col2 = tb.FloatCol() + + dtype = np.dtype([("col1", "int16"), ("col2", float)]) + t = tb.description.dtype_from_descr(TestDescParent) + + self.assertEqual(t, dtype) + + def test_dtype_from_descr_is_description_instance(self): + # See gh-152 + class TestDescParent(tb.IsDescription): + col1 = tb.Int16Col() + col2 = tb.FloatCol() + + dtype = np.dtype([("col1", "int16"), ("col2", float)]) + t = tb.description.dtype_from_descr(TestDescParent()) + + self.assertEqual(t, dtype) + + def test_dtype_from_descr_description_instance(self): + # See gh-152 + class TestDescParent(tb.IsDescription): + col1 = tb.Int16Col() + col2 = tb.FloatCol() + + dtype = np.dtype([("col1", "int16"), ("col2", float)]) + desctiption = tb.Description(TestDescParent().columns) + t = tb.description.dtype_from_descr(desctiption) + + self.assertEqual(t, dtype) + + def test_dtype_from_descr_dict(self): + # See gh-152 + dtype = np.dtype([("col1", "int16"), ("col2", float)]) + t = tb.description.dtype_from_descr( + {"col1": tb.Int16Col(), "col2": tb.FloatCol()} + ) + + self.assertEqual(t, dtype) + + def test_dtype_from_descr_invalid_type(self): + # See gh-152 + self.assertRaises(ValueError, tb.description.dtype_from_descr, []) + + def test_dtype_from_descr_byteorder(self): + # See gh-152 + class TestDescParent(tb.IsDescription): 
+ col1 = tb.Int16Col() + col2 = tb.FloatCol() + + t = tb.description.dtype_from_descr(TestDescParent, byteorder=">") + + self.assertEqual(t["col1"].byteorder, ">") + self.assertEqual(t["col2"].byteorder, ">") + + def test_str_names(self): + # see gh-42 + d = {"name": tb.Int16Col()} + descr = tb.Description(d) + self.assertEqual(sorted(descr._v_names), sorted(d.keys())) + self.assertIsInstance(descr._v_dtype, np.dtype) + self.assertEqual(sorted(descr._v_dtype.fields), sorted(d.keys())) + + +class TestAtom(common.PyTablesTestCase): + def test_atom_attributes01(self): + shape = (10, 10) + a = tb.Float64Atom(shape=shape) + + self.assertEqual(a.dflt, 0.0) + self.assertEqual(a.dtype, np.dtype((np.float64, shape))) + self.assertEqual(a.itemsize, a.dtype.base.itemsize) + self.assertEqual(a.kind, "float") + self.assertEqual(a.ndim, len(shape)) + # self.assertEqual(a.recarrtype, ) + self.assertEqual(a.shape, shape) + self.assertEqual(a.size, a.itemsize * np.prod(shape)) + self.assertEqual(a.type, "float64") + + def test_atom_copy01(self): + shape = (10, 10) + a = tb.Float64Atom(shape=shape) + aa = a.copy() + self.assertEqual(aa.shape, shape) + + def test_atom_copy02(self): + dflt = 2.0 + a = tb.Float64Atom(dflt=dflt) + aa = a.copy() + self.assertEqual(aa.dflt, dflt) + + def test_atom_copy_override(self): + shape = (10, 10) + dflt = 2.0 + a = tb.Float64Atom(shape=shape, dflt=dflt) + aa = a.copy(dflt=-dflt) + self.assertEqual(aa.shape, shape) + self.assertNotEqual(aa.dflt, dflt) + self.assertEqual(aa.dflt, -dflt) + + +class TestCol(common.PyTablesTestCase): + def test_col_copy01(self): + shape = (10, 10) + c = tb.Float64Col(shape=shape) + cc = c.copy() + self.assertEqual(cc.shape, shape) + + def test_col_copy02(self): + dflt = 2.0 + c = tb.Float64Col(dflt=dflt) + cc = c.copy() + self.assertEqual(cc.dflt, dflt) + + def test_col_copy_override(self): + shape = (10, 10) + dflt = 2.0 + pos = 3 + c = tb.Float64Col(shape=shape, dflt=dflt, pos=pos) + cc = c.copy(pos=2) + 
self.assertEqual(cc.shape, shape) + self.assertEqual(cc.dflt, dflt) + self.assertNotEqual(cc._v_pos, pos) + self.assertEqual(cc._v_pos, 2) + + +class TestSysattrCompatibility(common.PyTablesTestCase): + def test_open_python2(self): + h5fname = common.test_filename("python2.h5") + with tb.open_file(h5fname, "r") as h5file: + self.assertTrue(h5file.isopen) + + def test_open_python3(self): + h5fname = common.test_filename("python3.h5") + with tb.open_file(h5fname, "r") as h5file: + self.assertTrue(h5file.isopen) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + for i in range(niter): + theSuite.addTest(common.make_suite(OpenFileFailureTestCase)) + theSuite.addTest(common.make_suite(NodeCacheOpenFile)) + theSuite.addTest(common.make_suite(NoNodeCacheOpenFile)) + theSuite.addTest(common.make_suite(DictNodeCacheOpenFile)) + theSuite.addTest(common.make_suite(CheckFileTestCase)) + theSuite.addTest(common.make_suite(ThreadingTestCase)) + theSuite.addTest(common.make_suite(PythonAttrsTestCase)) + theSuite.addTest(common.make_suite(StateTestCase)) + theSuite.addTest(common.make_suite(FlavorTestCase)) + theSuite.addTest(common.make_suite(UnicodeFilename)) + theSuite.addTest(common.make_suite(PathLikeFilename)) + theSuite.addTest(common.make_suite(FilePropertyTestCase)) + theSuite.addTest(common.make_suite(BloscBigEndian)) + theSuite.addTest(common.make_suite(BloscSubprocess)) + theSuite.addTest(common.make_suite(HDF5ErrorHandling)) + theSuite.addTest(common.make_suite(TestDescription)) + theSuite.addTest(common.make_suite(TestAtom)) + theSuite.addTest(common.make_suite(TestCol)) + theSuite.addTest(common.make_suite(TestSysattrCompatibility)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_carray.py b/venv/Lib/site-packages/tables/tests/test_carray.py new file mode 100644 index 0000000..eaa569c 
--- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_carray.py @@ -0,0 +1,3266 @@ +import sys +from pathlib import Path + +import numpy as np + +import tables as tb +from tables.tests import common + + +def foreign_byteorder(): + return {"little": "big", "big": "little"}[sys.byteorder] + + +class BasicTestCase(common.TempFileMixin, common.PyTablesTestCase): + # Default values + obj = None + flavor = "numpy" + type = "int32" + shape = (2, 2) + start = 0 + stop = 10 + step = 1 + length = 1 + chunkshape = (5, 5) + byteorder = None + compress = 0 + complib = "zlib" # Default compression library + shuffle = 0 + bitshuffle = 0 + fletcher32 = 0 + reopen = 1 # Tells whether the file has to be reopened on each test or not + + def setUp(self): + super().setUp() + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + if self.reopen: + # Close the file + self.h5file.close() + + def populateFile(self): + group = self.rootgroup + obj = self.obj + if obj is None: + if self.type == "string": + atom = tb.StringAtom(itemsize=self.length) + else: + atom = tb.Atom.from_type(self.type) + else: + atom = None + title = self.__class__.__name__ + filters = tb.Filters( + complevel=self.compress, + complib=self.complib, + shuffle=self.shuffle, + bitshuffle=self.bitshuffle, + fletcher32=self.fletcher32, + ) + carray = self.h5file.create_carray( + group, + "carray1", + atom=atom, + shape=self.shape, + title=title, + filters=filters, + chunkshape=self.chunkshape, + byteorder=self.byteorder, + obj=obj, + ) + carray.flavor = self.flavor + + # Fill it with data + self.rowshape = list(carray.shape) + self.objsize = self.length * np.prod(carray.shape) + + if self.flavor == "numpy": + if self.type == "string": + object = np.ndarray( + buffer=b"a" * self.objsize, + shape=self.shape, + dtype="S%s" % carray.atom.itemsize, + ) + else: + object = np.arange(self.objsize, dtype=carray.atom.dtype) + object.shape = carray.shape + if common.verbose: + 
print("Object to append -->", repr(object)) + + carray[...] = object + + def _get_shape(self): + if self.shape is not None: + shape = self.shape + else: + shape = np.asarray(self.obj).shape + + return shape + + def test00_attributes(self): + if self.reopen: + self.h5file = tb.open_file(self.h5fname, "r") + obj = self.h5file.get_node("/carray1") + + shape = self._get_shape() + + self.assertEqual(obj.flavor, self.flavor) + self.assertEqual(obj.shape, shape) + self.assertEqual(obj.ndim, len(shape)) + self.assertEqual(obj.chunkshape, self.chunkshape) + self.assertEqual(obj.nrows, shape[0]) + self.assertEqual(obj.atom.type, self.type) + + def test01_readCArray(self): + """Checking read() of chunked layout arrays.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_readCArray..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + if self.reopen: + self.h5file = tb.open_file(self.h5fname, "r") + carray = self.h5file.get_node("/carray1") + + # Choose a small value for buffer size + carray.nrowsinbuf = 3 + if common.verbose: + print("CArray descr:", repr(carray)) + print("shape of read array ==>", carray.shape) + print("reopening?:", self.reopen) + + shape = self._get_shape() + + # Build the array to do comparisons + if self.flavor == "numpy": + if self.type == "string": + object_ = np.ndarray( + buffer=b"a" * self.objsize, + shape=self.shape, + dtype=f"S{carray.atom.itemsize}", + ) + else: + object_ = np.arange(self.objsize, dtype=carray.atom.dtype) + object_.shape = shape + + stop = self.stop + # stop == None means read only the element designed by start + # (in read() contexts) + if self.stop is None: + if self.start == -1: # corner case + stop = carray.nrows + else: + stop = self.start + 1 + # Protection against number of elements less than existing + # if rowshape[self.extdim] < self.stop or self.stop == 0: + if carray.nrows < stop: + # self.stop == 0 means last row only in read() + # and not in [::] slicing notation + stop 
def test01_readCArray_out_argument(self):
    """Checking read(out=...) of chunked layout arrays."""

    # Fetch the node under test, reopening the file read-only if requested
    if self.reopen:
        self.h5file = tb.open_file(self.h5fname, "r")
    carray = self.h5file.get_node("/carray1")

    # Expected shape; falls back to the shape of ``self.obj`` when
    # ``self.shape`` is None, so it is always usable below
    shape = self._get_shape()

    # Choose a small value for buffer size
    carray.nrowsinbuf = 3

    # Build the array to do comparisons
    if self.flavor == "numpy":
        if self.type == "string":
            object_ = np.ndarray(
                buffer=b"a" * self.objsize,
                shape=shape,
                dtype=f"S{carray.atom.itemsize}",
            )
        else:
            object_ = np.arange(self.objsize, dtype=carray.atom.dtype)
            object_.shape = shape

    stop = self.stop
    # stop == None means read only the element designed by start
    # (in read() contexts)
    if self.stop is None:
        if self.start == -1:  # corner case
            stop = carray.nrows
        else:
            stop = self.start + 1
    # Protection against number of elements less than existing
    if carray.nrows < stop:
        # self.stop == 0 means last row only in read()
        # and not in [::] slicing notation
        stop = int(carray.nrows)
    # do a copy() in order to ensure that len(object._data)
    # actually do a measure of its length
    obj = object_[self.start : stop : self.step].copy()

    # Read the selection into a preallocated, contiguous output buffer
    try:
        data = np.empty(shape, dtype=carray.atom.dtype)
        data = data[self.start : stop : self.step].copy()
        carray.read(self.start, stop, self.step, out=data)
    except IndexError:
        # Same fallback for every flavor.  Use the atom dtype, as the
        # try branch does, instead of the fixture's type name (which is
        # "string" for string cases rather than the stored "S<n>" dtype).
        data = np.empty(shape=shape, dtype=carray.atom.dtype)

    if hasattr(data, "shape"):
        self.assertEqual(len(data.shape), len(shape))
    else:
        # Scalar case
        self.assertEqual(len(shape), 1)
    self.assertEqual(carray.chunkshape, self.chunkshape)
    self.assertTrue(common.allequal(data, obj, self.flavor))
def test03_setitemCArray(self):
    """Checking chunked layout array __setitem__ special method."""

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test03_setitemCArray..." % self.__class__.__name__
        )

    if not hasattr(self, "slices"):
        # If there is not a slices attribute, create it
        self.slices = (slice(self.start, self.stop, self.step),)

    # Fetch the node under test; the file is reopened in append mode
    # because this test writes to the array
    if self.reopen:
        self.h5file = tb.open_file(self.h5fname, "a")
    carray = self.h5file.get_node("/carray1")

    if common.verbose:
        print("CArray descr:", repr(carray))
        print("shape of read array ==>", carray.shape)
        print("reopening?:", self.reopen)

    shape = self._get_shape()

    # Build the array to do comparisons
    if self.type == "string":
        object_ = np.ndarray(
            buffer=b"a" * self.objsize,
            shape=shape,
            dtype=f"S{carray.atom.itemsize}",
        )
    else:
        object_ = np.arange(self.objsize, dtype=carray.atom.dtype)
        object_.shape = shape

    # do a copy() in order to ensure that len(object._data)
    # actually do a measure of its length
    obj = object_[self.slices].copy()

    # Apply the same modification to the in-memory reference and to the
    # on-disk array so they can be compared afterwards
    if self.type == "string":
        if hasattr(self, "wslice"):
            # Was ``self.wslize`` (typo), which raised AttributeError
            obj[self.wslice] = "xXx"
            carray[self.wslice] = "xXx"
        elif sum(obj[self.slices].shape) != 0:
            obj[:] = "xXx"
            if obj.size > 0:
                carray[self.slices] = obj
    else:
        if hasattr(self, "wslice"):
            obj[self.wslice] = obj[self.wslice] * 2 + 3
            carray[self.wslice] = carray[self.wslice] * 2 + 3
        elif sum(obj[self.slices].shape) != 0:
            obj = obj * 2 + 3
            if np.prod(obj.shape) > 0:
                carray[self.slices] = carray[self.slices] * 2 + 3
        # Cast again object to its original type
        obj = np.array(obj, dtype=carray.atom.dtype)

    # Read data from the array
    try:
        data = carray[self.slices]
    except IndexError:
        print("IndexError!")
        # The fallback does not depend on the flavor
        data = np.empty(shape=self.shape, dtype=self.type)

    if common.verbose:
        print("Object read:\n", repr(data))  # , data.info()
        print("Should look like:\n", repr(obj))  # , object.info()
        if hasattr(obj, "shape"):
            print("Original object shape:", self.shape)
            print("Shape read:", data.shape)
            print("shape should look as:", obj.shape)

    if not hasattr(data, "shape"):
        # Scalar case
        self.assertEqual(len(self.shape), 1)
    self.assertEqual(carray.chunkshape, self.chunkshape)
    self.assertTrue(common.allequal(data, obj, self.flavor))
@common.unittest.skipIf(
    not common.lzo_avail, "LZO compression library not available"
)
class Slices3CArrayTestCase(BasicTestCase):
    """LZO-compressed 4-D int32 array sliced on its first three axes."""

    compress = 1  # To show the chunks if DEBUG is on
    complib = "lzo"
    type = "int32"
    shape = (2, 3, 4, 2)
    chunkshape = (5, 5, 5, 5)

    # History of slice selections tried for this case.  They used to be
    # live assignments that were immediately overwritten; only the last
    # assignment ever took effect, so the earlier ones are kept here as
    # comments.  The trailing letters are the original author's marks
    # (presumably Y = works, N = fails, W = unclear -- TODO confirm).
    # slices = (slice(1, 2, 1), slice(0, None, None),
    #           slice(1, 4, 2))  # Don't work
    # slices = (slice(None, None, None), slice(0, None, None),
    #           slice(1,4,1))  # W
    # slices = (slice(None, None, None), slice(None, None, None),
    #           slice(1,4,2))  # N
    # slices = (slice(1,2,1), slice(None, None, None), slice(1,4,2))  # N
    # Disable the failing test temporarily with a working test case
    # slices = (slice(1, 2, 1), slice(1, 4, None), slice(1, 4, 2))  # Y
    # slices = (slice(1,2,1), slice(0, 4, None), slice(1,4,1))  # Y
    # slices = (slice(1,2,1), slice(0, 4, None), slice(1,4,2),
    #           slice(0,100,1))  # N

    # Selection actually exercised (the value the class always ended with)
    slices = (slice(1, 2, 1), slice(0, 4, None), slice(1, 4, 2))  # N
class MD5WriteTestCase(BasicTestCase):
    """Five-dimensional int32 case with single-element chunks."""

    type = "int32"
    chunkshape = (1, 1, 1, 1, 1)

    # Shapes that have been tried with this case:
    #   (1, 1, 2, 1)        minimum shape showing problems with HDF5 1.6.1
    #   (2, 3, 2, 4, 5)     floating point exception (HDF5 1.6.1)
    #   (2, 3, 3, 2, 5, 6)  segmentation fault (HDF5 1.6.1)
    shape = (2, 2, 3, 4, 5)  # ok

    # Row selection used by the read/getitem/setitem tests
    start, stop, step = 1, 10, 10
@common.unittest.skipIf(
    not common.blosc_avail, "BLOSC compression library not available"
)
class BloscFletcherTestCase(BasicTestCase):
    """Blosc compression combined with shuffle and fletcher32 (see gh-21)."""

    # Array geometry
    shape = (200, 300)
    chunkshape = (100, 100)

    # Filter pipeline
    complib = "blosc"
    compress = 1
    shuffle = 1
    fletcher32 = 1

    # Row selection used by the read/getitem/setitem tests
    start, stop, step = 3, 10, 7
@common.unittest.skipIf(
    not common.blosc2_avail, "BLOSC2 compression library not available"
)
class Blosc2ComprTestCase(BasicTestCase):
    """Blosc2-compressed case using the value of ``foreign_byteorder()``."""

    # Filter pipeline
    complib = "blosc2"
    compress = 1  # sss

    chunkshape = (10, 10)
    byteorder = foreign_byteorder()

    # Row selection used by the read/getitem/setitem tests
    start, stop, step = 3, 10, 3
@common.unittest.skipIf(
    not common.blosc2_avail, "BLOSC2 compression library not available"
)
class Blosc2Ndim3ChunkOptTestCase(BasicTestCase):
    """3-D Blosc2 case in native byte order, sliced with an Ellipsis."""

    type = "int32"
    byteorder = sys.byteorder

    # 10x10x10 array stored in 7x7x7 chunks
    shape = (10, 10, 10)
    chunkshape = (7, 7, 7)

    # Filter pipeline
    complib = "blosc2"
    compress = 1

    # Selection used by the getitem/setitem tests
    slices = (slice(1, 2), Ellipsis, slice(1, 4))
@common.unittest.skipIf(
    not common.blosc2_avail, "BLOSC2 compression library not available"
)
class Blosc2NDNoChunkshape(common.TestFileMixin, common.PyTablesTestCase):
    """Read a Blosc2 chunked array lacking chunk rank/shape in filter args.

    The fixture file ``b2nd-no-chunkshape.h5`` holds a 6x6 int8 dataset at
    ``/data``; ``adata`` is the content it is expected to contain.
    """

    h5fname = common.test_filename("b2nd-no-chunkshape.h5")

    # Expected content of "/data"
    adata = np.array(
        [
            [0, 1, 2, 3, 0, 1],
            [4, 5, 6, 7, 4, 5],
            [8, 9, 10, 11, 8, 9],
            [12, 13, 14, 15, 12, 13],
            [0, 1, 2, 3, 0, 1],
            [4, 5, 6, 7, 4, 5],
        ],
        dtype="int8",
    )

    def test_data_opt(self):
        # Full read of the dataset
        # (presumably the optimized read path -- TODO confirm)
        node = self.h5file.get_node("/data")
        matches = common.areArraysEqual(node[:], self.adata[:])
        self.assertTrue(matches)

    def test_data_filter(self):
        # Strided read of every other row
        # (presumably goes through the HDF5 filter -- TODO confirm)
        node = self.h5file.get_node("/data")
        matches = common.areArraysEqual(node[::2], self.adata[::2])
        self.assertTrue(matches)
class StringComprTestCase(BasicTestCase):
    """String-typed array meant to be stored with compression enabled."""

    type = "string"
    length = 20
    shape = (20, 2, 10)
    # shape = (20,0,10,20)
    # Every other compressed case in this module enables compression via
    # ``compress``; this class only set ``compr``, so the compression case
    # apparently ran uncompressed.
    compress = 1
    # NOTE(review): ``compr`` kept in case something still reads it; it
    # looks like a misspelling of ``compress`` -- confirm and drop.
    compr = 1
    # shuffle = 1  # this shouldn't do nothing on chars
    chunkshape = (50, 50, 2)
    start = -1
    stop = 100
    step = 20
class ComprTestCase(BasicTestCase):
    """1-D float64 case with compression and shuffle enabled."""

    type = "float64"

    # 200 elements stored in chunks of 21
    shape = (200,)
    chunkshape = (21,)

    # Filter pipeline
    compress = 1
    shuffle = 1
    # NOTE(review): ``compr`` sits next to ``compress``; whether anything
    # reads it cannot be told from here -- confirm before removing.
    compr = 1

    # Row selection used by the read/getitem/setitem tests
    start, stop, step = 51, 100, 7
def test_buffer_too_small(self):
    """read() must reject an ``out`` buffer smaller than the selection.

    The whole array (``self.size`` elements) is requested into a buffer
    holding only half as many, which must raise ``ValueError`` with the
    "output array size invalid" message.
    """
    _, disk_array = self.create_array()
    out_buffer = np.empty((self.size // 2,), "i8")
    # One assertion checks both the exception type and its message; the
    # former try/except silently passed when nothing was raised.
    with self.assertRaisesRegex(
        ValueError, "output array size invalid, got"
    ):
        disk_array.read(0, self.size, 1, out_buffer)
# XXX
def test_random_data(self):
    """Check ``size_on_disk`` after filling the array with random data."""
    self.create_array(complevel=1)
    self.array[:] = np.random.randint(0, 1e6, self.array_size)
    self.h5file.flush()

    # size_on_disk must agree with the real file size, up to the
    # allowance configured in ``self.hdf_overhead``
    file_size = Path(self.h5fname).stat().st_size
    disk_vs_file = abs(self.array.size_on_disk - file_size)
    self.assertTrue(disk_vs_file <= self.hdf_overhead)

    uncompressed_size = 10_000 * 10 * 2

    # XXX: check. The test fails if blosc is not available
    if tb.which_lib_version("blosc") is None:
        deviation = abs(self.array.size_on_disk - uncompressed_size)
        self.assertTrue(deviation < 200)
    else:
        self.assertAlmostEqual(self.array.size_on_disk, uncompressed_size)
def test01b_String(self):
    """Checking carray with strided NumPy strings appends.

    Rows 0 and 1 of a string CArray are assigned from strided views of
    NumPy arrays, then the whole array is read back and compared.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01b_String..." % self.__class__.__name__)

    # Three rows of 2x2 strings (itemsize 3), one row per chunk
    carray = self.h5file.create_carray(
        self.rootgroup,
        "strings",
        atom=tb.StringAtom(itemsize=3),
        shape=(3, 2, 2),
        title="Array of strings",
        chunkshape=(1, 2, 2),
    )

    # Row 0 comes from every other row of an "S3" source array
    first_src = np.array(
        [[["a", "b"], ["123", "45"], ["45", "123"]]], dtype="S3"
    )
    carray[0] = first_src[0, ::2]

    # Row 1 comes from every other row of a source with no explicit dtype
    second_src = np.array(
        [[["s", "a"], ["ab", "f"], ["s", "abc"], ["abc", "f"]]]
    )
    carray[1] = second_src[0, ::2]

    # Read all the rows:
    data = carray.read()
    if common.verbose:
        print("Object read:", data)
        print("Nrows in", carray._v_pathname, ":", carray.nrows)
        print("Second row in carray ==>", data[1].tolist())

    self.assertEqual(carray.nrows, 3)

    expected_row0 = [[b"a", b"b"], [b"45", b"123"]]
    expected_row1 = [[b"s", b"a"], [b"s", b"abc"]]
    self.assertEqual(data[0].tolist(), expected_row0)
    self.assertEqual(data[1].tolist(), expected_row1)
    self.assertEqual(len(data[0]), 2)
    self.assertEqual(len(data[1]), 2)
% self.__class__.__name__) + + shape = (3, 3) + + # Create a string atom + carray = self.h5file.create_carray( + root, + "CAtom", + atom=tb.Int32Atom(), + shape=shape, + title="array of ints", + chunkshape=(1, 3), + ) + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (0, 0, 0)], dtype="int32" + ) + carray[0:2] = a[2:] # Introduce an offset + a = np.array([(1, 1, 1), (-1, 0, 0)], dtype="int32") + carray[2:3] = a[1:] # Introduce an offset + + # Read all the rows: + data = carray.read() + if common.verbose: + print("Object read:", data) + print("Nrows in", carray._v_pathname, ":", carray.nrows) + print("Third row in carray ==>", data[2]) + + self.assertEqual(carray.nrows, 3) + self.assertTrue( + common.allequal(data[0], np.array([1, 1, 1], dtype="int32")) + ) + self.assertTrue( + common.allequal(data[1], np.array([0, 0, 0], dtype="int32")) + ) + self.assertTrue( + common.allequal(data[2], np.array([-1, 0, 0], dtype="int32")) + ) + + def test02b_int(self): + """Checking carray with strided NumPy ints appends.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02b_int..." 
% self.__class__.__name__) + + shape = (3, 3) + + # Create a string atom + carray = self.h5file.create_carray( + root, + "CAtom", + atom=tb.Int32Atom(), + shape=shape, + title="array of ints", + chunkshape=(1, 3), + ) + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="int32" + ) + carray[0:2] = a[::3] # Create an offset + a = np.array([(1, 1, 1), (-1, 0, 0)], dtype="int32") + carray[2:3] = a[::2] # Create an offset + + # Read all the rows: + data = carray.read() + if common.verbose: + print("Object read:", data) + print("Nrows in", carray._v_pathname, ":", carray.nrows) + print("Third row in carray ==>", data[2]) + + self.assertEqual(carray.nrows, 3) + self.assertTrue( + common.allequal(data[0], np.array([0, 0, 0], dtype="int32")) + ) + self.assertTrue( + common.allequal(data[1], np.array([3, 3, 3], dtype="int32")) + ) + self.assertTrue( + common.allequal(data[2], np.array([1, 1, 1], dtype="int32")) + ) + + +class CopyTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01a_copy(self): + """Checking CArray.copy() method.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_copy..." % self.__class__.__name__) + + # Create an CArray + shape = (2, 2) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[...] 
= np.array([[456, 2], [3, 457]], dtype="int16") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test01b_copy(self): + """Checking CArray.copy() method.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_copy..." % self.__class__.__name__) + + # Create an CArray + shape = (2, 2) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(5, 5), + ) + array1[...] 
= np.array([[456, 2], [3, 457]], dtype="int16") + + if self.close: + if common.verbose: + print("(closing h5fname version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing h5fname version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # By default, the chunkshape should be the same + self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test01c_copy(self): + """Checking CArray.copy() method.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01c_copy..." 
% self.__class__.__name__) + + # Create an CArray + shape = (5, 5) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[:2, :2] = np.array([[456, 2], [3, 457]], dtype="int16") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test02_copy(self): + """Checking CArray.copy() method (where specified)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_copy..." 
% self.__class__.__name__) + + # Create an CArray + shape = (5, 5) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[:2, :2] = np.array([[456, 2], [3, 457]], dtype="int16") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + group1 = self.h5file.create_group("/", "group1") + array2 = array1.copy(group1, "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.group1.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test03a_copy(self): + """Checking CArray.copy() method (python flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03c_copy..." 
% self.__class__.__name__) + + shape = (2, 2) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1.flavor = "python" + array1[...] = [[456, 2], [3, 457]] + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all elements are equal + self.assertEqual(array1.read(), array2.read()) + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) # Very important here! + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test03b_copy(self): + """Checking CArray.copy() method (string python flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03d_copy..." % self.__class__.__name__) + + shape = (2, 2) + atom = tb.StringAtom(itemsize=4) + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1.flavor = "python" + array1[...] 
= [["456", "2"], ["3", "457"]] + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("type value-->", type(array2[:][0][0])) + print("value-->", array2[:]) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all elements are equal + self.assertEqual(array1.read(), array2.read()) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) # Very important here! + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test03c_copy(self): + """Checking CArray.copy() method (chararray flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03e_copy..." % self.__class__.__name__) + + shape = (2, 2) + atom = tb.StringAtom(itemsize=4) + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[...] 
= np.array([["456", "2"], ["3", "457"]], dtype="S4") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) # Very important here! + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # self.assertEqual(array1.chunkshape, array2.chunkshape) + + def test04_copy(self): + """Checking CArray.copy() method (checking title copying)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_copy..." % self.__class__.__name__) + + # Create an CArray + shape = (2, 2) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[...] 
= np.array([[456, 2], [3, 457]], dtype="int16") + + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", title="title array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + # Assert user attributes + if common.verbose: + print("title of destination array-->", array2.title) + self.assertEqual(array2.title, "title array2") + + def test05_copy(self): + """Checking CArray.copy() method (user attributes copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_copy..." % self.__class__.__name__) + + # Create an CArray + shape = (2, 2) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[...] 
= np.array([[456, 2], [3, 457]], dtype="int16") + + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=1) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(array2.attrs.attr1, "attr1") + self.assertEqual(array2.attrs.attr2, 2) + + def test05b_copy(self): + """Checking CArray.copy() method (user attributes not copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05b_copy..." % self.__class__.__name__) + + # Create an Array + shape = (2, 2) + atom = tb.Int16Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + array1[...] 
= np.array([[456, 2], [3, 457]], dtype="int16") + + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=0) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(hasattr(array2.attrs, "attr1"), 0) + self.assertEqual(hasattr(array2.attrs, "attr2"), 0) + + +class CloseCopyTestCase(CopyTestCase): + close = 1 + + +class OpenCopyTestCase(CopyTestCase): + close = 0 + + +class CopyIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + nrowsinbuf = 2 + + def test01_index(self): + """Checking CArray.copy() method with indexes.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_index..." % self.__class__.__name__) + + # Create an CArray + shape = (100, 2) + atom = tb.Int32Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + r = np.arange(200, dtype="int32") + r.shape = shape + array1[...] 
= r + + # Select a different buffer size: + array1.nrowsinbuf = self.nrowsinbuf + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + self.assertTrue(common.allequal(r2, array2.read())) + + # Assert the number of rows in array + if common.verbose: + print("nrows in array2-->", array2.nrows) + print("and it should be-->", r2.shape[0]) + + # The next line is commented out because a copy should not + # keep the same chunkshape anymore. + # F. Alted 2006-11-27 + # assert array1.chunkshape == array2.chunkshape + self.assertEqual(r2.shape[0], array2.nrows) + + def _test02_indexclosef(self): + """Checking CArray.copy() method with indexes (close file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_indexclosef..." % self.__class__.__name__) + + # Create an CArray + shape = (100, 2) + atom = tb.Int32Atom() + array1 = self.h5file.create_carray( + self.h5file.root, + "array1", + atom=atom, + shape=shape, + title="title array1", + chunkshape=(2, 2), + ) + r = np.arange(200, dtype="int32") + r.shape = shape + array1[...] 
= r + + # Select a different buffer size: + array1.nrowsinbuf = self.nrowsinbuf + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + + # Close and reopen the file + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + self.assertEqual(array1.chunkshape, array2.chunkshape) + self.assertTrue(common.allequal(r2, array2.read())) + + # Assert the number of rows in array + if common.verbose: + print("nrows in array2-->", array2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], array2.nrows) + + +class CopyIndex1TestCase(CopyIndexTestCase): + nrowsinbuf = 1 + start = 0 + stop = 7 + step = 1 + + +class CopyIndex2TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + start = 0 + stop = -1 + step = 1 + + +class CopyIndex3TestCase(CopyIndexTestCase): + nrowsinbuf = 3 + start = 1 + stop = 7 + step = 1 + + +class CopyIndex4TestCase(CopyIndexTestCase): + nrowsinbuf = 4 + start = 0 + stop = 6 + step = 1 + + +class CopyIndex5TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + start = 3 + stop = 7 + step = 1 + + +class CopyIndex6TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + start = 3 + stop = 6 + step = 2 + + +class CopyIndex7TestCase(CopyIndexTestCase): + start = 0 + stop = 7 + step = 10 + + +class CopyIndex8TestCase(CopyIndexTestCase): + start = 6 + stop = -1 # Negative values means starting from the end + step = 1 + + +class CopyIndex9TestCase(CopyIndexTestCase): + start = 3 + stop = 4 + step = 1 + + +class CopyIndex10TestCase(CopyIndexTestCase): + nrowsinbuf = 1 + start = 3 + stop = 4 + step = 2 + + +class CopyIndex11TestCase(CopyIndexTestCase): + start = -3 + stop = -1 + step = 
2 + + +class CopyIndex12TestCase(CopyIndexTestCase): + start = -1 # Should point to the last element + stop = None # None should mean the last element (including it) + step = 1 + + +# The next test should be run only in **heavy** mode +class Rows64bitsTestCase(common.TempFileMixin, common.PyTablesTestCase): + narows = 1000 * 1000 # each array will have 1 million entries + # narows = 1000 # for testing only + nanumber = 1000 * 3 # That should account for more than 2**31-1 + + def setUp(self): + super().setUp() + + # Create an CArray + shape = (self.narows * self.nanumber,) + array = self.h5file.create_carray( + self.h5file.root, + "array", + atom=tb.Int8Atom(), + shape=shape, + filters=tb.Filters(complib="lzo", complevel=1), + ) + + # Fill the array + na = np.arange(self.narows, dtype="int8") + # for i in xrange(self.nanumber): + # s = slice(i * self.narows, (i + 1)*self.narows) + # array[s] = na + s = slice(0, self.narows) + array[s] = na + s = slice( + (self.nanumber - 1) * self.narows, self.nanumber * self.narows + ) + array[s] = na + + def test01_basiccheck(self): + """Some basic checks for carrays exceeding 2**31 rows""" + + array = self.h5file.root.array + + if self.close: + if common.verbose: + # Check how many entries there are in the array + print("Before closing") + print("Entries:", array.nrows, type(array.nrows)) + print("Entries:", array.nrows / (1000 * 1000), "Millions") + print("Shape:", array.shape) + + # Re-open the file + self._reopen() + array = self.h5file.root.array + if common.verbose: + print("After re-open") + + # Check how many entries there are in the array + if common.verbose: + print("Entries:", array.nrows, type(array.nrows)) + print("Entries:", array.nrows / (1000 * 1000), "Millions") + print("Shape:", array.shape) + print("Last 10 elements-->", array[-10:]) + stop = self.narows % 256 + if stop > 127: + stop -= 256 + start = stop - 10 + # print("start, stop-->", start, stop) + print("Should look like:", np.arange(start, stop, 
dtype="int8")) + + nrows = self.narows * self.nanumber + + # check nrows + self.assertEqual(array.nrows, nrows) + + # Check shape + self.assertEqual(array.shape, (nrows,)) + + # check the 10 first elements + self.assertTrue( + common.allequal(array[:10], np.arange(10, dtype="int8")) + ) + + # check the 10 last elements + stop = self.narows % 256 + if stop > 127: + stop -= 256 + start = stop - 10 + self.assertTrue( + common.allequal(array[-10:], np.arange(start, stop, dtype="int8")) + ) + + +class Rows64bitsTestCase1(Rows64bitsTestCase): + close = 0 + + +class Rows64bitsTestCase2(Rows64bitsTestCase): + close = 1 + + +class BigArrayTestCase(common.TempFileMixin, common.PyTablesTestCase): + shape = (3_000_000_000,) # more than 2**31-1 + + def setUp(self): + super().setUp() + # This should be fast since disk space isn't actually allocated, + # so this case is OK for non-heavy test runs. + self.h5file.create_carray( + "/", "array", atom=tb.Int8Atom(), shape=self.shape + ) + + def test00_shape(self): + """Check that the shape doesn't overflow.""" + # See ticket #147. + self.assertEqual(self.h5file.root.array.shape, self.shape) + try: + self.assertEqual(len(self.h5file.root.array), self.shape[0]) + except OverflowError: + # This can't be avoided in 32-bit platforms. + self.assertTrue( + self.shape[0] > np.iinfo(int).max, + "Array length overflowed but ``int`` " "is wide enough.", + ) + + def test01_shape_reopen(self): + """Check that the shape doesn't overflow after reopening.""" + self._reopen("r") + self.test00_shape() + + +# Test for default values when creating arrays. 
+class DfltAtomTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_dflt(self): + """Check that Atom.dflt is honored (string version).""" + + # Create a CArray with default values + self.h5file.create_carray( + "/", + "bar", + atom=tb.StringAtom(itemsize=5, dflt=b"abdef"), + shape=(10, 10), + ) + + if self.reopen: + self._reopen() + + # Check the values + values = self.h5file.root.bar[:] + if common.verbose: + print("Read values:", values) + self.assertTrue( + common.allequal( + values, np.array(["abdef"] * 100, "S5").reshape(10, 10) + ) + ) + + def test01_dflt(self): + """Check that Atom.dflt is honored (int version).""" + + # Create a CArray with default values + self.h5file.create_carray( + "/", "bar", atom=tb.IntAtom(dflt=1), shape=(10, 10) + ) + + if self.reopen: + self._reopen() + + # Check the values + values = self.h5file.root.bar[:] + if common.verbose: + print("Read values:", values) + self.assertTrue(common.allequal(values, np.ones((10, 10), "i4"))) + + def test02_dflt(self): + """Check that Atom.dflt is honored (float version).""" + + # Create a CArray with default values + self.h5file.create_carray( + "/", "bar", atom=tb.FloatAtom(dflt=1.134), shape=(10, 10) + ) + + if self.reopen: + self._reopen() + + # Check the values + values = self.h5file.root.bar[:] + if common.verbose: + print("Read values:", values) + self.assertTrue( + common.allequal(values, np.ones((10, 10), "f8") * 1.134) + ) + + +class DfltAtomNoReopen(DfltAtomTestCase): + reopen = False + + +class DfltAtomReopen(DfltAtomTestCase): + reopen = True + + +# Test for representation of defaults in atoms. Ticket #212. +class AtomDefaultReprTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00a_zeros(self): + """Testing default values. 
Zeros (scalar).""" + + N = () + atom = tb.StringAtom(itemsize=3, shape=N, dflt=b"") + ca = self.h5file.create_carray("/", "test", atom=atom, shape=(1,)) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", repr(ca[0])) + print("Defaults-->", repr(ca.atom.dflt)) + self.assertTrue(common.allequal(ca[0], np.zeros(N, "S3"))) + self.assertTrue(common.allequal(ca.atom.dflt, np.zeros(N, "S3"))) + + def test00b_zeros(self): + """Testing default values. Zeros (array).""" + + N = 2 + atom = tb.StringAtom(itemsize=3, shape=N, dflt=b"") + ca = self.h5file.create_carray("/", "test", atom=atom, shape=(1,)) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", ca[0]) + print("Defaults-->", ca.atom.dflt) + self.assertTrue(common.allequal(ca[0], np.zeros(N, "S3"))) + self.assertTrue(common.allequal(ca.atom.dflt, np.zeros(N, "S3"))) + + def test01a_values(self): + """Testing default values. Ones.""" + + N = 2 + atom = tb.Int32Atom(shape=N, dflt=1) + ca = self.h5file.create_carray("/", "test", atom=atom, shape=(1,)) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", ca[0]) + print("Defaults-->", ca.atom.dflt) + self.assertTrue(common.allequal(ca[0], np.ones(N, "i4"))) + self.assertTrue(common.allequal(ca.atom.dflt, np.ones(N, "i4"))) + + def test01b_values(self): + """Testing default values. 
Generic value.""" + + N = 2 + generic = 112.32 + atom = tb.Float32Atom(shape=N, dflt=generic) + ca = self.h5file.create_carray("/", "test", atom=atom, shape=(1,)) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", ca[0]) + print("Defaults-->", ca.atom.dflt) + self.assertTrue(common.allequal(ca[0], np.ones(N, "f4") * generic)) + self.assertTrue( + common.allequal(ca.atom.dflt, np.ones(N, "f4") * generic) + ) + + def test02a_None(self): + """Testing default values. None (scalar).""" + + N = () + atom = tb.Int32Atom(shape=N, dflt=None) + ca = self.h5file.create_carray("/", "test", atom=atom, shape=(1,)) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", repr(ca[0])) + print("Defaults-->", repr(ca.atom.dflt)) + self.assertTrue(common.allequal(ca.atom.dflt, np.zeros(N, "i4"))) + + def test02b_None(self): + """Testing default values. 
None (array).""" + + N = 2 + atom = tb.Int32Atom(shape=N, dflt=None) + ca = self.h5file.create_carray("/", "test", atom=atom, shape=(1,)) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", ca[0]) + print("Defaults-->", ca.atom.dflt) + self.assertTrue(common.allequal(ca.atom.dflt, np.zeros(N, "i4"))) + + +class AtomDefaultReprNoReopen(AtomDefaultReprTestCase): + reopen = False + + +class AtomDefaultReprReopen(AtomDefaultReprTestCase): + reopen = True + + +class TruncateTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test(self): + """Test for inability to truncate Array objects.""" + + array1 = self.h5file.create_carray("/", "array1", tb.IntAtom(), [2, 2]) + self.assertRaises(TypeError, array1.truncate, 0) + + +# Test for dealing with multidimensional atoms +class MDAtomTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01a_assign(self): + """Assign a row to a (unidimensional) CArray with a MD atom.""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2, 2)), shape=(1,) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Assign one row + ca[0] = [[1, 3], [4, 5]] + self.assertEqual(ca.nrows, 1) + if common.verbose: + print("First row-->", ca[0]) + self.assertTrue( + common.allequal(ca[0], np.array([[1, 3], [4, 5]], "i4")) + ) + + def test01b_assign(self): + """Assign several rows to a (unidimensional) CArray with a MD atom.""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2, 2)), shape=(3,) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Assign three rows + ca[:] = [[[1]], [[2]], [[3]]] # Simple broadcast + self.assertEqual(ca.nrows, 3) + if common.verbose: + print("Third row-->", ca[2]) + self.assertTrue( + common.allequal(ca[2], np.array([[3, 3], [3, 3]], "i4")) + ) + + def test02a_assign(self): + """Assign a row to a 
(multidimensional) CArray with a MD atom.""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2,)), shape=(1, 3) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Assign one row + ca[:] = [[[1, 3], [4, 5], [7, 9]]] + self.assertEqual(ca.nrows, 1) + if common.verbose: + print("First row-->", ca[0]) + self.assertTrue( + common.allequal(ca[0], np.array([[1, 3], [4, 5], [7, 9]], "i4")) + ) + + def test02b_assign(self): + """Assign several rows to a (multidimensional) CArray with + a MD atom.""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2,)), shape=(3, 3) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + # Assign three rows + ca[:] = [ + [[1, -3], [4, -5], [-7, 9]], + [[-1, 3], [-4, 5], [7, -8]], + [[-2, 3], [-5, 5], [7, -9]], + ] + self.assertEqual(ca.nrows, 3) + if common.verbose: + print("Third row-->", ca[2]) + self.assertTrue( + common.allequal(ca[2], np.array([[-2, 3], [-5, 5], [7, -9]], "i4")) + ) + + def test03a_MDMDMD(self): + """Complex assign of a MD array in a MD CArray with a MD atom.""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2, 4)), shape=(3, 2, 3) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + + # Assign values + # The shape of the atom should be added at the end of the arrays + a = np.arange(2 * 3 * 2 * 4, dtype="i4").reshape((2, 3, 2, 4)) + ca[:] = [a * 1, a * 2, a * 3] + self.assertEqual(ca.nrows, 3) + if common.verbose: + print("Third row-->", ca[2]) + self.assertTrue(common.allequal(ca[2], a * 3)) + + def test03b_MDMDMD(self): + """Complex assign of a MD array in a MD CArray with a MD atom (II).""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2, 4)), shape=(2, 3, 3) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + + # Assign values + # The shape of the atom should be 
added at the end of the arrays + a = np.arange(2 * 3 * 3 * 2 * 4, dtype="i4").reshape((2, 3, 3, 2, 4)) + ca[:] = a + self.assertEqual(ca.nrows, 2) + if common.verbose: + print("Third row-->", ca[:, 2, ...]) + self.assertTrue(common.allequal(ca[:, 2, ...], a[:, 2, ...])) + + def test03c_MDMDMD(self): + """Complex assign of a MD array in a MD CArray with a MD atom (III).""" + + # Create an CArray + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom((2, 4)), shape=(3, 1, 2) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + + # Assign values + # The shape of the atom should be added at the end of the arrays + a = np.arange(3 * 1 * 2 * 2 * 4, dtype="i4").reshape((3, 1, 2, 2, 4)) + ca[:] = a + self.assertEqual(ca.nrows, 3) + if common.verbose: + print("Second row-->", ca[:, :, 1, ...]) + self.assertTrue(common.allequal(ca[:, :, 1, ...], a[:, :, 1, ...])) + + +class MDAtomNoReopen(MDAtomTestCase): + reopen = False + + +class MDAtomReopen(MDAtomTestCase): + reopen = True + + +# Test for building very large MD atoms without defaults. Ticket #211. +class MDLargeAtomTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01_create(self): + """Create a CArray with a very large MD atom.""" + + N = 2**16 # 4x larger than maximum object header size (64 KB) + ca = self.h5file.create_carray( + "/", "test", atom=tb.Int32Atom(shape=N), shape=(1,) + ) + if self.reopen: + self._reopen("a") + ca = self.h5file.root.test + + # Check the value + if common.verbose: + print("First row-->", ca[0]) + self.assertTrue(common.allequal(ca[0], np.zeros(N, "i4"))) + + +class MDLargeAtomNoReopen(MDLargeAtomTestCase): + reopen = False + + +class MDLargeAtomReopen(MDLargeAtomTestCase): + reopen = True + + +class AccessClosedTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self.array = self.h5file.create_carray( + self.h5file.root, "array", atom=tb.Int32Atom(), shape=(10, 10) + ) + self.array[...] 
= np.zeros((10, 10)) + + def test_read(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.read) + + def test_getitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__getitem__, 0) + + def test_setitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__setitem__, 0, 0) + + +class TestCreateCArrayArgs(common.TempFileMixin, common.PyTablesTestCase): + obj = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + where = "/" + name = "carray" + atom = tb.Atom.from_dtype(obj.dtype) + shape = obj.shape + title = "title" + filters = None + chunkshape = (1, 2) + byteorder = None + createparents = False + + def test_positional_args_01(self): + self.h5file.create_carray( + self.where, + self.name, + self.atom, + self.shape, + self.title, + self.filters, + self.chunkshape, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(np.zeros_like(self.obj), nparr)) + + def test_positional_args_02(self): + ptarr = self.h5file.create_carray( + self.where, + self.name, + self.atom, + self.shape, + self.title, + self.filters, + self.chunkshape, + ) + ptarr[...] 
= self.obj + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_positional_args_obj(self): + self.h5file.create_carray( + self.where, + self.name, + None, + None, + self.title, + self.filters, + self.chunkshape, + self.byteorder, + self.createparents, + self.obj, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj(self): + self.h5file.create_carray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_shape_01(self): + ptarr = self.h5file.create_carray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + atom=self.atom, + shape=self.shape, + ) + ptarr[...] 
= self.obj + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_shape_02(self): + ptarr = self.h5file.create_carray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + atom=self.atom, + shape=self.shape, + ) + # ptarr[...] = self.obj + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(np.zeros_like(self.obj), nparr)) + + def test_kwargs_obj_atom(self): + ptarr = self.h5file.create_carray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + atom=self.atom, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_shape(self): + ptarr = self.h5file.create_carray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + shape=self.shape, + ) + self.h5file.close() + + self.h5file = 
tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_shape(self): + ptarr = self.h5file.create_carray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + atom=self.atom, + shape=self.shape, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_error(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + # shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_carray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + ) + + def test_kwargs_obj_shape_error(self): + # atom = Atom.from_dtype(np.dtype('complex')) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_carray, + self.where, + self.name, + title=self.title, + obj=self.obj, + shape=shape, + ) + + def test_kwargs_obj_atom_shape_error_01(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + # shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_carray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + shape=self.shape, + ) + + def test_kwargs_obj_atom_shape_error_02(self): + # atom = Atom.from_dtype(np.dtype('complex')) + shape = 
self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_carray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=self.atom, + shape=shape, + ) + + def test_kwargs_obj_atom_shape_error_03(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_carray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + shape=shape, + ) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # uncomment this only for testing purposes + + # theSuite.addTest(make_suite(BasicTestCase)) + for n in range(niter): + theSuite.addTest(common.make_suite(BasicWriteTestCase)) + theSuite.addTest(common.make_suite(BasicWrite2TestCase)) + theSuite.addTest(common.make_suite(BasicWrite3TestCase)) + theSuite.addTest(common.make_suite(BasicWrite4TestCase)) + theSuite.addTest(common.make_suite(BasicWrite5TestCase)) + theSuite.addTest(common.make_suite(BasicWrite6TestCase)) + theSuite.addTest(common.make_suite(BasicWrite7TestCase)) + theSuite.addTest(common.make_suite(BasicWrite8TestCase)) + theSuite.addTest(common.make_suite(EmptyCArrayTestCase)) + theSuite.addTest(common.make_suite(EmptyCArray2TestCase)) + theSuite.addTest(common.make_suite(SlicesCArrayTestCase)) + theSuite.addTest(common.make_suite(Slices2CArrayTestCase)) + theSuite.addTest(common.make_suite(EllipsisCArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis2CArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis3CArrayTestCase)) + theSuite.addTest(common.make_suite(ZlibComprTestCase)) + theSuite.addTest(common.make_suite(ZlibShuffleTestCase)) + theSuite.addTest(common.make_suite(BloscComprTestCase)) + theSuite.addTest(common.make_suite(BloscShuffleTestCase)) + theSuite.addTest(common.make_suite(BloscBitShuffleTestCase)) + theSuite.addTest(common.make_suite(BloscFletcherTestCase)) + 
theSuite.addTest(common.make_suite(BloscBloscLZTestCase)) + theSuite.addTest(common.make_suite(BloscLZ4TestCase)) + theSuite.addTest(common.make_suite(BloscLZ4HCTestCase)) + theSuite.addTest(common.make_suite(BloscSnappyTestCase)) + theSuite.addTest(common.make_suite(BloscZlibTestCase)) + theSuite.addTest(common.make_suite(BloscZstdTestCase)) + theSuite.addTest(common.make_suite(Blosc2ComprTestCase)) + theSuite.addTest(common.make_suite(Blosc2FletcherTestCase)) + theSuite.addTest(common.make_suite(Blosc2CrossChunkTestCase)) + theSuite.addTest(common.make_suite(Blosc2CrossChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2PastLastChunkTestCase)) + theSuite.addTest(common.make_suite(Blosc2PastLastChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2Ndim3MinChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2Ndim3ChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2Ndim4ChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2NDNoChunkshape)) + theSuite.addTest(common.make_suite(LZOComprTestCase)) + theSuite.addTest(common.make_suite(LZOShuffleTestCase)) + theSuite.addTest(common.make_suite(Bzip2ComprTestCase)) + theSuite.addTest(common.make_suite(Bzip2ShuffleTestCase)) + theSuite.addTest(common.make_suite(FloatTypeTestCase)) + theSuite.addTest(common.make_suite(ComplexTypeTestCase)) + theSuite.addTest(common.make_suite(StringTestCase)) + theSuite.addTest(common.make_suite(String2TestCase)) + theSuite.addTest(common.make_suite(StringComprTestCase)) + theSuite.addTest(common.make_suite(Int8TestCase)) + theSuite.addTest(common.make_suite(Int16TestCase)) + theSuite.addTest(common.make_suite(Int32TestCase)) + theSuite.addTest(common.make_suite(Float16TestCase)) + theSuite.addTest(common.make_suite(Float32TestCase)) + theSuite.addTest(common.make_suite(Float64TestCase)) + theSuite.addTest(common.make_suite(Float96TestCase)) + theSuite.addTest(common.make_suite(Float128TestCase)) + 
theSuite.addTest(common.make_suite(Complex64TestCase)) + theSuite.addTest(common.make_suite(Complex128TestCase)) + theSuite.addTest(common.make_suite(Complex192TestCase)) + theSuite.addTest(common.make_suite(Complex256TestCase)) + theSuite.addTest(common.make_suite(ComprTestCase)) + theSuite.addTest(common.make_suite(OffsetStrideTestCase)) + theSuite.addTest(common.make_suite(Fletcher32TestCase)) + theSuite.addTest(common.make_suite(AllFiltersTestCase)) + theSuite.addTest(common.make_suite(ReadOutArgumentTests)) + theSuite.addTest(common.make_suite(SizeOnDiskInMemoryPropertyTestCase)) + theSuite.addTest(common.make_suite(CloseCopyTestCase)) + theSuite.addTest(common.make_suite(OpenCopyTestCase)) + theSuite.addTest(common.make_suite(CopyIndex1TestCase)) + theSuite.addTest(common.make_suite(CopyIndex2TestCase)) + theSuite.addTest(common.make_suite(CopyIndex3TestCase)) + theSuite.addTest(common.make_suite(CopyIndex4TestCase)) + theSuite.addTest(common.make_suite(CopyIndex5TestCase)) + theSuite.addTest(common.make_suite(BigArrayTestCase)) + theSuite.addTest(common.make_suite(DfltAtomNoReopen)) + theSuite.addTest(common.make_suite(DfltAtomReopen)) + theSuite.addTest(common.make_suite(AtomDefaultReprNoReopen)) + theSuite.addTest(common.make_suite(AtomDefaultReprReopen)) + theSuite.addTest(common.make_suite(TruncateTestCase)) + theSuite.addTest(common.make_suite(MDAtomNoReopen)) + theSuite.addTest(common.make_suite(MDAtomReopen)) + theSuite.addTest(common.make_suite(MDLargeAtomNoReopen)) + theSuite.addTest(common.make_suite(MDLargeAtomReopen)) + theSuite.addTest(common.make_suite(AccessClosedTestCase)) + theSuite.addTest(common.make_suite(TestCreateCArrayArgs)) + if common.heavy: + theSuite.addTest(common.make_suite(Slices3CArrayTestCase)) + theSuite.addTest(common.make_suite(Slices4CArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis4CArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis5CArrayTestCase)) + 
theSuite.addTest(common.make_suite(Ellipsis6CArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis7CArrayTestCase)) + theSuite.addTest(common.make_suite(MD3WriteTestCase)) + theSuite.addTest(common.make_suite(MD5WriteTestCase)) + theSuite.addTest(common.make_suite(MD6WriteTestCase)) + theSuite.addTest(common.make_suite(MD7WriteTestCase)) + theSuite.addTest(common.make_suite(MD10WriteTestCase)) + theSuite.addTest(common.make_suite(CopyIndex6TestCase)) + theSuite.addTest(common.make_suite(CopyIndex7TestCase)) + theSuite.addTest(common.make_suite(CopyIndex8TestCase)) + theSuite.addTest(common.make_suite(CopyIndex9TestCase)) + theSuite.addTest(common.make_suite(CopyIndex10TestCase)) + theSuite.addTest(common.make_suite(CopyIndex11TestCase)) + theSuite.addTest(common.make_suite(CopyIndex12TestCase)) + theSuite.addTest(common.make_suite(Rows64bitsTestCase1)) + theSuite.addTest(common.make_suite(Rows64bitsTestCase2)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_create.py b/venv/Lib/site-packages/tables/tests/test_create.py new file mode 100644 index 0000000..fa827bd --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_create.py @@ -0,0 +1,2839 @@ +"""This test unit checks object creation functions, like open_file, +create_table, create_array or create_group. 
+ +It also checks: + +- name identifiers in tree objects +- title character limit for objects (255) +- limit in number in table fields (255) + +""" + +import sys +import hashlib +import tempfile +import warnings +from pathlib import Path + +import numpy as np +from packaging.version import Version + +import tables as tb +from tables.tests import common + + +class Record(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) # 4-character String + var2 = tb.IntCol() # integer + var3 = tb.Int16Col() # short integer + var4 = tb.FloatCol() # double (double-precision) + var5 = tb.Float32Col() # float (single-precision) + + +class CreateTestCase(common.TempFileMixin, common.PyTablesTestCase): + title = "This is the table title" + expectedrows = 100 + maxshort = 2**15 + maxint = 2_147_483_648 # (2 ** 31) + compress = 0 + + def setUp(self): + super().setUp() + + # Create an instance of HDF5 Table + self.root = self.h5file.root + + # Create a table object + self.table = self.h5file.create_table( + self.root, "atable", Record, "Table title" + ) + # Create an array object + self.array = self.h5file.create_array( + self.root, "anarray", [1], "Array title" + ) + # Create a group object + self.group = self.h5file.create_group( + self.root, "agroup", "Group title" + ) + + def test00_isClass(self): + """Testing table creation.""" + + self.assertIsInstance(self.table, tb.Table) + self.assertIsInstance(self.array, tb.Array) + self.assertIsInstance(self.array, tb.Leaf) + self.assertIsInstance(self.group, tb.Group) + + def test01_overwriteNode(self): + """Checking protection against node overwriting.""" + + try: + self.array = self.h5file.create_array( + self.root, "anarray", [1], "Array title" + ) + except tb.NodeError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next NameError was catched!") + print(value) + else: + self.fail("expected a tables.NodeError") + + def test02_syntaxname(self): + """Checking syntax in object tree names.""" + + with 
self.assertWarns(tb.NaturalNameWarning): + self.array = self.h5file.create_array( + self.root, " array", [1], "Array title" + ) + + # another name error + with self.assertWarns(tb.NaturalNameWarning): + self.array = self.h5file.create_array( + self.root, "$array", [1], "Array title" + ) + + # Finally, test a reserved word + with self.assertWarns(tb.NaturalNameWarning): + self.array = self.h5file.create_array( + self.root, "for", [1], "Array title" + ) + + def test03a_titleAttr(self): + """Checking the self.title attr in nodes.""" + + # Close the opened file to destroy the object tree + self._reopen() + + # Now, test that self.title exists and is correct in all the nodes + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + self.assertEqual(self.h5file.root.atable.title, "Table title") + self.assertEqual(self.h5file.root.anarray.title, "Array title") + + def test03b_titleLength(self): + """Checking large title character length limit (1023)""" + + titlelength = 1023 + # Try to put a very long title on a group object + group = self.h5file.create_group(self.root, "group", "t" * titlelength) + self.assertEqual(group._v_title, "t" * titlelength) + self.assertEqual(group._f_getattr("TITLE"), "t" * titlelength) + + # Now, try with a table object + table = self.h5file.create_table( + self.root, "table", Record, "t" * titlelength + ) + self.assertEqual(table.title, "t" * titlelength) + self.assertEqual(table.get_attr("TITLE"), "t" * titlelength) + + # Finally, try with an Array object + arr = self.h5file.create_array( + self.root, "arr", [1], "t" * titlelength + ) + self.assertEqual(arr.title, "t" * titlelength) + self.assertEqual(arr.get_attr("TITLE"), "t" * titlelength) + + def test04_maxFields(self): + """Checking a large number of fields in tables""" + + # The number of fields for a table + varnumber = tb.parameters.MAX_COLUMNS + + varnames = [] + for i in range(varnumber): + varnames.append("int%d" % i) + + # Build a dictionary with the types as values 
and varnames as keys + recordDict = {} + i = 0 + for varname in varnames: + recordDict[varname] = tb.Col.from_type("int32", dflt=1, pos=i) + i += 1 + # Append this entry to indicate the alignment! + recordDict["_v_align"] = "=" + table = self.h5file.create_table( + self.root, "table", recordDict, "MetaRecord instance" + ) + row = table.row + listrows = [] + # Write 10 records + for j in range(10): + rowlist = [] + for i in range(len(table.colnames)): + row[varnames[i]] = i * j + rowlist.append(i * j) + + row.append() + listrows.append(tuple(rowlist)) + + # write data on disk + table.flush() + + # Read all the data as a list + listout = table.read().tolist() + + # Compare the input rowlist and output row list. They should + # be equal. + if common.verbose: + print("Original row list:", listrows[-1]) + print("Retrieved row list:", listout[-1]) + self.assertEqual(listrows, listout) + + # The next limitation has been released. A warning is still there, though + def test05_maxFieldsExceeded(self): + """Checking an excess of the maximum number of fields in tables""" + + # The number of fields for a table + varnumber = tb.parameters.MAX_COLUMNS + 1 + + varnames = [] + for i in range(varnumber): + varnames.append("int%d" % i) + + # Build a dictionary with the types as values and varnames as keys + recordDict = {} + i = 0 + for varname in varnames: + recordDict[varname] = tb.Col.from_type("int32", dflt=1) + i += 1 + + # Now, create a table with this record object + # This way of creating node objects has been deprecated + # table = Table(recordDict, "MetaRecord instance") + + # Attach the table to object tree + warnings.filterwarnings("error", category=tb.PerformanceWarning) + # Here, a tables.PerformanceWarning should be raised! 
+ try: + self.h5file.create_table( + self.root, "table", recordDict, "MetaRecord instance" + ) + except tb.PerformanceWarning: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next PerformanceWarning was catched!") + print(value) + else: + self.fail("expected an tables.PerformanceWarning") + # Reset the warning + warnings.filterwarnings("default", category=tb.PerformanceWarning) + + # The next limitation has been released + def _test06_maxColumnNameLengthExceeded(self): + """Checking an excess (256) of the maximum length in column names""" + + # Build a dictionary with the types as values and varnames as keys + recordDict = {} + recordDict["a" * 255] = tb.IntCol(dflt=1) + recordDict["b" * 256] = tb.IntCol(dflt=1) # Should raise a ValueError + + # Now, create a table with this record object + # This way of creating node objects has been deprecated + table = tb.Table(recordDict, "MetaRecord instance") + self.assertIsNotNone(table) + + # Attach the table to object tree + # Here, ValueError should be raised! + with self.assertRaises(ValueError): + self.h5file.create_table( + self.root, "table", recordDict, "MetaRecord instance" + ) + + def test06_noMaxColumnNameLength(self): + """Checking unlimited length in column names""" + + # Build a dictionary with the types as values and varnames as keys + recordDict = {} + recordDict["a" * 255] = tb.IntCol(dflt=1, pos=0) + recordDict["b" * 1024] = tb.IntCol(dflt=1, pos=1) # Should work well + + # Attach the table to object tree + # Here, IndexError should be raised! 
+ table = self.h5file.create_table( + self.root, "table", recordDict, "MetaRecord instance" + ) + self.assertEqual(table.colnames[0], "a" * 255) + self.assertEqual(table.colnames[1], "b" * 1024) + + +class Record2(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) # 4-character String + var2 = tb.IntCol() # integer + var3 = tb.Int16Col() # short integer + + +class FiltersTreeTestCase(common.TempFileMixin, common.PyTablesTestCase): + title = "A title" + nrows = 10 + + def setUp(self): + super().setUp() + self.populateFile() + + def populateFile(self): + group = self.h5file.root + # Create a tree with three levels of depth + for j in range(5): + # Create a table + table = self.h5file.create_table( + group, "table1", Record2, title=self.title, filters=None + ) + # Get the record object associated with the new table + d = table.row + # Fill the table + for i in range(self.nrows): + d["var1"] = "%04d" % (self.nrows - i) + d["var2"] = i + d["var3"] = i * 2 + d.append() # This injects the Record values + # Flush the buffer for this table + table.flush() + + # Create a couple of arrays in each group + var1List = [x["var1"] for x in table.iterrows()] + var3List = [x["var3"] for x in table.iterrows()] + + self.h5file.create_array(group, "array1", var1List, "col 1") + self.h5file.create_array(group, "array2", var3List, "col 3") + + # Create a couple of EArrays as well + ea1 = self.h5file.create_earray( + group, "earray1", tb.StringAtom(itemsize=4), (0,), "col 1" + ) + ea2 = self.h5file.create_earray( + group, "earray2", tb.Int16Atom(), (0,), "col 3" + ) + # And fill them with some values + ea1.append(var1List) + ea2.append(var3List) + + # Finally a couple of carrays too + vla1 = self.h5file.create_carray( + group, "carray1", tb.StringAtom(itemsize=4), (2,) + ) + vla2 = self.h5file.create_carray( + group, "carray2", tb.Int16Atom(), (2,) + ) + + # Create a new group (descendant of group) + if j == 1: # The second level + group2 = self.h5file.create_group( + group, "group" + 
str(j), filters=self.gfilters + ) + elif j == 2: # third level + group2 = self.h5file.create_group(group, "group" + str(j)) + else: # The rest of levels + group2 = self.h5file.create_group( + group, "group" + str(j), filters=self.filters + ) + # Iterate over this new group (group2) + group = group2 + + def test00_checkFilters(self): + """Checking inheritance of filters on trees (open file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test00_checkFilters..." % self.__class__.__name__ + ) + + # First level check + if common.verbose: + print("Test filter:", repr(self.filters)) + print("Filters in file:", repr(self.h5file.filters)) + + if self.filters is None: + filters = tb.Filters() + else: + filters = self.filters + self.assertEqual(repr(filters), repr(self.h5file.filters)) + + # The next nodes have to have the same filter properties as + # self.filters + nodelist = [ + "/table1", + "/group0/earray1", + "/group0/carray1", + "/group0", + ] + for node in nodelist: + obj = self.h5file.get_node(node) + if isinstance(obj, tb.Group): + self.assertEqual(repr(filters), repr(obj._v_filters)) + else: + self.assertEqual(repr(filters), repr(obj.filters)) + + # Second and third level check + group1 = self.h5file.root.group0.group1 + if self.gfilters is None: + if self.filters is None: + gfilters = tb.Filters() + else: + gfilters = self.filters + else: + gfilters = self.gfilters + if common.verbose: + print("Test gfilter:", repr(gfilters)) + print("Filters in file:", repr(group1._v_filters)) + + self.assertEqual(repr(gfilters), repr(group1._v_filters)) + + # The next nodes have to have the same filter properties as + # gfilters + nodelist = [ + "/group0/group1", + "/group0/group1/earray1", + "/group0/group1/carray1", + "/group0/group1/table1", + "/group0/group1/group2/table1", + ] + for node in nodelist: + obj = self.h5file.get_node(node) + if isinstance(obj, tb.Group): + self.assertEqual(repr(gfilters), repr(obj._v_filters)) + else: + 
self.assertEqual(repr(gfilters), repr(obj.filters)) + + # Fourth and fifth level check + if self.filters is None: + # If None, the filters are inherited! + if self.gfilters is None: + filters = tb.Filters() + else: + filters = self.gfilters + else: + filters = self.filters + group3 = self.h5file.root.group0.group1.group2.group3 + if common.verbose: + print("Test filter:", repr(filters)) + print("Filters in file:", repr(group3._v_filters)) + + self.assertEqual(repr(filters), repr(group3._v_filters)) + + # The next nodes have to have the same filter properties as + # self.filter + nodelist = [ + "/group0/group1/group2/group3", + "/group0/group1/group2/group3/earray1", + "/group0/group1/group2/group3/carray1", + "/group0/group1/group2/group3/table1", + "/group0/group1/group2/group3/group4", + ] + for node in nodelist: + obj = self.h5file.get_node(node) + if isinstance(obj, tb.Group): + self.assertEqual(repr(filters), repr(obj._v_filters)) + else: + self.assertEqual(repr(filters), repr(obj.filters)) + + # Checking the special case for Arrays in which the compression + # should always be the empty Filter() + # The next nodes have to have the same filter properties as + # Filter() + nodelist = [ + "/array1", + "/group0/array1", + "/group0/group1/array1", + "/group0/group1/group2/array1", + "/group0/group1/group2/group3/array1", + ] + for node in nodelist: + obj = self.h5file.get_node(node) + self.assertEqual(repr(tb.Filters()), repr(obj.filters)) + + def test01_checkFilters(self): + """Checking inheritance of filters on trees (close file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_checkFilters..." 
% self.__class__.__name__ + ) + + # Close the file + self._reopen() + + # First level check + if self.filters is None: + filters = tb.Filters() + else: + filters = self.filters + if common.verbose: + print("Test filter:", repr(filters)) + print("Filters in file:", repr(self.h5file.filters)) + + self.assertEqual(repr(filters), repr(self.h5file.filters)) + + # The next nodes have to have the same filter properties as + # self.filters + nodelist = [ + "/table1", + "/group0/earray1", + "/group0/carray1", + "/group0", + ] + for node in nodelist: + object_ = self.h5file.get_node(node) + if isinstance(object_, tb.Group): + self.assertEqual(repr(filters), repr(object_._v_filters)) + else: + self.assertEqual(repr(filters), repr(object_.filters)) + + # Second and third level check + group1 = self.h5file.root.group0.group1 + if self.gfilters is None: + if self.filters is None: + gfilters = tb.Filters() + else: + gfilters = self.filters + else: + gfilters = self.gfilters + if common.verbose: + print("Test filter:", repr(gfilters)) + print("Filters in file:", repr(group1._v_filters)) + + self.assertEqual(repr(gfilters), repr(group1._v_filters)) + + # The next nodes have to have the same filter properties as + # gfilters + nodelist = [ + "/group0/group1", + "/group0/group1/earray1", + "/group0/group1/carray1", + "/group0/group1/table1", + "/group0/group1/group2/table1", + ] + for node in nodelist: + object_ = self.h5file.get_node(node) + if isinstance(object_, tb.Group): + self.assertEqual(repr(gfilters), repr(object_._v_filters)) + else: + self.assertEqual(repr(gfilters), repr(object_.filters)) + + # Fourth and fifth level check + if self.filters is None: + if self.gfilters is None: + filters = tb.Filters() + else: + filters = self.gfilters + else: + filters = self.filters + group3 = self.h5file.root.group0.group1.group2.group3 + if common.verbose: + print("Test filter:", repr(filters)) + print("Filters in file:", repr(group3._v_filters)) + + repr(filters) == 
repr(group3._v_filters) + # The next nodes have to have the same filter properties as + # self.filters + nodelist = [ + "/group0/group1/group2/group3", + "/group0/group1/group2/group3/earray1", + "/group0/group1/group2/group3/carray1", + "/group0/group1/group2/group3/table1", + "/group0/group1/group2/group3/group4", + ] + for node in nodelist: + obj = self.h5file.get_node(node) + if isinstance(obj, tb.Group): + self.assertEqual(repr(filters), repr(obj._v_filters)) + else: + self.assertEqual(repr(filters), repr(obj.filters)) + + # Checking the special case for Arrays in which the compression + # should always be the empty Filter() + # The next nodes have to have the same filter properties as + # Filter() + nodelist = [ + "/array1", + "/group0/array1", + "/group0/group1/array1", + "/group0/group1/group2/array1", + "/group0/group1/group2/group3/array1", + ] + for node in nodelist: + obj = self.h5file.get_node(node) + self.assertEqual(repr(tb.Filters()), repr(obj.filters)) + + +class FiltersCase1(FiltersTreeTestCase): + filters = tb.Filters() + gfilters = tb.Filters(complevel=1) + open_kwargs = dict(filters=filters) + + +@common.unittest.skipIf( + not common.bzip2_avail, "BZIP2 compression library not available" +) +class FiltersCase2(FiltersTreeTestCase): + filters = tb.Filters(complevel=1, complib="bzip2") + gfilters = tb.Filters(complevel=1) + open_kwargs = dict(filters=filters) + + +@common.unittest.skipIf( + not common.lzo_avail, "LZO compression library not available" +) +class FiltersCase3(FiltersTreeTestCase): + filters = tb.Filters(shuffle=True, complib="zlib") + gfilters = tb.Filters(complevel=1, shuffle=False, complib="lzo") + open_kwargs = dict(filters=filters) + + +class FiltersCase4(FiltersTreeTestCase): + filters = tb.Filters(shuffle=True) + gfilters = tb.Filters(complevel=1, shuffle=False) + open_kwargs = dict(filters=filters) + + +class FiltersCase5(FiltersTreeTestCase): + filters = tb.Filters(fletcher32=True) + gfilters = tb.Filters(complevel=1, 
shuffle=False) + open_kwargs = dict(filters=filters) + + +class FiltersCase6(FiltersTreeTestCase): + filters = None + gfilters = tb.Filters(complevel=1, shuffle=False) + open_kwargs = dict(filters=filters) + + +class FiltersCase7(FiltersTreeTestCase): + filters = tb.Filters(complevel=1) + gfilters = None + open_kwargs = dict(filters=filters) + + +class FiltersCase8(FiltersTreeTestCase): + filters = None + gfilters = None + open_kwargs = dict(filters=filters) + + +@common.unittest.skipIf( + not common.bzip2_avail, "BZIP2 compression library not available" +) +class FiltersCase9(FiltersTreeTestCase): + filters = tb.Filters(shuffle=True, complib="zlib") + gfilters = tb.Filters(complevel=5, shuffle=True, complib="bzip2") + open_kwargs = dict(filters=filters) + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class FiltersCase10(FiltersTreeTestCase): + filters = tb.Filters(shuffle=False, complevel=1, complib="blosc") + gfilters = tb.Filters(complevel=5, shuffle=True, complib="blosc") + open_kwargs = dict(filters=filters) + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class FiltersCaseBloscBloscLZ(FiltersTreeTestCase): + filters = tb.Filters(shuffle=False, complevel=1, complib="blosc:blosclz") + gfilters = tb.Filters(complevel=5, shuffle=True, complib="blosc:blosclz") + open_kwargs = dict(filters=filters) + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +@common.unittest.skipIf( + "lz4" not in tb.blosc_compressor_list(), "lz4 required" +) +class FiltersCaseBloscLZ4(FiltersTreeTestCase): + def setUp(self): + self.filters = tb.Filters( + shuffle=False, complevel=1, complib="blosc:lz4" + ) + self.gfilters = tb.Filters( + complevel=5, shuffle=True, complib="blosc:lz4" + ) + self.open_kwargs = dict(filters=self.filters) + super().setUp() + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library 
not available" +) +@common.unittest.skipIf( + "lz4" not in tb.blosc_compressor_list(), "lz4 required" +) +class FiltersCaseBloscLZ4HC(FiltersTreeTestCase): + def setUp(self): + self.filters = tb.Filters( + shuffle=False, complevel=1, complib="blosc:lz4hc" + ) + self.gfilters = tb.Filters( + complevel=5, shuffle=True, complib="blosc:lz4hc" + ) + self.open_kwargs = dict(filters=self.filters) + super().setUp() + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +@common.unittest.skipIf( + "snappy" not in tb.blosc_compressor_list(), "snappy required" +) +class FiltersCaseBloscSnappy(FiltersTreeTestCase): + def setUp(self): + self.filters = tb.Filters( + shuffle=False, complevel=1, complib="blosc:snappy" + ) + self.gfilters = tb.Filters( + complevel=5, shuffle=True, complib="blosc:snappy" + ) + self.open_kwargs = dict(filters=self.filters) + super().setUp() + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +@common.unittest.skipIf( + "zlib" not in tb.blosc_compressor_list(), "zlib required" +) +class FiltersCaseBloscZlib(FiltersTreeTestCase): + def setUp(self): + self.filters = tb.Filters( + shuffle=False, complevel=1, complib="blosc:zlib" + ) + self.gfilters = tb.Filters( + complevel=5, shuffle=True, complib="blosc:zlib" + ) + self.open_kwargs = dict(filters=self.filters) + super().setUp() + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +@common.unittest.skipIf( + "zstd" not in tb.blosc_compressor_list(), "zstd required" +) +class FiltersCaseBloscZstd(FiltersTreeTestCase): + def setUp(self): + self.filters = tb.Filters( + shuffle=False, complevel=1, complib="blosc:zstd" + ) + self.gfilters = tb.Filters( + complevel=5, shuffle=True, complib="blosc:zstd" + ) + self.open_kwargs = dict(filters=self.filters) + super().setUp() + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" 
+) +class FiltersCaseBloscBitShuffle(FiltersTreeTestCase): + filters = tb.Filters(shuffle=False, complevel=1, complib="blosc:blosclz") + gfilters = tb.Filters( + complevel=5, shuffle=False, bitshuffle=True, complib="blosc:blosclz" + ) + open_kwargs = dict(filters=filters) + # print("version:", tables.which_lib_version("blosc")[1]) + + +class CopyGroupTestCase(common.TempFileMixin, common.PyTablesTestCase): + title = "A title" + nrows = 10 + + def setUp(self): + super().setUp() + + # Create a temporary file + self.h5fname2 = tempfile.mktemp(".h5") + + # Create the destination + self.h5file2 = tb.open_file(self.h5fname2, "w") + self.populateFile() + + def populateFile(self): + group = self.h5file.root + # Add some user attrs: + group._v_attrs.attr1 = "an string for root group" + group._v_attrs.attr2 = 124 + # Create a tree + for group_i in range(5): + for bgroup_i in range(2): + # Create a new group (brother of group) + group2 = self.h5file.create_group( + group, "bgroup" + str(bgroup_i), filters=None + ) + + # Create a table + table = self.h5file.create_table( + group2, "table1", Record2, title=self.title, filters=None + ) + # Get the record object associated with the new table + d = table.row + # Fill the table + for row_i in range(self.nrows): + d["var1"] = "%04d" % (self.nrows - row_i) + d["var2"] = row_i + d["var3"] = row_i * 2 + d.append() # This injects the Record values + # Flush the buffer for this table + table.flush() + + # Add some user attrs: + table.attrs.attr1 = "an string" + table.attrs.attr2 = 234 + + # Create a couple of arrays in each group + var1List = [x["var1"] for x in table.iterrows()] + var3List = [x["var3"] for x in table.iterrows()] + + self.h5file.create_array(group2, "array1", var1List, "col 1") + self.h5file.create_array(group2, "array2", var3List, "col 3") + + # Create a couple of EArrays as well + ea1 = self.h5file.create_earray( + group2, "earray1", tb.StringAtom(itemsize=4), (0,), "col 1" + ) + ea2 = self.h5file.create_earray( + 
group2, "earray2", tb.Int16Atom(), (0,), "col 3" + ) + # Add some user attrs: + ea1.attrs.attr1 = "an string for earray" + ea2.attrs.attr2 = 123 + # And fill them with some values + ea1.append(var1List) + ea2.append(var3List) + + # Create a new group (descendant of group) + group3 = self.h5file.create_group( + group, "group" + str(group_i), filters=None + ) + # Iterate over this new group (group3) + group = group3 + # Add some user attrs: + group._v_attrs.attr1 = "an string for group" + group._v_attrs.attr2 = 124 + + def tearDown(self): + # Close the file + if self.h5file2.isopen: + self.h5file2.close() + Path(self.h5fname2).unlink() + + super().tearDown() + + def test00_nonRecursive(self): + """Checking non-recursive copy of a Group""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test00_nonRecursive..." % self.__class__.__name__ + ) + + # Copy a group non-recursively + srcgroup = self.h5file.root.group0.group1 + # srcgroup._f_copy_children(self.h5file2.root, recursive=False, + # filters=self.filters) + self.h5file.copy_children( + srcgroup, self.h5file2.root, recursive=False, filters=self.filters + ) + if self.close: + # Close the destination file + self.h5file2.close() + # And open it again + self.h5file2 = tb.open_file(self.h5fname2, "r") + + # Check that the copy has been done correctly + dstgroup = self.h5file2.root + nodelist1 = list(srcgroup._v_children) + nodelist2 = list(dstgroup._v_children) + # Sort the lists + nodelist1.sort() + nodelist2.sort() + if common.verbose: + print("The origin node list -->", nodelist1) + print("The copied node list -->", nodelist2) + self.assertEqual(srcgroup._v_nchildren, dstgroup._v_nchildren) + self.assertEqual(nodelist1, nodelist2) + + def test01_nonRecursiveAttrs(self): + """Checking non-recursive copy of a Group (attributes copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + f"Running {self.__class__.__name__}" + f".test01_nonRecursiveAttrs..." 
+ ) + + # Copy a group non-recursively with attrs + srcgroup = self.h5file.root.group0.group1 + srcgroup._f_copy_children( + self.h5file2.root, + recursive=False, + filters=self.filters, + copyuserattrs=1, + ) + if self.close: + # Close the destination file + self.h5file2.close() + # And open it again + self.h5file2 = tb.open_file(self.h5fname2, "r") + + # Check that the copy has been done correctly + dstgroup = self.h5file2.root + for srcnode in srcgroup: + dstnode = getattr(dstgroup, srcnode._v_name) + if isinstance(srcnode, tb.Group): + srcattrs = srcnode._v_attrs + srcattrskeys = srcattrs._f_list("all") + dstattrs = dstnode._v_attrs + dstattrskeys = dstattrs._f_list("all") + else: + srcattrs = srcnode.attrs + srcattrskeys = srcattrs._f_list("all") + dstattrs = dstnode.attrs + dstattrskeys = dstattrs._f_list("all") + + # Filters may differ, do not take into account + if self.filters is not None: + dstattrskeys.remove("FILTERS") + + # These lists should already be ordered + if common.verbose: + print( + f"srcattrskeys for node {srcnode._v_name}: " + f"{srcattrskeys}" + ) + print( + f"dstattrskeys for node {dstnode._v_name}: " + f"{dstattrskeys}" + ) + self.assertEqual(srcattrskeys, dstattrskeys) + if common.verbose: + print("The attrs names has been copied correctly") + + # Now, for the contents of attributes + for srcattrname in srcattrskeys: + srcattrvalue = str(getattr(srcattrs, srcattrname)) + dstattrvalue = str(getattr(dstattrs, srcattrname)) + self.assertEqual(srcattrvalue, dstattrvalue) + if self.filters is not None: + self.assertEqual(dstattrs.FILTERS, self.filters) + + if common.verbose: + print("The attrs contents has been copied correctly") + + def test02_Recursive(self): + """Checking recursive copy of a Group""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_Recursive..." 
% self.__class__.__name__) + + # Create the destination node + group = self.h5file2.root + for groupname in self.dstnode.split("/"): + if groupname: + group = self.h5file2.create_group(group, groupname) + dstgroup = self.h5file2.get_node(self.dstnode) + + # Copy a group non-recursively + srcgroup = self.h5file.get_node(self.srcnode) + self.h5file.copy_children( + srcgroup, dstgroup, recursive=True, filters=self.filters + ) + lenSrcGroup = len(srcgroup._v_pathname) + if lenSrcGroup == 1: + lenSrcGroup = 0 # Case where srcgroup == "/" + if self.close: + # Close the destination file + self.h5file2.close() + # And open it again + self.h5file2 = tb.open_file(self.h5fname2, "r") + dstgroup = self.h5file2.get_node(self.dstnode) + + # Check that the copy has been done correctly + lenDstGroup = len(dstgroup._v_pathname) + if lenDstGroup == 1: + lenDstGroup = 0 # Case where dstgroup == "/" + first = 1 + nodelist1 = [] + for node in srcgroup._f_walknodes(): + if first: + # skip the first group + first = 0 + continue + nodelist1.append(node._v_pathname[lenSrcGroup:]) + + first = 1 + nodelist2 = [] + for node in dstgroup._f_walknodes(): + if first: + # skip the first group + first = 0 + continue + nodelist2.append(node._v_pathname[lenDstGroup:]) + + if common.verbose: + print("The origin node list -->", nodelist1) + print("The copied node list -->", nodelist2) + self.assertEqual(nodelist1, nodelist2) + + def test03_RecursiveFilters(self): + """Checking recursive copy of a Group (cheking Filters)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + f"Running {self.__class__.__name__}" + f".test03_RecursiveFilters..." 
+ ) + + # Create the destination node + group = self.h5file2.root + for groupname in self.dstnode.split("/"): + if groupname: + group = self.h5file2.create_group(group, groupname) + dstgroup = self.h5file2.get_node(self.dstnode) + + # Copy a group non-recursively + srcgroup = self.h5file.get_node(self.srcnode) + srcgroup._f_copy_children( + dstgroup, recursive=True, filters=self.filters + ) + lenSrcGroup = len(srcgroup._v_pathname) + if lenSrcGroup == 1: + lenSrcGroup = 0 # Case where srcgroup == "/" + if self.close: + # Close the destination file + self.h5file2.close() + # And open it again + self.h5file2 = tb.open_file(self.h5fname2, "r") + dstgroup = self.h5file2.get_node(self.dstnode) + + # Check that the copy has been done correctly + lenDstGroup = len(dstgroup._v_pathname) + if lenDstGroup == 1: + lenDstGroup = 0 # Case where dstgroup == "/" + first = 1 + nodelist1 = {} + for node in srcgroup._f_walknodes(): + if first: + # skip the first group + first = 0 + continue + nodelist1[node._v_name] = node._v_pathname[lenSrcGroup:] + + first = 1 + for node in dstgroup._f_walknodes(): + if first: + # skip the first group + first = 0 + continue + if isinstance(node, tb.Group): + repr(node._v_filters) == repr(nodelist1[node._v_name]) + else: + repr(node.filters) == repr(nodelist1[node._v_name]) + + +class CopyGroupCase1(CopyGroupTestCase): + close = 0 + filters = None + srcnode = "/group0/group1" + dstnode = "/" + + +class CopyGroupCase2(CopyGroupTestCase): + close = 1 + filters = None + srcnode = "/group0/group1" + dstnode = "/" + + +class CopyGroupCase3(CopyGroupTestCase): + close = 0 + filters = None + srcnode = "/group0" + dstnode = "/group2/group3" + + +class CopyGroupCase4(CopyGroupTestCase): + close = 1 + filters = tb.Filters(complevel=1) + srcnode = "/group0" + dstnode = "/group2/group3" + + +class CopyGroupCase5(CopyGroupTestCase): + close = 0 + filters = tb.Filters() + srcnode = "/" + dstnode = "/group2/group3" + + +class CopyGroupCase6(CopyGroupTestCase): + 
close = 1 + filters = tb.Filters(fletcher32=True) + srcnode = "/group0" + dstnode = "/group2/group3" + + +class CopyGroupCase7(CopyGroupTestCase): + close = 0 + filters = tb.Filters(complevel=1, shuffle=False) + srcnode = "/" + dstnode = "/" + + +@common.unittest.skipIf( + not common.lzo_avail, "LZO compression library not available" +) +class CopyGroupCase8(CopyGroupTestCase): + close = 1 + filters = tb.Filters(complevel=1, complib="lzo") + srcnode = "/" + dstnode = "/" + + +class CopyFileTestCase(common.TempFileMixin, common.PyTablesTestCase): + title = "A title" + nrows = 10 + + def setUp(self): + super().setUp() + + # Create a temporary file + self.h5fname2 = tempfile.mktemp(".h5") + + # Create the source file + self.populateFile() + + def populateFile(self): + group = self.h5file.root + # Add some user attrs: + group._v_attrs.attr1 = "an string for root group" + group._v_attrs.attr2 = 124 + # Create a tree + for group_i in range(5): + for bgroup_i in range(2): + # Create a new group (brother of group) + group2 = self.h5file.create_group( + group, "bgroup" + str(bgroup_i), filters=None + ) + + # Create a table + table = self.h5file.create_table( + group2, "table1", Record2, title=self.title, filters=None + ) + # Get the record object associated with the new table + d = table.row + # Fill the table + for row_i in range(self.nrows): + d["var1"] = "%04d" % (self.nrows - row_i) + d["var2"] = row_i + d["var3"] = row_i * 2 + d.append() # This injects the Record values + # Flush the buffer for this table + table.flush() + + # Add some user attrs: + table.attrs.attr1 = "an string" + table.attrs.attr2 = 234 + + # Create a couple of arrays in each group + var1List = [x["var1"] for x in table.iterrows()] + var3List = [x["var3"] for x in table.iterrows()] + + self.h5file.create_array(group2, "array1", var1List, "col 1") + self.h5file.create_array(group2, "array2", var3List, "col 3") + + # Create a couple of EArrays as well + ea1 = self.h5file.create_earray( + group2, 
"earray1", tb.StringAtom(itemsize=4), (0,), "col 1" + ) + ea2 = self.h5file.create_earray( + group2, "earray2", tb.Int16Atom(), (0,), "col 3" + ) + # Add some user attrs: + ea1.attrs.attr1 = "an string for earray" + ea2.attrs.attr2 = 123 + # And fill them with some values + ea1.append(var1List) + ea2.append(var3List) + + # Create a new group (descendant of group) + group3 = self.h5file.create_group( + group, "group" + str(group_i), filters=None + ) + # Iterate over this new group (group3) + group = group3 + # Add some user attrs: + group._v_attrs.attr1 = "an string for group" + group._v_attrs.attr2 = 124 + + def tearDown(self): + # Close the file + if hasattr(self, "h5file2") and self.h5file2.isopen: + self.h5file2.close() + + if hasattr(self, "h5fname2") and Path(self.h5fname2).is_file(): + Path(self.h5fname2).unlink() + + super().tearDown() + + def test00_overwrite(self): + """Checking copy of a File (overwriting file)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_overwrite..." 
% self.__class__.__name__) + + # Create a temporary file + Path(self.h5fname2).write_text("") + + # Copy the file to the destination + self.h5file.copy_file( + self.h5fname2, + title=self.title, + overwrite=1, + copyuserattrs=0, + filters=None, + ) + + # Close the original file, if needed + if self.close: + self._reopen() + + # ...and open the destination file + self.h5file2 = tb.open_file(self.h5fname2, "r") + + # Check that the copy has been done correctly + srcgroup = self.h5file.root + dstgroup = self.h5file2.root + nodelist1 = list(srcgroup._v_children) + nodelist2 = list(dstgroup._v_children) + # Sort the lists + nodelist1.sort() + nodelist2.sort() + if common.verbose: + print("The origin node list -->", nodelist1) + print("The copied node list -->", nodelist2) + self.assertEqual(srcgroup._v_nchildren, dstgroup._v_nchildren) + self.assertEqual(nodelist1, nodelist2) + self.assertEqual(self.h5file2.title, self.title) + + def test00a_srcdstequal(self): + """Checking copy of a File (srcfile == dstfile)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test00a_srcdstequal..." % self.__class__.__name__ + ) + + # Copy the file to the destination + self.assertRaises(IOError, self.h5file.copy_file, self.h5file.filename) + + def test00b_firstclass(self): + """Checking copy of a File (first-class function)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00b_firstclass..." 
% self.__class__.__name__) + + # Close the temporary file + self.h5file.close() + + # Copy the file to the destination + tb.copy_file( + self.h5fname, + self.h5fname2, + title=self.title, + copyuserattrs=0, + filters=None, + overwrite=1, + ) + + # ...and open the source and destination file + self.h5file = tb.open_file(self.h5fname, "r") + self.h5file2 = tb.open_file(self.h5fname2, "r") + + # Check that the copy has been done correctly + srcgroup = self.h5file.root + dstgroup = self.h5file2.root + nodelist1 = list(srcgroup._v_children) + nodelist2 = list(dstgroup._v_children) + + # Sort the lists + nodelist1.sort() + nodelist2.sort() + if common.verbose: + print("The origin node list -->", nodelist1) + print("The copied node list -->", nodelist2) + self.assertEqual(srcgroup._v_nchildren, dstgroup._v_nchildren) + self.assertEqual(nodelist1, nodelist2) + self.assertEqual(self.h5file2.title, self.title) + + def test01_copy(self): + """Checking copy of a File (attributes not copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_copy..." 
% self.__class__.__name__) + + # Copy the file to the destination + self.h5file.copy_file( + self.h5fname2, + title=self.title, + copyuserattrs=0, + filters=self.filters, + ) + + # Close the original file, if needed + if self.close: + self._reopen() + + # ...and open the destination file + self.h5file2 = tb.open_file(self.h5fname2, "r") + + # Check that the copy has been done correctly + srcgroup = self.h5file.root + dstgroup = self.h5file2.root + nodelist1 = list(srcgroup._v_children) + nodelist2 = list(dstgroup._v_children) + + # Sort the lists + nodelist1.sort() + nodelist2.sort() + if common.verbose: + print("The origin node list -->", nodelist1) + print("The copied node list -->", nodelist2) + self.assertEqual(srcgroup._v_nchildren, dstgroup._v_nchildren) + self.assertEqual(nodelist1, nodelist2) + # print("_v_attrnames-->", self.h5file2.root._v_attrs._v_attrnames) + # print("--> <%s,%s>" % (self.h5file2.title, self.title)) + self.assertEqual(self.h5file2.title, self.title) + + # Check that user attributes has not been copied + for srcnode in srcgroup: + dstnode = getattr(dstgroup, srcnode._v_name) + srcattrs = srcnode._v_attrs + srcattrskeys = srcattrs._f_list("sys") + dstattrs = dstnode._v_attrs + dstattrskeys = dstattrs._f_list("all") + + # Filters may differ, do not take into account + if self.filters is not None: + dstattrskeys.remove("FILTERS") + + # These lists should already be ordered + if common.verbose: + print( + f"srcattrskeys for node {srcnode._v_name}: " + f"{srcattrskeys}" + ) + print( + f"dstattrskeys for node {dstnode._v_name}: " + f"{dstattrskeys}" + ) + self.assertEqual(srcattrskeys, dstattrskeys) + if common.verbose: + print("The attrs names has been copied correctly") + + # Now, for the contents of attributes + for srcattrname in srcattrskeys: + srcattrvalue = str(getattr(srcattrs, srcattrname)) + dstattrvalue = str(getattr(dstattrs, srcattrname)) + self.assertEqual(srcattrvalue, dstattrvalue) + if self.filters is not None: + 
self.assertEqual(dstattrs.FILTERS, self.filters) + + if common.verbose: + print("The attrs contents has been copied correctly") + + def test02_Attrs(self): + """Checking copy of a File (attributes copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_Attrs..." % self.__class__.__name__) + + # Copy the file to the destination + self.h5file.copy_file( + self.h5fname2, + title=self.title, + copyuserattrs=1, + filters=self.filters, + ) + + # Close the original file, if needed + if self.close: + self._reopen() + + # ...and open the destination file + self.h5file2 = tb.open_file(self.h5fname2, "r") + + # Check that the copy has been done correctly + srcgroup = self.h5file.root + dstgroup = self.h5file2.root + for srcnode in srcgroup: + dstnode = getattr(dstgroup, srcnode._v_name) + srcattrs = srcnode._v_attrs + srcattrskeys = srcattrs._f_list("all") + dstattrs = dstnode._v_attrs + dstattrskeys = dstattrs._f_list("all") + # These lists should already be ordered + if common.verbose: + print( + f"srcattrskeys for node {srcnode._v_name}: " + f"{srcattrskeys}" + ) + print( + f"dstattrskeys for node {dstnode._v_name}: " + f"{dstattrskeys}" + ) + + # Filters may differ, do not take into account + if self.filters is not None: + dstattrskeys.remove("FILTERS") + self.assertEqual(srcattrskeys, dstattrskeys) + if common.verbose: + print("The attrs names has been copied correctly") + + # Now, for the contents of attributes + for srcattrname in srcattrskeys: + srcattrvalue = str(getattr(srcattrs, srcattrname)) + dstattrvalue = str(getattr(dstattrs, srcattrname)) + self.assertEqual(srcattrvalue, dstattrvalue) + if self.filters is not None: + self.assertEqual(dstattrs.FILTERS, self.filters) + + if common.verbose: + print("The attrs contents has been copied correctly") + + +class CopyFileCase1(CopyFileTestCase): + close = 0 + title = "A new title" + filters = None + + +class CopyFileCase2(CopyFileTestCase): + close = 1 + title = "A new title" + filters = 
None + + +class CopyFileCase3(CopyFileTestCase): + close = 0 + title = "A new title" + filters = tb.Filters(complevel=1) + + +class CopyFileCase4(CopyFileTestCase): + close = 1 + title = "A new title" + filters = tb.Filters(complevel=1) + + +class CopyFileCase5(CopyFileTestCase): + close = 0 + title = "A new title" + filters = tb.Filters(fletcher32=True) + + +class CopyFileCase6(CopyFileTestCase): + close = 1 + title = "A new title" + filters = tb.Filters(fletcher32=True) + + +class CopyFileCase7(CopyFileTestCase): + close = 0 + title = "A new title" + filters = tb.Filters(complevel=1, complib="lzo") + + +class CopyFileCase8(CopyFileTestCase): + close = 1 + title = "A new title" + filters = tb.Filters(complevel=1, complib="lzo") + + +class CopyFileCase10(common.TempFileMixin, common.PyTablesTestCase): + + def test01_notoverwrite(self): + """Checking copy of a File (checking not overwriting)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_notoverwrite..." % self.__class__.__name__ + ) + + # Create two empty files: + self.h5fname2 = tempfile.mktemp(".h5") + self.h5file2 = tb.open_file(self.h5fname2, "w") + self.h5file2.close() # close the second one + + try: + # Copy the first into the second + self.assertRaises( + IOError, self.h5file.copy_file, self.h5fname2, overwrite=False + ) + finally: + # Delete files + Path(self.h5fname2).unlink() + + +class GroupFiltersTestCase(common.TempFileMixin, common.PyTablesTestCase): + filters = tb.Filters(complevel=4) # something non-default + + def setUp(self): + super().setUp() + + atom, shape = tb.IntAtom(), (1, 1) + create_group = self.h5file.create_group + create_carray = self.h5file.create_carray + + create_group("/", "implicit_no") + create_group("/implicit_no", "implicit_no") + create_carray( + "/implicit_no/implicit_no", "implicit_no", atom=atom, shape=shape + ) + create_carray( + "/implicit_no/implicit_no", + "explicit_no", + atom=atom, + shape=shape, + filters=tb.Filters(), + ) + 
create_carray( + "/implicit_no/implicit_no", + "explicit_yes", + atom=atom, + shape=shape, + filters=self.filters, + ) + + create_group("/", "explicit_yes", filters=self.filters) + create_group("/explicit_yes", "implicit_yes") + create_carray( + "/explicit_yes/implicit_yes", + "implicit_yes", + atom=atom, + shape=shape, + ) + create_carray( + "/explicit_yes/implicit_yes", + "explicit_yes", + atom=atom, + shape=shape, + filters=self.filters, + ) + create_carray( + "/explicit_yes/implicit_yes", + "explicit_no", + atom=atom, + shape=shape, + filters=tb.Filters(), + ) + + def _check_filters(self, h5file, filters=None): + for node in h5file: + # Get node filters. + if hasattr(node, "filters"): + node_filters = node.filters + else: + node_filters = node._v_filters + + # Compare to given filters. + if filters is not None: + self.assertEqual(node_filters, filters) + return + + # Guess filters to compare to by node name. + if node._v_name.endswith("_no"): + self.assertEqual( + node_filters, + tb.Filters(), + "node ``%s`` should have no filters" % node._v_pathname, + ) + elif node._v_name.endswith("_yes"): + self.assertEqual( + node_filters, + self.filters, + "node ``%s`` should have filters" % node._v_pathname, + ) + + def test00_propagate(self): + """Filters propagating to children.""" + + self._check_filters(self.h5file) + + def _test_copyFile(self, filters=None): + copyfname = tempfile.mktemp(suffix=".h5") + try: + self.h5file.copy_file(copyfname, filters=filters) + try: + copyf = tb.open_file(copyfname) + self._check_filters(copyf, filters=filters) + finally: + copyf.close() + finally: + Path(copyfname).unlink() + + def test01_copyFile(self): + """Keeping filters when copying a file.""" + + self._test_copyFile() + + def test02_copyFile_override(self): + """Overriding filters when copying a file.""" + + self._test_copyFile(self.filters) + + def _test_change(self, pathname, change_filters, new_filters): + group = self.h5file.get_node(pathname) + + # Check expected current 
filters. + old_filters = tb.Filters() + if pathname.endswith("_yes"): + old_filters = self.filters + self.assertEqual(group._v_filters, old_filters) + + # Change filters. + change_filters(group) + self.assertEqual(group._v_filters, new_filters) + + # Get and check changed filters. + if self._reopen(): + group = self.h5file.get_node(pathname) + self.assertEqual(group._v_filters, new_filters) + + def test03_change(self): + """Changing the filters of a group.""" + + def set_filters(group): + group._v_filters = self.filters + + self._test_change("/", set_filters, self.filters) + + def test04_delete(self): + """Deleting the filters of a group.""" + + def del_filters(group): + del group._v_filters + + self._test_change("/explicit_yes", del_filters, tb.Filters()) + + +@common.unittest.skipIf(not common.blosc_avail, "BLOSC not available") +class SetBloscMaxThreadsTestCase( + common.TempFileMixin, common.PyTablesTestCase +): + filters = tb.Filters(complevel=4, complib="blosc") + + def test00(self): + """Checking set_blosc_max_threads()""" + + nthreads_old = tb.set_blosc_max_threads(4) + if common.verbose: + print("Previous max threads:", nthreads_old) + print("Should be:", self.h5file.params["MAX_BLOSC_THREADS"]) + self.assertEqual(nthreads_old, self.h5file.params["MAX_BLOSC_THREADS"]) + self.h5file.create_carray( + "/", + "some_array", + atom=tb.Int32Atom(), + shape=(3, 3), + filters=self.filters, + ) + nthreads_old = tb.set_blosc_max_threads(1) + if common.verbose: + print("Previous max threads:", nthreads_old) + print("Should be:", 4) + self.assertEqual(nthreads_old, 4) + + def test01(self): + """Checking set_blosc_max_threads() (re-open)""" + + nthreads_old = tb.set_blosc_max_threads(4) + self.h5file.create_carray( + "/", + "some_array", + atom=tb.Int32Atom(), + shape=(3, 3), + filters=self.filters, + ) + self._reopen() + nthreads_old = tb.set_blosc_max_threads(4) + if common.verbose: + print("Previous max threads:", nthreads_old) + print("Should be:", 
self.h5file.params["MAX_BLOSC_THREADS"]) + self.assertEqual(nthreads_old, self.h5file.params["MAX_BLOSC_THREADS"]) + + +class FilterTestCase(common.PyTablesTestCase): + def test_filter_pack_type(self): + self.assertEqual(type(tb.Filters()._pack()), np.int64) + + @staticmethod + def _hexl(n): + return hex(int(n)) + + def test_filter_pack_01(self): + filter_ = tb.Filters() + self.assertEqual(self._hexl(filter_._pack()), "0x0") + + def test_filter_pack_02(self): + filter_ = tb.Filters(1, shuffle=False) + self.assertEqual(self._hexl(filter_._pack()), "0x101") + + def test_filter_pack_03(self): + filter_ = tb.Filters(9, "zlib", shuffle=True, fletcher32=True) + self.assertEqual(self._hexl(filter_._pack()), "0x30109") + + def test_filter_pack_04(self): + filter_ = tb.Filters(1, shuffle=False, least_significant_digit=5) + self.assertEqual(self._hexl(filter_._pack()), "0x5040101") + + def test_filter_unpack_01(self): + filter_ = tb.Filters._unpack(np.int64(0x0)) + self.assertFalse(filter_.shuffle) + self.assertFalse(filter_.fletcher32) + self.assertEqual(filter_.least_significant_digit, None) + self.assertEqual(filter_.complevel, 0) + self.assertEqual(filter_.complib, None) + + def test_filter_unpack_02(self): + filter_ = tb.Filters._unpack(np.int64(0x101)) + self.assertFalse(filter_.shuffle) + self.assertFalse(filter_.fletcher32) + self.assertEqual(filter_.least_significant_digit, None) + self.assertEqual(filter_.complevel, 1) + self.assertEqual(filter_.complib, "zlib") + + def test_filter_unpack_03(self): + filter_ = tb.Filters._unpack(np.int64(0x30109)) + self.assertTrue(filter_.shuffle) + self.assertTrue(filter_.fletcher32) + self.assertEqual(filter_.least_significant_digit, None) + self.assertEqual(filter_.complevel, 9) + self.assertEqual(filter_.complib, "zlib") + + def test_filter_unpack_04(self): + filter_ = tb.Filters._unpack(np.int64(0x5040101)) + self.assertFalse(filter_.shuffle) + self.assertFalse(filter_.fletcher32) + 
self.assertEqual(filter_.least_significant_digit, 5) + self.assertEqual(filter_.complevel, 1) + self.assertEqual(filter_.complib, "zlib") + + +class DefaultDriverTestCase(common.TempFileMixin, common.PyTablesTestCase): + DRIVER = None + DRIVER_PARAMS = {} + open_kwargs = dict(driver=DRIVER, **DRIVER_PARAMS) + + def setUp(self): + super().setUp() + + # Create an HDF5 file and contents + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr", 41) + self.h5file.create_array(root, "array", [1, 2], title="array") + self.h5file.create_table( + root, "table", {"var1": tb.IntCol()}, title="table" + ) + + def assertIsFile(self): + self.assertTrue(Path(self.h5fname).is_file()) + + def test_newFile(self): + self.assertIsInstance(self.h5file, tb.File) + self.assertIsFile() + + def test_readFile(self): + self.h5file.close() + self.h5file = None + + self.assertIsFile() + + # Open an existing HDF5 file + self.h5file = tb.open_file( + self.h5fname, mode="r", driver=self.DRIVER, **self.DRIVER_PARAMS + ) + + # check contents + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + def test_openFileA(self): + self.h5file.close() + self.h5file = None + + self.assertIsFile() + + # Open an existing HDF5 file in append mode + self.h5file = tb.open_file( + self.h5fname, mode="a", driver=self.DRIVER, **self.DRIVER_PARAMS + ) + + # check contents + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + 
self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + # write new data + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr2", 42) + self.h5file.create_array(root, "array2", [1, 2], title="array2") + self.h5file.create_table( + root, "table2", {"var2": tb.FloatCol()}, title="table2" + ) + + # check contents + self._reopen(mode="a", driver=self.DRIVER, **self.DRIVER_PARAMS) + + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + self.assertEqual(self.h5file.get_node_attr(root, "testattr2"), 42) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.array2, tb.Array) + self.assertEqual(root.array2._v_title, "array2") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + self.assertIsInstance(root.table2, tb.Table) + self.assertEqual(root.table2._v_title, "table2") + self.assertIn("var2", root.table2.colnames) + self.assertEqual(root.table2.cols.var2.dtype, tb.FloatCol().dtype) + + def test_openFileRW(self): + self.h5file.close() + self.h5file = None + + self.assertIsFile() + + # Open an existing HDF5 file in append mode + self.h5file = tb.open_file( + self.h5fname, mode="r+", driver=self.DRIVER, **self.DRIVER_PARAMS + ) + + # check contents + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + # write new data + self.h5file.set_node_attr(root, "testattr2", 42) + 
self.h5file.create_array(root, "array2", [1, 2], title="array2") + self.h5file.create_table( + root, "table2", {"var2": tb.FloatCol()}, title="table2" + ) + + # check contents + self._reopen(mode="r+", driver=self.DRIVER, **self.DRIVER_PARAMS) + + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + self.assertEqual(self.h5file.get_node_attr(root, "testattr2"), 42) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.array2, tb.Array) + self.assertEqual(root.array2._v_title, "array2") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + self.assertIsInstance(root.table2, tb.Table) + self.assertEqual(root.table2._v_title, "table2") + self.assertIn("var2", root.table2.colnames) + self.assertEqual(root.table2.cols.var2.dtype, tb.FloatCol().dtype) + + +class Sec2DriverTestCase(DefaultDriverTestCase): + DRIVER = "H5FD_SEC2" + open_kwargs = dict(driver=DRIVER, **DefaultDriverTestCase.DRIVER_PARAMS) + + def test_get_file_image(self): + image = self.h5file.get_file_image() + self.assertGreater(len(image), 0) + self.assertEqual([i for i in image[:4]], [137, 72, 68, 70]) + + +class StdioDriverTestCase(DefaultDriverTestCase): + DRIVER = "H5FD_STDIO" + open_kwargs = dict(driver=DRIVER, **DefaultDriverTestCase.DRIVER_PARAMS) + + def test_get_file_image(self): + image = self.h5file.get_file_image() + self.assertGreater(len(image), 0) + self.assertEqual([i for i in image[:4]], [137, 72, 68, 70]) + + +class CoreDriverTestCase(DefaultDriverTestCase): + DRIVER = "H5FD_CORE" + open_kwargs = dict(driver=DRIVER, **DefaultDriverTestCase.DRIVER_PARAMS) + + def test_get_file_image(self): + image = self.h5file.get_file_image() + self.assertGreater(len(image), 0) + self.assertEqual([i for i in image[:4]], [137, 72, 68, 
70]) + + +class CoreDriverNoBackingStoreTestCase(common.PyTablesTestCase): + DRIVER = "H5FD_CORE" + + def setUp(self): + super().setUp() + + self.h5fname = tempfile.mktemp(suffix=".h5") + self.h5file = None + + def tearDown(self): + if self.h5file: + self.h5file.close() + elif self.h5fname in tb.file._open_files: + open_files = tb.file._open_files + for h5file in open_files.get_handlers_by_name(self.h5fname): + h5file.close() + + self.h5file = None + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + + super().tearDown() + + def test_newFile(self): + """Ensure that nothing is written to file.""" + + self.assertFalse(Path(self.h5fname).is_file()) + + self.h5file = tb.open_file( + self.h5fname, + mode="w", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + + # Create an HDF5 file and contents + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr", 41) + self.h5file.create_array(root, "array", [1, 2], title="array") + self.h5file.create_table( + root, "table", {"var1": tb.IntCol()}, title="table" + ) + self.h5file.close() # flush + + self.assertFalse(Path(self.h5fname).is_file()) + + def test_readNewFileW(self): + self.assertFalse(Path(self.h5fname).is_file()) + + # Create an HDF5 file and contents + self.h5file = tb.open_file( + self.h5fname, + mode="w", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr", 41) + self.h5file.create_array(root, "array", [1, 2], title="array") + self.h5file.create_table( + root, "table", {"var1": tb.IntCol()}, title="table" + ) + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + 
self.h5file.close() # flush + + self.assertFalse(Path(self.h5fname).is_file()) + + def test_readNewFileA(self): + self.assertFalse(Path(self.h5fname).is_file()) + + # Create an HDF5 file and contents + self.h5file = tb.open_file( + self.h5fname, + mode="a", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr", 41) + self.h5file.create_array(root, "array", [1, 2], title="array") + self.h5file.create_table( + root, "table", {"var1": tb.IntCol()}, title="table" + ) + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + self.h5file.close() # flush + + self.assertFalse(Path(self.h5fname).is_file()) + + def test_openNewFileRW(self): + self.assertFalse(Path(self.h5fname).is_file()) + self.assertRaises( + tb.HDF5ExtError, + tb.open_file, + self.h5fname, + mode="r+", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + + def test_openNewFileR(self): + self.assertFalse(Path(self.h5fname).is_file()) + self.assertRaises( + tb.HDF5ExtError, + tb.open_file, + self.h5fname, + mode="r", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + + def _create_file(self, filename): + h5file = tb.open_file(filename, mode="w") + + root = h5file.root + h5file.set_node_attr(root, "testattr", 41) + h5file.create_array(root, "array", [1, 2], title="array") + h5file.create_table( + root, "table", {"var1": tb.IntCol()}, title="table" + ) + + h5file.close() + + def test_readFile(self): + self._create_file(self.h5fname) + self.assertTrue(Path(self.h5fname).is_file()) + + # Open an existing HDF5 file + self.h5file = tb.open_file( + self.h5fname, + mode="r", + driver=self.DRIVER, + 
driver_core_backing_store=False, + ) + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + def _get_digest(self, filename): + md5 = hashlib.md5() + md5.update(Path(filename).read_bytes()) + hexdigest = md5.hexdigest() + return hexdigest + + def test_openFileA(self): + self._create_file(self.h5fname) + self.assertTrue(Path(self.h5fname).is_file()) + + # compute the file hash + hexdigest = self._get_digest(self.h5fname) + + # Open an existing HDF5 file in append mode + self.h5file = tb.open_file( + self.h5fname, + mode="a", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + + # check contents + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + # write new data + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr2", 42) + self.h5file.create_array(root, "array2", [1, 2], title="array2") + self.h5file.create_table( + root, "table2", {"var2": tb.FloatCol()}, title="table2" + ) + self.h5file.close() + + # ensure that there is no change on the file on disk + self.assertEqual(hexdigest, self._get_digest(self.h5fname)) + + def test_openFileRW(self): + self._create_file(self.h5fname) + self.assertTrue(Path(self.h5fname).is_file()) + + # compute the file hash + hexdigest = self._get_digest(self.h5fname) + + # Open an existing HDF5 file in append mode + 
self.h5file = tb.open_file( + self.h5fname, + mode="r+", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + + # check contents + root = self.h5file.root + + self.assertEqual(self.h5file.get_node_attr(root, "testattr"), 41) + + self.assertIsInstance(root.array, tb.Array) + self.assertEqual(root.array._v_title, "array") + + self.assertIsInstance(root.table, tb.Table) + self.assertEqual(root.table._v_title, "table") + self.assertIn("var1", root.table.colnames) + self.assertEqual(root.table.cols.var1.dtype, tb.IntCol().dtype) + + # write new data + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr2", 42) + self.h5file.create_array(root, "array2", [1, 2], title="array2") + self.h5file.create_table( + root, "table2", {"var2": tb.FloatCol()}, title="table2" + ) + self.h5file.close() + + # ensure that there is no change on the file on disk + self.assertEqual(hexdigest, self._get_digest(self.h5fname)) + + def test_get_file_image(self): + self.h5file = tb.open_file( + self.h5fname, + mode="w", + driver=self.DRIVER, + driver_core_backing_store=False, + ) + root = self.h5file.root + self.h5file.set_node_attr(root, "testattr", 41) + self.h5file.create_array(root, "array", [1, 2], title="array") + self.h5file.create_table( + root, "table", {"var1": tb.IntCol()}, title="table" + ) + + image = self.h5file.get_file_image() + + self.assertGreater(len(image), 0) + self.assertEqual([i for i in image[:4]], [137, 72, 68, 70]) + + +class SplitDriverTestCase(DefaultDriverTestCase): + DRIVER = "H5FD_SPLIT" + DRIVER_PARAMS = { + "driver_split_meta_ext": "-xm.h5", + "driver_split_raw_ext": "-xr.h5", + } + open_kwargs = dict(driver=DRIVER, **DRIVER_PARAMS) + + def _getTempFileName(self): + return tempfile.mktemp(prefix=self._getName()) + + def setUp(self): + super().setUp() + + self.h5fnames = [ + self.h5fname + self.DRIVER_PARAMS[k] + for k in ("driver_split_meta_ext", "driver_split_raw_ext") + ] + + def tearDown(self): + self.h5file.close() + for fname in 
self.h5fnames: + if Path(fname).is_file(): + Path(fname).unlink() + # super().tearDown() + common.PyTablesTestCase.tearDown(self) + + def assertIsFile(self): + for fname in self.h5fnames: + self.assertTrue(Path(fname).is_file()) + + +class NotSpportedDriverTestCase(common.PyTablesTestCase): + DRIVER = None + DRIVER_PARAMS = {} + EXCEPTION = ValueError + + def setUp(self): + super().setUp() + self.h5fname = tempfile.mktemp(suffix=".h5") + + def tearDown(self): + open_files = tb.file._open_files + if self.h5fname in open_files: + for h5file in open_files.get_handlers_by_name(self.h5fname): + h5file.close() + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + super().tearDown() + + def test_newFile(self): + self.assertRaises( + self.EXCEPTION, + tb.open_file, + self.h5fname, + mode="w", + driver=self.DRIVER, + **self.DRIVER_PARAMS, + ) + self.assertFalse(Path(self.h5fname).is_file()) + + +if "H5FD_LOG" in tb.hdf5extension._supported_drivers: + BaseLogDriverTestCase = DefaultDriverTestCase + +else: + BaseLogDriverTestCase = NotSpportedDriverTestCase + + +class LogDriverTestCase(BaseLogDriverTestCase): + DRIVER = "H5FD_LOG" + open_kwargs = dict(driver=DRIVER, **BaseLogDriverTestCase.DRIVER_PARAMS) + + def setUp(self): + # local binding + self.DRIVER_PARAMS = { + "driver_log_file": tempfile.mktemp(suffix=".log") + } + + super().setUp() + + def tearDown(self): + if Path(self.DRIVER_PARAMS["driver_log_file"]).is_file(): + Path(self.DRIVER_PARAMS["driver_log_file"]).unlink() + super().tearDown() + + +if tb.hdf5extension.HAVE_DIRECT_DRIVER: + + class DirectDriverTestCase(DefaultDriverTestCase): + DRIVER = "H5FD_DIRECT" + open_kwargs = dict( + driver=DRIVER, **DefaultDriverTestCase.DRIVER_PARAMS + ) + +else: + + class DirectDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_DIRECT" + EXCEPTION = RuntimeError + + +if tb.hdf5extension.HAVE_WINDOWS_DRIVER: + + class WindowsDriverTestCase(DefaultDriverTestCase): + DRIVER = "H5FD_WINDOWS" + open_kwargs = 
dict( + driver=DRIVER, **DefaultDriverTestCase.DRIVER_PARAMS + ) + +else: + + class WindowsDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_WINDOWS" + EXCEPTION = RuntimeError + + +class FamilyDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_FAMILY" + + +class MultiDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_MULTI" + + +class MpioDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_MPIO" + + +class MpiPosixDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_MPIPOSIX" + + +class StreamDriverTestCase(NotSpportedDriverTestCase): + DRIVER = "H5FD_STREAM" + + +class InMemoryCoreDriverTestCase(common.PyTablesTestCase): + DRIVER = "H5FD_CORE" + + def setUp(self): + super().setUp() + self.h5fname = tempfile.mktemp(".h5") + self.h5file = None + + def tearDown(self): + if self.h5file: + self.h5file.close() + self.h5file = None + + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + super().tearDown() + + def _create_image(self, filename="in-memory", title="Title", mode="w"): + h5file = tb.open_file( + filename, + mode=mode, + title=title, + driver=self.DRIVER, + driver_core_backing_store=0, + ) + + try: + h5file.create_array(h5file.root, "array", [1, 2], title="Array") + h5file.create_table( + h5file.root, "table", {"var1": tb.IntCol()}, "Table" + ) + h5file.root._v_attrs.testattr = 41 + + image = h5file.get_file_image() + finally: + h5file.close() + + return image + + def test_newFileW(self): + image = self._create_image(self.h5fname, mode="w") + self.assertGreater(len(image), 0) + self.assertEqual([i for i in image[:4]], [137, 72, 68, 70]) + self.assertFalse(Path(self.h5fname).exists()) + + def test_newFileA(self): + image = self._create_image(self.h5fname, mode="a") + self.assertGreater(len(image), 0) + self.assertEqual([i for i in image[:4]], [137, 72, 68, 70]) + self.assertFalse(Path(self.h5fname).exists()) + + def test_openFileR(self): + image = self._create_image(self.h5fname) + 
self.assertFalse(Path(self.h5fname).exists()) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="r", + driver=self.DRIVER, + driver_core_image=image, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + def test_openFileRW(self): + image = self._create_image(self.h5fname) + self.assertFalse(Path(self.h5fname).exists()) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="r+", + driver=self.DRIVER, + driver_core_image=image, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + self.h5file.create_array( + self.h5file.root, "array2", list(range(10_000)), title="Array2" + ) + self.h5file.root._v_attrs.testattr2 = 42 + + self.h5file.close() + + self.assertFalse(Path(self.h5fname).exists()) + + def 
test_openFileRW_update(self): + filename = tempfile.mktemp(".h5") + image1 = self._create_image(filename) + self.assertFalse(Path(self.h5fname).exists()) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="r+", + driver=self.DRIVER, + driver_core_image=image1, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + data = list(range(2 * tb.parameters.DRIVER_CORE_INCREMENT)) + self.h5file.create_array( + self.h5file.root, "array2", data, title="Array2" + ) + self.h5file.root._v_attrs.testattr2 = 42 + + image2 = self.h5file.get_file_image() + + self.h5file.close() + + self.assertFalse(Path(self.h5fname).exists()) + + self.assertNotEqual(len(image1), len(image2)) + self.assertNotEqual(image1, image2) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="r", + driver=self.DRIVER, + driver_core_image=image2, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + 
self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr2")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr2"), 42) + self.assertTrue(hasattr(self.h5file.root, "array2")) + self.assertEqual( + self.h5file.get_node_attr("/array2", "TITLE"), "Array2" + ) + self.assertEqual(self.h5file.root.array2.read(), data) + + self.h5file.close() + + self.assertFalse(Path(self.h5fname).exists()) + + def test_openFileA(self): + image = self._create_image(self.h5fname) + self.assertFalse(Path(self.h5fname).exists()) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="a", + driver=self.DRIVER, + driver_core_image=image, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + self.h5file.close() + + self.assertFalse(Path(self.h5fname).exists()) + + def test_openFileA_update(self): + h5fname = tempfile.mktemp(".h5") + image1 = self._create_image(h5fname) + self.assertFalse(Path(self.h5fname).exists()) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="a", + driver=self.DRIVER, + driver_core_image=image1, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + 
self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + data = list(range(2 * tb.parameters.DRIVER_CORE_INCREMENT)) + self.h5file.create_array( + self.h5file.root, "array2", data, title="Array2" + ) + self.h5file.root._v_attrs.testattr2 = 42 + + image2 = self.h5file.get_file_image() + + self.h5file.close() + + self.assertFalse(Path(self.h5fname).exists()) + + self.assertNotEqual(len(image1), len(image2)) + self.assertNotEqual(image1, image2) + + # Open an existing file + self.h5file = tb.open_file( + self.h5fname, + mode="r", + driver=self.DRIVER, + driver_core_image=image2, + driver_core_backing_store=0, + ) + + # Get the CLASS attribute of the arr object + self.assertTrue(hasattr(self.h5file.root._v_attrs, "TITLE")) + self.assertEqual(self.h5file.get_node_attr("/", "TITLE"), "Title") + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr"), 41) + self.assertTrue(hasattr(self.h5file.root, "array")) + self.assertEqual(self.h5file.get_node_attr("/array", "TITLE"), "Array") + self.assertTrue(hasattr(self.h5file.root, "table")) + self.assertEqual(self.h5file.get_node_attr("/table", "TITLE"), "Table") + self.assertEqual(self.h5file.root.array.read(), [1, 2]) + + self.assertTrue(hasattr(self.h5file.root._v_attrs, "testattr2")) + self.assertEqual(self.h5file.get_node_attr("/", "testattr2"), 42) + self.assertTrue(hasattr(self.h5file.root, "array2")) + self.assertEqual( + 
self.h5file.get_node_attr("/array2", "TITLE"), "Array2" + ) + self.assertEqual(self.h5file.root.array2.read(), data) + + self.h5file.close() + + self.assertFalse(Path(self.h5fname).exists()) + + def test_str(self): + self.h5file = tb.open_file( + self.h5fname, + mode="w", + title="Title", + driver=self.DRIVER, + driver_core_backing_store=0, + ) + + self.h5file.create_array( + self.h5file.root, "array", [1, 2], title="Array" + ) + self.h5file.create_table( + self.h5file.root, "table", {"var1": tb.IntCol()}, "Table" + ) + self.h5file.root._v_attrs.testattr = 41 + + # ensure that the __str__ method works even if there is no phisical + # file on disk (in which case the os.stat operation for date retrieval + # fails) + self.assertIsNotNone(str(self.h5file)) + + self.h5file.close() + self.assertFalse(Path(self.h5fname).exists()) + + +class QuantizeTestCase(common.TempFileMixin, common.PyTablesTestCase): + mode = "w" + title = "This is the table title" + expectedrows = 10 + appendrows = 5 + + def setUp(self): + super().setUp() + + self.data = np.linspace(-5.0, 5.0, 41) + self.randomdata = np.random.random_sample(1_000_000) + self.randomints = np.random.randint( + -1_000_000, 1_000_000, 1_000_000 + ).astype("int64") + + self.populateFile() + self.h5file.close() + + self.quantizeddata_0 = np.asarray( + [-5.0] * 2 + + [-4.0] * 5 + + [-3.0] * 3 + + [-2.0] * 5 + + [-1.0] * 3 + + [0.0] * 5 + + [1.0] * 3 + + [2.0] * 5 + + [3.0] * 3 + + [4.0] * 5 + + [5.0] * 2 + ) + self.quantizeddata_m1 = np.asarray([-8.0] * 4 + [0.0] * 33 + [8.0] * 4) + + def populateFile(self): + root = self.h5file.root + filters = tb.Filters( + complevel=1, complib="blosc", least_significant_digit=1 + ) + ints = self.h5file.create_carray( + root, "integers", tb.Int64Atom(), (1_000_000,), filters=filters + ) + ints[:] = self.randomints + floats = self.h5file.create_carray( + root, "floats", tb.Float32Atom(), (1_000_000,), filters=filters + ) + floats[:] = self.randomdata + data1 = self.h5file.create_carray( + 
root, "data1", tb.Float64Atom(), (41,), filters=filters + ) + data1[:] = self.data + filters = tb.Filters( + complevel=1, complib="blosc", least_significant_digit=0 + ) + data0 = self.h5file.create_carray( + root, "data0", tb.Float64Atom(), (41,), filters=filters + ) + data0[:] = self.data + filters = tb.Filters( + complevel=1, complib="blosc", least_significant_digit=2 + ) + data2 = self.h5file.create_carray( + root, "data2", tb.Float64Atom(), (41,), filters=filters + ) + data2[:] = self.data + filters = tb.Filters( + complevel=1, complib="blosc", least_significant_digit=-1 + ) + datam1 = self.h5file.create_carray( + root, "datam1", tb.Float64Atom(), (41,), filters=filters + ) + datam1[:] = self.data + + def test00_quantizeData(self): + """Checking the quantize() function.""" + + quantized_0 = tb.utils.quantize(self.data, 0) + quantized_1 = tb.utils.quantize(self.data, 1) + quantized_2 = tb.utils.quantize(self.data, 2) + quantized_m1 = tb.utils.quantize(self.data, -1) + np.testing.assert_array_equal(quantized_0, self.quantizeddata_0) + np.testing.assert_array_equal(quantized_1, self.data) + np.testing.assert_array_equal(quantized_2, self.data) + np.testing.assert_array_equal(quantized_m1, self.quantizeddata_m1) + + def test01_quantizeDataMaxError(self): + """Checking the maximum error introduced by the quantize() function.""" + + quantized_0 = tb.utils.quantize(self.randomdata, 0) + quantized_1 = tb.utils.quantize(self.randomdata, 1) + quantized_2 = tb.utils.quantize(self.randomdata, 2) + quantized_m1 = tb.utils.quantize(self.randomdata, -1) + + self.assertLess(np.abs(quantized_0 - self.randomdata).max(), 0.5) + self.assertLess(np.abs(quantized_1 - self.randomdata).max(), 0.05) + self.assertLess(np.abs(quantized_2 - self.randomdata).max(), 0.005) + self.assertLess(np.abs(quantized_m1 - self.randomdata).max(), 1.0) + + def test02_array(self): + """Checking quantized data as written to disk.""" + + self.h5file = tb.open_file(self.h5fname, "r") + 
np.testing.assert_array_equal(self.h5file.root.data1[:], self.data) + np.testing.assert_array_equal(self.h5file.root.data2[:], self.data) + np.testing.assert_array_equal( + self.h5file.root.data0[:], self.quantizeddata_0 + ) + np.testing.assert_array_equal( + self.h5file.root.datam1[:], self.quantizeddata_m1 + ) + np.testing.assert_array_equal( + self.h5file.root.integers[:], self.randomints + ) + self.assertEqual( + self.h5file.root.integers[:].dtype, self.randomints.dtype + ) + + self.assertLess( + np.abs(self.h5file.root.floats[:] - self.randomdata).max(), 0.05 + ) + + +def suite(): + import doctest + + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # Uncomment this only for testing purposes! + + for i in range(niter): + theSuite.addTest(common.make_suite(FiltersCase1)) + theSuite.addTest(common.make_suite(FiltersCase2)) + theSuite.addTest(common.make_suite(FiltersCase10)) + theSuite.addTest(common.make_suite(FiltersCaseBloscBloscLZ)) + theSuite.addTest(common.make_suite(FiltersCaseBloscLZ4)) + theSuite.addTest(common.make_suite(FiltersCaseBloscLZ4HC)) + theSuite.addTest(common.make_suite(FiltersCaseBloscSnappy)) + theSuite.addTest(common.make_suite(FiltersCaseBloscZlib)) + theSuite.addTest(common.make_suite(FiltersCaseBloscZstd)) + theSuite.addTest(common.make_suite(FiltersCaseBloscBitShuffle)) + theSuite.addTest(common.make_suite(CopyGroupCase1)) + theSuite.addTest(common.make_suite(CopyGroupCase2)) + theSuite.addTest(common.make_suite(CopyFileCase1)) + theSuite.addTest(common.make_suite(CopyFileCase2)) + theSuite.addTest(common.make_suite(GroupFiltersTestCase)) + theSuite.addTest(common.make_suite(SetBloscMaxThreadsTestCase)) + theSuite.addTest(common.make_suite(FilterTestCase)) + theSuite.addTest(doctest.DocTestSuite(tb.filters)) + + theSuite.addTest(common.make_suite(DefaultDriverTestCase)) + theSuite.addTest(common.make_suite(Sec2DriverTestCase)) + theSuite.addTest(common.make_suite(StdioDriverTestCase)) + 
theSuite.addTest(common.make_suite(CoreDriverTestCase)) + theSuite.addTest(common.make_suite(CoreDriverNoBackingStoreTestCase)) + theSuite.addTest(common.make_suite(SplitDriverTestCase)) + + theSuite.addTest(common.make_suite(LogDriverTestCase)) + theSuite.addTest(common.make_suite(DirectDriverTestCase)) + theSuite.addTest(common.make_suite(WindowsDriverTestCase)) + + theSuite.addTest(common.make_suite(FamilyDriverTestCase)) + theSuite.addTest(common.make_suite(MultiDriverTestCase)) + theSuite.addTest(common.make_suite(MpioDriverTestCase)) + theSuite.addTest(common.make_suite(MpiPosixDriverTestCase)) + theSuite.addTest(common.make_suite(StreamDriverTestCase)) + theSuite.addTest(common.make_suite(InMemoryCoreDriverTestCase)) + + theSuite.addTest(common.make_suite(QuantizeTestCase)) + + if common.heavy: + theSuite.addTest(common.make_suite(CreateTestCase)) + theSuite.addTest(common.make_suite(FiltersCase3)) + theSuite.addTest(common.make_suite(FiltersCase4)) + theSuite.addTest(common.make_suite(FiltersCase5)) + theSuite.addTest(common.make_suite(FiltersCase6)) + theSuite.addTest(common.make_suite(FiltersCase7)) + theSuite.addTest(common.make_suite(FiltersCase8)) + theSuite.addTest(common.make_suite(FiltersCase9)) + theSuite.addTest(common.make_suite(CopyFileCase3)) + theSuite.addTest(common.make_suite(CopyFileCase4)) + theSuite.addTest(common.make_suite(CopyFileCase5)) + theSuite.addTest(common.make_suite(CopyFileCase6)) + theSuite.addTest(common.make_suite(CopyFileCase7)) + theSuite.addTest(common.make_suite(CopyFileCase8)) + theSuite.addTest(common.make_suite(CopyFileCase10)) + theSuite.addTest(common.make_suite(CopyGroupCase3)) + theSuite.addTest(common.make_suite(CopyGroupCase4)) + theSuite.addTest(common.make_suite(CopyGroupCase5)) + theSuite.addTest(common.make_suite(CopyGroupCase6)) + theSuite.addTest(common.make_suite(CopyGroupCase7)) + theSuite.addTest(common.make_suite(CopyGroupCase8)) + + return theSuite + + +if __name__ == "__main__": + 
class DirectChunkingTestCase(common.TempFileMixin, common.PyTablesTestCase):
    """Direct chunk access tests for chunked, zlib-compressed datasets.

    For enlargeable and non-enlargeable datasets.  Subclasses provide the
    class attributes below, create ``self.array`` and implement
    ``modified()``.
    """

    # Class attributes:
    shape: tuple[int, ...]
    chunkshape: tuple[int, ...]
    shuffle: bool
    obj: np.ndarray

    # Instance attributes:
    array: tb.Leaf  # set by ``setUp()`` and ``_reopen()``
    filters: tb.Filters

    def setUp(self):
        """Build the zlib (level 1) filters used to create the dataset."""
        super().setUp()
        self.filters = tb.Filters(
            complib="zlib", complevel=1, shuffle=self.shuffle
        )

    def modified(self, obj):
        """Return an altered copy of *obj* with the same dtype and shape."""
        raise NotImplementedError

    # ---- helpers ----------------------------------------------------------

    def iter_chunks(self):
        """Yield the start coordinates of every chunk covering ``shape``."""
        chunk_ranges = [
            range(0, s, cs) for (s, cs) in zip(self.shape, self.chunkshape)
        ]
        yield from itertools.product(*chunk_ranges)

    def _chunk_slice(self, chunk_start):
        """Return the index tuple selecting the chunk at *chunk_start*."""
        return tuple(
            slice(s, s + cs) for (s, cs) in zip(chunk_start, self.chunkshape)
        )

    def _extended(self, obj):
        """Return *obj* padded so each dimension holds whole chunks only.

        The pad width is ``(-s) % cs``, i.e. the distance from ``s`` up to
        the next multiple of ``cs``.  (``s % cs`` happens to give the same
        value for shape 5 / chunk 2, but under-pads e.g. shape 5 / chunk 4.)
        """
        return np.pad(
            obj,
            [(0, (-s) % cs) for (s, cs) in zip(self.shape, self.chunkshape)],
        )

    def _beyond(self, offset=0):
        """Return coordinates past the last chunk row in every dimension.

        With ``offset=0`` the coordinates are chunk-aligned; a non-zero
        *offset* shifts them off the chunk grid.
        """
        return tuple(
            offset + (1 + s // cs) * cs
            for (s, cs) in zip(self.shape, self.chunkshape)
        )

    def shuffled(self, bytes_):
        """Return *bytes_* regrouped byte-plane by byte-plane.

        All first bytes of each item come first, then all second bytes,
        and so on, using the item size of ``self.obj``'s dtype.
        """
        itemsize = self.obj.dtype.itemsize
        return b"".join(bytes_[d::itemsize] for d in range(itemsize))

    def prepare_chunk(self, bytes_, shuffle=None):
        """Prepare chunk bytes by applying shuffle filter (no compression).

        This returns the bytes as they would appear after decompression,
        to allow comparison with decompressed data from HDF5.  When
        *shuffle* is ``None`` the class-level ``shuffle`` setting is used.
        """
        if shuffle is None:
            shuffle = self.shuffle
        return self.shuffled(bytes_) if shuffle else bytes_

    def compress_chunk(self, bytes_, shuffle=None):
        """Prepare and compress chunk bytes for writing to HDF5."""
        assert self.filters.complib == "zlib"
        prepared = self.prepare_chunk(bytes_, shuffle)
        return zlib.compress(prepared, self.filters.complevel)

    # ---- chunk_info -------------------------------------------------------

    def test_chunk_info_aligned(self):
        """Every stored chunk reports its own start and storage details."""
        for chunk_start in self.iter_chunks():
            chunk_info = self.array.chunk_info(chunk_start)
            self.assertEqual(chunk_info.start, chunk_start)
            self.assertIsNotNone(chunk_info.filter_mask)
            self.assertIsNotNone(chunk_info.offset)
            self.assertIsNotNone(chunk_info.size)

    def test_chunk_info_unaligned(self):
        """Coordinates inside a chunk give the same info as its start."""
        chunk_info_a = self.array.chunk_info((0,) * self.array.ndim)
        chunk_info_u = self.array.chunk_info((1,) * self.array.ndim)
        self.assertIsNotNone(chunk_info_a.start)
        self.assertEqual(chunk_info_a, chunk_info_u)

    def test_chunk_info_aligned_beyond(self):
        """Aligned coordinates outside the shape raise ``IndexError``."""
        self.assertRaises(IndexError, self.array.chunk_info, self._beyond())

    def test_chunk_info_unaligned_beyond(self):
        """Unaligned coordinates outside the shape raise ``IndexError``."""
        self.assertRaises(IndexError, self.array.chunk_info, self._beyond(1))

    # ---- read_chunk -------------------------------------------------------

    def test_read_chunk(self):
        """Each raw chunk decompresses to the expected (shuffled) bytes."""
        # Extended to fit chunk boundaries.
        ext_obj = self._extended(self.obj)
        for chunk_start in self.iter_chunks():
            chunk = self.array.read_chunk(chunk_start)
            self.assertIsInstance(chunk, bytes)
            obj_slice = self._chunk_slice(chunk_start)
            # Compare decompressed data to avoid zlib implementation
            # differences
            expected_bytes = self.prepare_chunk(ext_obj[obj_slice].tobytes())
            self.assertEqual(zlib.decompress(chunk), expected_bytes)

    def test_read_chunk_out(self):
        """Reading into ``out`` needs a large-enough buffer and fills it."""
        # Extended to fit chunk boundaries.
        ext_obj = self._extended(self.obj)
        chunk_start = (0,) * self.obj.ndim
        obj_slice = self._chunk_slice(chunk_start)
        expected_bytes = self.prepare_chunk(ext_obj[obj_slice].tobytes())

        # First read the chunk to get its actual compressed size
        chunk_reference = self.array.read_chunk(chunk_start)
        chunk_size = len(chunk_reference)

        chunk_out = bytearray(chunk_size - 1)  # too short
        self.assertRaises(
            ValueError, self.array.read_chunk, chunk_start, out=chunk_out
        )

        chunk_out = bytearray(chunk_size)
        chunk = self.array.read_chunk(chunk_start, out=chunk_out)
        self.assertIsInstance(chunk, memoryview)
        # Compare decompressed data to avoid zlib implementation differences
        self.assertEqual(zlib.decompress(bytes(chunk)), expected_bytes)
        self.assertEqual(zlib.decompress(bytes(chunk_out)), expected_bytes)

    def test_read_chunk_unaligned(self):
        """Reading at non chunk-aligned coordinates is rejected."""
        self.assertRaises(
            tb.NotChunkAlignedError,
            self.array.read_chunk,
            (1,) * self.array.ndim,
        )

    def test_read_chunk_beyond(self):
        """Reading a chunk outside the shape raises ``IndexError``."""
        self.assertRaises(IndexError, self.array.read_chunk, self._beyond())

    # ---- write_chunk ------------------------------------------------------

    def test_write_chunk(self):
        """Rewriting every chunk directly yields the modified dataset."""
        new_obj = self.modified(self.obj)
        # Extended to fit chunk boundaries.
        ext_obj = self._extended(new_obj)
        for chunk_start in self.iter_chunks():
            obj_slice = self._chunk_slice(chunk_start)
            obj_bytes = self.compress_chunk(ext_obj[obj_slice].tobytes())
            self.array.write_chunk(chunk_start, obj_bytes)

        self._reopen()
        self.assertTrue(common.areArraysEqual(self.array[:], new_obj))

    def test_write_chunk_filtermask(self):
        """A chunk written with a filter mask keeps that mask on disk."""
        no_shuffle_mask = 0x00000004  # to turn shuffle off

        chunk_start = (0,) * self.obj.ndim
        obj_slice = self._chunk_slice(chunk_start)
        new_obj = self.obj.copy()
        new_obj[obj_slice] = self.modified(new_obj[obj_slice])
        obj_bytes = self.compress_chunk(
            new_obj[obj_slice].tobytes(), shuffle=False
        )
        self.array.write_chunk(
            chunk_start, obj_bytes, filter_mask=no_shuffle_mask
        )

        self._reopen()
        arr_obj = self.array[:]  # first chunk is shuffled, fix it
        fixed_bytes = self.shuffled(arr_obj[obj_slice].tobytes())
        fixed_chunk = np.ndarray(
            self.chunkshape, dtype=self.obj.dtype, buffer=fixed_bytes
        )
        arr_obj[obj_slice] = fixed_chunk
        self.assertTrue(common.areArraysEqual(arr_obj, new_obj))

        chunk_info = self.array.chunk_info(chunk_start)
        self.assertEqual(chunk_info.filter_mask, no_shuffle_mask)

    def test_write_chunk_unaligned(self):
        """Writing at non chunk-aligned coordinates is rejected."""
        self.assertRaises(
            tb.NotChunkAlignedError,
            self.array.write_chunk,
            (1,) * self.array.ndim,
            b"foobar",
        )

    def test_write_chunk_beyond(self):
        """Writing a chunk outside the shape raises ``IndexError``."""
        self.assertRaises(
            IndexError, self.array.write_chunk, self._beyond(), b"foobar"
        )
def test_read_chunk_miss_extdim(self):
    """Reading a never-written chunk fails before and after enlarging.

    The target is the chunk row right after the last one along dimension
    0: outside the shape it raises ``IndexError``; once the array has been
    enlarged to contain it, ``tb.NoSuchChunkError`` is raised instead.
    """
    # Next chunk in the enlargeable dimension.
    assert self.array.extdim == 0
    rows, chunk_rows = self.shape[0], self.chunkshape[0]
    first_missing_row = (1 + rows // chunk_rows) * chunk_rows
    trailing_zeros = (0,) * (self.array.ndim - 1)
    chunk_start = (first_missing_row, *trailing_zeros)

    with self.assertRaises(IndexError):
        self.array.read_chunk(chunk_start)

    # Enlarge the array to put the (missing) chunk within the shape.
    self.array.truncate(first_missing_row + chunk_rows)
    with self.assertRaises(tb.NoSuchChunkError):
        self.array.read_chunk(chunk_start)
class CArrayDirectChunkingTestCase(DirectChunkingTestCase):
    """Run the direct chunking tests against a ``CArray`` node.

    The 5 x 5 ``u2`` dataset is stored in 2 x 2 chunks with shuffle on.
    """

    shape = (5, 5)
    # 3 x 3 chunks, incomplete at right/bottom boundaries
    chunkshape = (2, 2)
    shuffle = True
    obj = np.arange(np.prod(shape), dtype="u2").reshape(shape)

    def setUp(self):
        """Create ``/carray`` from ``obj`` with the filters under test."""
        super().setUp()
        creation_options = {
            "chunkshape": self.chunkshape,
            "obj": self.obj,
            "filters": self.filters,
        }
        self.array = self.h5file.create_carray(
            "/", "carray", **creation_options
        )

    def _reopen(self):
        """Reopen the file and rebind ``self.array`` to the stored node."""
        super()._reopen()
        self.array = self.h5file.root.carray

    def modified(self, obj):
        """Return *obj* with every value doubled."""
        doubled = obj * 2
        return doubled
class TableDirectChunkingTestCase(XDirectChunkingTestCase):
    """Run the direct chunking tests against a ``Table`` node.

    The table holds five ``(u4, f4)`` records stored in chunks of two rows.
    """

    shape = (5,)  # enlargeable along first dimension
    chunkshape = (2,)  # 3 chunks, incomplete at bottom boundary
    shuffle = True
    obj = np.array(
        [(i, float(i)) for i in range(np.prod(shape))], dtype="u4,f4"
    )

    def setUp(self):
        """Create ``/table`` from ``obj``'s dtype and append the records."""
        super().setUp()
        desc, _ = tb.descr_from_dtype(self.obj.dtype)
        self.array = self.h5file.create_table(
            "/",
            "table",
            desc,
            chunkshape=self.chunkshape,
            filters=self.filters,
        )
        self.array.append(self.obj)

    def _reopen(self):
        """Reopen the file and rebind ``self.array`` to the stored node."""
        super()._reopen()
        self.array = self.h5file.root.table

    def modified(self, obj):
        """Return a copy of *obj* with every field of every record doubled.

        Each field is doubled with one in-place array operation rather than
        a Python loop over records; dtype and shape are those of *obj*.
        """
        doubled = obj.copy()
        for field in doubled.dtype.names:
            # ``doubled[field]`` is a view, so this updates the copy itself.
            doubled[field] *= 2
        return doubled
def setUp(self):
    """Populate the freshly created file with the nodes the tests rely on.

    Created in this order under the root group: arrays ``array`` and
    ``anarray``, group ``agroup`` (holding arrays ``anarray1`` and
    ``anarray2``), group ``agroup2``, and finally group ``agroup3``
    inside ``agroup``.
    """
    super().setUp()

    fileh = self.h5file
    top = fileh.root

    # Two arrays directly below the root
    fileh.create_array(top, "array", [1, 2], title="Title example")
    fileh.create_array(top, "anarray", [1], "Array title")

    # A group with a couple of arrays in it
    parent = fileh.create_group(top, "agroup", "Group title")
    for leaf_name, leaf_title in (
        ("anarray1", "Array title 1"),
        ("anarray2", "Array title 2"),
    ):
        fileh.create_array(parent, leaf_name, [2], leaf_title)

    # A lonely group in the first level
    fileh.create_group(top, "agroup2", "Group title 2")

    # A new group in the second level
    fileh.create_group(parent, "agroup3", "Group title 3")
def test01_twice(self):
    """Checking do/undo (twice operations intertwined)"""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01_twice..." % self.__class__.__name__)

    # (node name, stored data, title) of the arrays created below
    created = (
        ("otherarray", [3, 4], "Another array"),
        ("otherarray2", [4, 5], "Another array 2"),
    )
    paths = ("/otherarray", "/otherarray2")

    # open the do/undo
    self.h5file.enable_undo()

    # Create the new arrays
    for name, data, title in created:
        self.h5file.create_array("/", name, data, title)

    # Undo both creations at once (no mark was set in between).
    # ``self.h5file`` is always re-read: ``_do_reopen()`` may replace it.
    self._do_reopen()
    self.h5file.undo()
    for path in paths:
        self.assertNotIn(path, self.h5file)
    self.assertEqual(self.h5file._curaction, 0)
    self.assertEqual(self.h5file._curmark, 0)

    # Redo the operation
    self.h5file.redo()

    # Check that the arrays have come back to life in a sane state
    for path in paths:
        self.assertIn(path, self.h5file)
    for name, data, _ in created:
        self.assertEqual(getattr(self.h5file.root, name).read(), data)
    for name, _, title in created:
        self.assertEqual(getattr(self.h5file.root, name).title, title)
    self.assertEqual(self.h5file._curaction, 2)
    self.assertEqual(self.h5file._curmark, 0)
def test03_6times3marks(self):
    """Checking with six ops and three marks."""

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test03_6times3marks..." % self.__class__.__name__
        )

    # (node name, stored data, title) in creation order
    nodes = (
        ("otherarray1", [3, 4], "Another array 1"),
        ("otherarray2", [4, 5], "Another array 2"),
        ("otherarray3", [5, 6], "Another array 3"),
        ("otherarray4", [6, 7], "Another array 4"),
        ("otherarray5", [7, 8], "Another array 5"),
        ("otherarray6", [8, 9], "Another array 6"),
    )
    paths = ["/" + name for name, _, _ in nodes]

    def create(first, last):
        # Create nodes[first:last] under the root, in order.
        for name, data, title in nodes[first:last]:
            self.h5file.create_array("/", name, data, title)

    def check_present(count):
        # The first ``count`` nodes must exist, the remaining ones must not.
        for path in paths[:count]:
            self.assertIn(path, self.h5file)
        for path in paths[count:]:
            self.assertNotIn(path, self.h5file)

    # open the do/undo
    self.h5file.enable_undo()

    # Three pairs of arrays, separated by two marks
    create(0, 2)
    self.h5file.mark()
    create(2, 4)
    self._do_reopen()
    self.h5file.mark()
    create(4, 6)

    # Unwind one mark at a time until nothing is left
    for remaining in (4, 2, 0):
        self.h5file.undo()
        check_present(remaining)

    # Redo one mark at a time until the end
    self._do_reopen()
    for restored in (2, 4, 6):
        self.h5file.redo()
        check_present(restored)

    # Everything must be back with its original contents and titles
    for name, data, _ in nodes:
        self.assertEqual(getattr(self.h5file.root, name).read(), data)
    for name, _, title in nodes:
        self.assertEqual(getattr(self.h5file.root, name).title, title)
% self.__class__.__name__ + ) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [3, 4], "Another array 1") + self.h5file.create_array("/", "otherarray2", [4, 5], "Another array 2") + + # Put a mark + self.h5file.mark() + self._do_reopen() + self.h5file.create_array("/", "otherarray3", [5, 6], "Another array 3") + self.h5file.create_array("/", "otherarray4", [6, 7], "Another array 4") + + # Unwind the previous mark + self.h5file.undo() + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Put a mark in the middle of stack + if common.verbose: + print("All nodes:", self.h5file.walk_nodes()) + self.h5file.mark() + self._do_reopen() + self.h5file.create_array("/", "otherarray5", [7, 8], "Another array 5") + self.h5file.create_array("/", "otherarray6", [8, 9], "Another array 6") + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + self.assertIn("/otherarray5", self.h5file) + self.assertIn("/otherarray6", self.h5file) + + # Unwind previous mark + self.h5file.undo() + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + self.assertNotIn("/otherarray5", self.h5file) + self.assertNotIn("/otherarray6", self.h5file) + + # Redo until the last mark + self.h5file.redo() + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + self.assertIn("/otherarray5", self.h5file) + self.assertIn("/otherarray6", self.h5file) + + # Redo until the next mark (non-existent, so no action) + 
def test05_destructive(self):
    """Checking with a destructive action during undo."""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test05_destructive..." % self.__class__.__name__)

    # open the do/undo
    self.h5file.enable_undo()

    # First array, then a mark, then a second array
    self.h5file.create_array("/", "otherarray1", [3, 4], "Another array 1")
    self.h5file.mark()
    self._do_reopen()
    self.h5file.create_array("/", "otherarray2", [4, 5], "Another array 2")

    # Undo back to the mark ...
    self.h5file.undo()

    # ... and do the destructive operation: a new action after an undo
    self._do_reopen()
    self.h5file.create_array("/", "otherarray3", [5, 6], "Another array 3")

    # Check objects (the file is not reopened any more from here on)
    fileh = self.h5file
    self.assertIn("/otherarray1", fileh)
    first = fileh.root.otherarray1
    self.assertEqual(first.read(), [3, 4])
    self.assertEqual(fileh.root.otherarray1.title, "Another array 1")
    self.assertNotIn("/otherarray2", fileh)
    self.assertIn("/otherarray3", fileh)
    third = fileh.root.otherarray3
    self.assertEqual(third.read(), [5, 6])
    self.assertEqual(fileh.root.otherarray3.title, "Another array 3")
def test05c_destructive(self):
    """Checking with a destructive action during undo (III)"""

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test05c_destructive..." % self.__class__.__name__
        )

    def new_array(name, data, title):
        # Always go through ``self.h5file``: a reopen may have replaced it.
        self.h5file.create_array("/", name, data, title)

    # open the do/undo
    self.h5file.enable_undo()

    # Create a new array
    new_array("otherarray1", [3, 4], "Another array 1")

    # Put a mark
    self.h5file.mark()
    self._do_reopen()
    new_array("otherarray2", [4, 5], "Another array 2")

    # Now undo the past operation
    self.h5file.undo()

    # Do the destructive operation
    new_array("otherarray3", [5, 6], "Another array 3")

    # Put a mark
    self.h5file.mark()
    self._do_reopen()
    new_array("otherarray4", [6, 7], "Another array 4")
    self.assertIn("/otherarray4", self.h5file)

    # Now unwind twice
    self.h5file.undo()
    self._do_reopen()
    self.h5file.undo()

    # Only the array created before the first mark survives
    self.assertIn("/otherarray1", self.h5file)
    for gone in ("/otherarray2", "/otherarray3", "/otherarray4"):
        self.assertNotIn(gone, self.h5file)
def test05e_destructive(self):
    """Checking with a destructive action during undo (V)"""

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test05e_destructive..." % self.__class__.__name__
        )

    # open the do/undo
    self.h5file.enable_undo()

    # One array before the mark, one after it
    self.h5file.create_array("/", "otherarray1", [3, 4], "Another array 1")
    self.h5file.mark()
    self.h5file.create_array("/", "otherarray2", [4, 5], "Another array 2")

    # Undo the second creation
    self.h5file.undo()
    self._do_reopen()

    # Do the destructive operation
    self.h5file.create_array("/", "otherarray3", [5, 6], "Another array 3")

    # Now, unwind the actions
    self.h5file.undo(0)
    self._do_reopen()

    # None of the arrays created in this test may be left
    for gone in ("/otherarray1", "/otherarray2", "/otherarray3"):
        self.assertNotIn(gone, self.h5file)
def test07_totalrewind(self):
    """Checking do/undo (total rewind)"""

    if common.verbose:
        print("\n", "-=" * 30)
        # The banner previously named ``test07_totalunwind``, which is not
        # this test and made verbose logs misleading.
        print("Running %s.test07_totalrewind..." % self.__class__.__name__)

    # (node name, stored data, title) of the arrays created below
    created = (
        ("otherarray", [3, 4], "Another array"),
        ("otherarray2", [4, 5], "Another array 2"),
    )

    # open the do/undo
    self.h5file.enable_undo()

    # Create the arrays with a mark in between
    self.h5file.create_array("/", *created[0])
    self.h5file.mark()
    self.h5file.create_array("/", *created[1])

    # Now undo the past operations
    self.h5file.undo(0)

    # Redo all the operations
    self._do_reopen()
    self.h5file.redo(-1)

    # Check that the objects have come back to life in a sane state
    for name, _, _ in created:
        self.assertIn("/" + name, self.h5file)
    for name, data, _ in created:
        self.assertEqual(getattr(self.h5file.root, name).read(), data)
    for name, _, title in created:
        self.assertEqual(getattr(self.h5file.root, name).title, title)
def test08_initialmark(self):
    """Checking initial mark."""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test08_initialmark..." % self.__class__.__name__)

    # (node name, stored data, title) of the arrays created below
    created = (
        ("otherarray", [3, 4], "Another array"),
        ("otherarray2", [4, 5], "Another array 2"),
    )
    paths = ("/otherarray", "/otherarray2")

    # open the do/undo and remember the mark current at that point
    self.h5file.enable_undo()
    initmid = self.h5file.get_current_mark()

    # Create the arrays with a mark (and maybe a reopen) in between
    first, second = created
    self.h5file.create_array("/", first[0], first[1], first[2])
    self.h5file.mark()
    self._do_reopen()
    self.h5file.create_array("/", second[0], second[1], second[2])

    # Now undo the past operations, back to the remembered mark
    self.h5file.undo(initmid)
    for path in paths:
        self.assertNotIn(path, self.h5file)

    # Redo all the operations
    self.h5file.redo(-1)
    self._do_reopen()

    # Check that the objects have come back to life in a sane state
    for path in paths:
        self.assertIn(path, self.h5file)
    for name, data, _ in created:
        self.assertEqual(getattr(self.h5file.root, name).read(), data)
    for name, _, title in created:
        self.assertEqual(getattr(self.h5file.root, name).title, title)
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [3, 4], "Another array 1") + self.h5file.mark("first") + self.h5file.create_array("/", "otherarray2", [4, 5], "Another array 2") + self.h5file.mark("second") + self._do_reopen() + self.h5file.create_array("/", "otherarray3", [5, 6], "Another array 3") + self.h5file.mark("third") + self.h5file.create_array("/", "otherarray4", [6, 7], "Another array 4") + + # Now go to mark "first" + self.h5file.undo("first") + + # Try to undo up to mark "third" + with self.assertRaises(tb.UndoRedoError): + self.h5file.undo("third") + + # Now go to mark "third" + self.h5file.redo("third") + self._do_reopen() + + # Try to redo up to mark "second" + with self.assertRaises(tb.UndoRedoError): + self.h5file.redo("second") + + # Final checks + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + def test10_goto(self): + """Checking mark names (goto)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10_goto..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [3, 4], "Another array 1") + self._do_reopen() + self.h5file.mark("first") + self.h5file.create_array("/", "otherarray2", [4, 5], "Another array 2") + self.h5file.mark("second") + self.h5file.create_array("/", "otherarray3", [5, 6], "Another array 3") + self._do_reopen() + self.h5file.mark("third") + self.h5file.create_array("/", "otherarray4", [6, 7], "Another array 4") + + # Now go to mark "first" + self.h5file.goto("first") + self.assertIn("/otherarray1", self.h5file) + self.assertNotIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Go to mark "third" + self.h5file.goto("third") + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Now go to mark "second" + self._do_reopen() + self.h5file.goto("second") + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Go to the end + self.h5file.goto(-1) + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertIn("/otherarray3", self.h5file) + self.assertIn("/otherarray4", self.h5file) + + # Check that objects has come back to life in a sane state + self.assertIn("/otherarray2", self.h5file) + self.assertEqual(self.h5file.root.otherarray1.read(), [3, 4]) + self.assertEqual(self.h5file.root.otherarray2.read(), [4, 5]) + self.assertEqual(self.h5file.root.otherarray3.read(), [5, 6]) + self.assertEqual(self.h5file.root.otherarray4.read(), [6, 7]) + + def test10_gotoint(self): + """Checking mark sequential ids (goto)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running 
%s.test10_gotoint..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [3, 4], "Another array 1") + self.h5file.mark("first") + self.h5file.create_array("/", "otherarray2", [4, 5], "Another array 2") + self.h5file.mark("second") + self._do_reopen() + self.h5file.create_array("/", "otherarray3", [5, 6], "Another array 3") + self.h5file.mark("third") + self.h5file.create_array("/", "otherarray4", [6, 7], "Another array 4") + + # Now go to mark "first" + self.h5file.goto(1) + self._do_reopen() + self.assertIn("/otherarray1", self.h5file) + self.assertNotIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Go to beginning + self.h5file.goto(0) + self.assertNotIn("/otherarray1", self.h5file) + self.assertNotIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Go to mark "third" + self._do_reopen() + self.h5file.goto(3) + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Now go to mark "second" + self.h5file.goto(2) + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + self.assertNotIn("/otherarray4", self.h5file) + + # Go to the end + self._do_reopen() + self.h5file.goto(-1) + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertIn("/otherarray3", self.h5file) + self.assertIn("/otherarray4", self.h5file) + + # Check that objects has come back to life in a sane state + self.assertIn("/otherarray2", self.h5file) + self.assertEqual(self.h5file.root.otherarray1.read(), [3, 4]) + self.assertEqual(self.h5file.root.otherarray2.read(), [4, 5]) + 
self.assertEqual(self.h5file.root.otherarray3.read(), [5, 6]) + self.assertEqual(self.h5file.root.otherarray4.read(), [6, 7]) + + def test11_contiguous(self): + """Creating contiguous marks""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test11_contiguous..." % self.__class__.__name__) + + self.h5file.enable_undo() + m1 = self.h5file.mark() + m2 = self.h5file.mark() + self.assertNotEqual(m1, m2) + self._do_reopen() + self.h5file.undo(m1) + self.assertEqual(self.h5file.get_current_mark(), m1) + self.h5file.redo(m2) + self.assertEqual(self.h5file.get_current_mark(), m2) + self.h5file.goto(m1) + self.assertEqual(self.h5file.get_current_mark(), m1) + self.h5file.goto(m2) + self.assertEqual(self.h5file.get_current_mark(), m2) + self.h5file.goto(-1) + self._do_reopen() + self.assertEqual(self.h5file.get_current_mark(), m2) + self.h5file.goto(0) + self.assertEqual(self.h5file.get_current_mark(), 0) + + def test12_keepMark(self): + """Ensuring the mark is kept after an UNDO operation""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test12_keepMark..." % self.__class__.__name__) + + self.h5file.enable_undo() + self.h5file.create_array("/", "newarray1", [1]) + + mid = self.h5file.mark() + self.assertIsNotNone(mid) + self._do_reopen() + self.h5file.undo() + + # We should have moved to the initial mark. + self.assertEqual(self.h5file.get_current_mark(), 0) + + # So /newarray1 should not be there. + self.assertNotIn("/newarray1", self.h5file) + + def test13_severalEnableDisable(self): + """Checking that successive enable/disable Undo works""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test13_severalEnableDisable..." + % self.__class__.__name__ + ) + + self.h5file.enable_undo() + self.h5file.create_array("/", "newarray1", [1]) + self.h5file.undo() + self._do_reopen() + + # We should have moved to 'mid' mark, not the initial mark. 
+ self.assertEqual(self.h5file.get_current_mark(), 0) + + # So /newarray1 should still be there. + self.assertNotIn("/newarray1", self.h5file) + + # Close this do/undo session + self.h5file.disable_undo() + + # Do something + self.h5file.create_array("/", "newarray2", [1]) + + # Enable again do/undo + self.h5file.enable_undo() + self.h5file.create_array("/", "newarray3", [1]) + mid = self.h5file.mark() + self.h5file.create_array("/", "newarray4", [1]) + self.h5file.undo() + + # We should have moved to 'mid' mark, not the initial mark. + self.assertEqual(self.h5file.get_current_mark(), mid) + + # So /newarray2 and /newarray3 should still be there. + self.assertNotIn("/newarray1", self.h5file) + self.assertIn("/newarray2", self.h5file) + self.assertIn("/newarray3", self.h5file) + self.assertNotIn("/newarray4", self.h5file) + + # Close this do/undo session + self._do_reopen() + self.h5file.disable_undo() + + # Enable again do/undo + self.h5file.enable_undo() + self.h5file.create_array("/", "newarray1", [1]) + self.h5file.create_array("/", "newarray4", [1]) + + # So /newarray2 and /newarray3 should still be there. 
+ self.assertIn("/newarray1", self.h5file) + self.assertIn("/newarray2", self.h5file) + self.assertIn("/newarray3", self.h5file) + self.assertIn("/newarray4", self.h5file) + self.h5file.undo() + self._do_reopen() + self.assertNotIn("/newarray1", self.h5file) + self.assertIn("/newarray2", self.h5file) + self.assertIn("/newarray3", self.h5file) + self.assertNotIn("/newarray4", self.h5file) + + # Close this do/undo session + self.h5file.disable_undo() + + +class PersistenceTestCase(BasicTestCase): + """Test for basic Undo/Redo operations with persistence.""" + + _reopen_flag = True + + +class CreateArrayTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test for create_array operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, "anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + def test00(self): + """Checking one action.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [1, 2], "Another array 1") + + # Now undo the past operation + self.h5file.undo() + + # Check that otherarray does not exist in the object tree + self.assertNotIn("/otherarray1", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertIn("/otherarray1", self.h5file) + self.assertEqual(self.h5file.root.otherarray1.title, "Another array 1") + self.assertEqual(self.h5file.root.otherarray1.read(), [1, 2]) + + def test01(self): + """Checking two actions.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [1, 2], "Another array 1") + self.h5file.create_array("/", "otherarray2", [2, 3], "Another array 2") + + # Now undo the past operation + self.h5file.undo() + + # Check that otherarray does not exist in the object tree + self.assertNotIn("/otherarray1", self.h5file) + self.assertNotIn("/otherarray2", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertEqual(self.h5file.root.otherarray1.title, "Another array 1") + self.assertEqual(self.h5file.root.otherarray2.title, "Another array 2") + self.assertEqual(self.h5file.root.otherarray1.read(), [1, 2]) + self.assertEqual(self.h5file.root.otherarray2.read(), [2, 3]) + + def test02(self): + """Checking three actions.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [1, 2], "Another array 1") + self.h5file.create_array("/", "otherarray2", [2, 3], "Another array 2") + self.h5file.create_array("/", "otherarray3", [3, 4], "Another array 3") + + # Now undo the past operation + self.h5file.undo() + + # Check that otherarray does not exist in the object tree + self.assertNotIn("/otherarray1", self.h5file) + self.assertNotIn("/otherarray2", self.h5file) + self.assertNotIn("/otherarray3", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/otherarray2", self.h5file) + self.assertIn("/otherarray3", self.h5file) + self.assertEqual(self.h5file.root.otherarray1.title, "Another array 1") + self.assertEqual(self.h5file.root.otherarray2.title, "Another array 2") + self.assertEqual(self.h5file.root.otherarray3.title, "Another array 3") + self.assertEqual(self.h5file.root.otherarray1.read(), [1, 2]) + self.assertEqual(self.h5file.root.otherarray2.read(), [2, 3]) + self.assertEqual(self.h5file.root.otherarray3.read(), [3, 4]) + + def test03(self): + """Checking three actions in different depth levels.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.create_array("/", "otherarray1", [1, 2], "Another array 1") + self.h5file.create_array( + "/agroup", "otherarray2", [2, 3], "Another array 2" + ) + self.h5file.create_array( + "/agroup/agroup3", "otherarray3", [3, 4], "Another array 3" + ) + + # Now undo the past operation + self.h5file.undo() + + # Check that otherarray does not exist in the object tree + self.assertNotIn("/otherarray1", self.h5file) + self.assertNotIn("/agroup/otherarray2", self.h5file) + self.assertNotIn("/agroup/agroup3/otherarray3", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertIn("/otherarray1", self.h5file) + self.assertIn("/agroup/otherarray2", self.h5file) + self.assertIn("/agroup/agroup3/otherarray3", self.h5file) + self.assertEqual(self.h5file.root.otherarray1.title, "Another array 1") + self.assertEqual( + self.h5file.root.agroup.otherarray2.title, "Another array 2" + ) + self.assertEqual( + self.h5file.root.agroup.agroup3.otherarray3.title, + "Another array 3", + ) + self.assertEqual(self.h5file.root.otherarray1.read(), [1, 2]) + self.assertEqual(self.h5file.root.agroup.otherarray2.read(), [2, 3]) + self.assertEqual( + self.h5file.root.agroup.agroup3.otherarray3.read(), [3, 4] + ) + + +class CreateGroupTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test for create_group operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, 
"anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + def test00(self): + """Checking one action.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new group + self.h5file.create_group("/", "othergroup1", "Another group 1") + + # Now undo the past operation + self.h5file.undo() + + # Check that othergroup1 does not exist in the object tree + self.assertNotIn("/othergroup1", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that othergroup1 has come back to life in a sane state + self.assertIn("/othergroup1", self.h5file) + self.assertEqual( + self.h5file.root.othergroup1._v_title, "Another group 1" + ) + + def test01(self): + """Checking two actions.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new group + self.h5file.create_group("/", "othergroup1", "Another group 1") + self.h5file.create_group("/", "othergroup2", "Another group 2") + + # Now undo the past operation + self.h5file.undo() + + # Check that othergroup does not exist in the object tree + self.assertNotIn("/othergroup1", self.h5file) + self.assertNotIn("/othergroup2", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that othergroup* has come back to life in a sane state + self.assertIn("/othergroup1", self.h5file) + self.assertIn("/othergroup2", self.h5file) + self.assertEqual( + self.h5file.root.othergroup1._v_title, "Another group 1" + ) + self.assertEqual( + self.h5file.root.othergroup2._v_title, "Another group 2" + ) + + def test02(self): + """Checking three actions.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new group + self.h5file.create_group("/", "othergroup1", "Another group 1") + self.h5file.create_group("/", "othergroup2", "Another group 2") + self.h5file.create_group("/", "othergroup3", "Another group 3") + + # Now undo the past operation + self.h5file.undo() + + # Check that othergroup* does not exist in the object tree + self.assertNotIn("/othergroup1", self.h5file) + self.assertNotIn("/othergroup2", self.h5file) + self.assertNotIn("/othergroup3", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that othergroup* has come back to life in a sane state + self.assertIn("/othergroup1", self.h5file) + self.assertIn("/othergroup2", self.h5file) + self.assertIn("/othergroup3", self.h5file) + self.assertEqual( + self.h5file.root.othergroup1._v_title, "Another group 1" + ) + self.assertEqual( + self.h5file.root.othergroup2._v_title, "Another group 2" + ) + self.assertEqual( + self.h5file.root.othergroup3._v_title, "Another 
group 3" + ) + + def test03(self): + """Checking three actions in different depth levels.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new group + self.h5file.create_group("/", "othergroup1", "Another group 1") + self.h5file.create_group( + "/othergroup1", "othergroup2", "Another group 2" + ) + self.h5file.create_group( + "/othergroup1/othergroup2", "othergroup3", "Another group 3" + ) + + # Now undo the past operation + self.h5file.undo() + + # Check that othergroup* does not exist in the object tree + self.assertNotIn("/othergroup1", self.h5file) + self.assertNotIn("/othergroup1/othergroup2", self.h5file) + self.assertTrue( + "/othergroup1/othergroup2/othergroup3" not in self.h5file + ) + + # Redo the operation + self.h5file.redo() + + # Check that othergroup* has come back to life in a sane state + self.assertIn("/othergroup1", self.h5file) + self.assertIn("/othergroup1/othergroup2", self.h5file) + self.assertIn("/othergroup1/othergroup2/othergroup3", self.h5file) + self.assertEqual( + self.h5file.root.othergroup1._v_title, "Another group 1" + ) + self.assertEqual( + self.h5file.root.othergroup1.othergroup2._v_title, + "Another group 2", + ) + self.assertEqual( + self.h5file.root.othergroup1.othergroup2.othergroup3._v_title, + "Another group 3", + ) + + +minRowIndex = 10 + + +def populateTable(where, name): + """Create a table under where with name name""" + + class Indexed(tb.IsDescription): + var1 = tb.StringCol(itemsize=4, dflt=b"", pos=1) + var2 = tb.BoolCol(dflt=0, pos=2) + var3 = tb.IntCol(dflt=0, pos=3) + var4 = tb.FloatCol(dflt=0, pos=4) + + nrows = minRowIndex + table = where._v_file.create_table( + where, name, Indexed, "Indexed", None, nrows + ) + for i in range(nrows): + table.row["var1"] = str(i) + + # table.row['var2'] = i > 2 + table.row["var2"] = i % 2 + table.row["var3"] = i + table.row["var4"] = float(nrows - i - 1) 
+ table.row.append() + table.flush() + + # Index all entries: + indexrows = table.cols.var1.create_index() + indexrows = table.cols.var2.create_index() + indexrows = table.cols.var3.create_index() + + # Do not index the var4 column + # indexrows = table.cols.var4.create_index() + if common.verbose: + print("Number of written rows:", nrows) + print("Number of indexed rows:", table.cols.var1.index.nelements) + print("Number of indexed rows(2):", indexrows) + + +class RenameNodeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test for rename_node operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, "anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + # Create a table in root + populateTable(self.h5file.root, "table") + + def test00(self): + """Checking rename_node (over Groups without children)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.rename_node("/agroup2", "agroup3") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does not exist in the object tree + self.assertIn("/agroup2", self.h5file) + self.assertNotIn("/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup2._v_title, "Group title 2") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/agroup2", self.h5file) + self.assertIn("/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup3._v_title, "Group title 2") + + def test01(self): + """Checking rename_node (over Groups with children)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.rename_node("/agroup", "agroup3") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does not exist in the object tree + self.assertIn("/agroup", self.h5file) + self.assertNotIn("/agroup3", self.h5file) + + # Check that children are reachable + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/agroup", self.h5file) + self.assertIn("/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup3._v_title, "Group title") + + # Check that children are reachable + self.assertIn("/agroup3/anarray1", self.h5file) + self.assertIn("/agroup3/anarray2", self.h5file) + self.assertIn("/agroup3/agroup3", self.h5file) + + def test01b(self): + """Checking rename_node (over Groups with children 2)""" + + if common.verbose: 
+ print("\n", "-=" * 30) + print("Running %s.test01b..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.rename_node("/agroup", "agroup3") + self.h5file.rename_node("/agroup3", "agroup4") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does not exist in the object tree + self.assertIn("/agroup", self.h5file) + self.assertNotIn("/agroup4", self.h5file) + + # Check that children are reachable + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/agroup", self.h5file) + self.assertIn("/agroup4", self.h5file) + self.assertEqual(self.h5file.root.agroup4._v_title, "Group title") + + # Check that children are reachable + self.assertIn("/agroup4/anarray1", self.h5file) + self.assertIn("/agroup4/anarray2", self.h5file) + self.assertIn("/agroup4/agroup3", self.h5file) + + def test02(self): + """Checking rename_node (over Leaves)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.rename_node("/anarray", "anarray2") + + # Now undo the past operation + self.h5file.undo() + + # Check that otherarray does not exist in the object tree + self.assertIn("/anarray", self.h5file) + self.assertNotIn("/anarray2", self.h5file) + self.assertEqual(self.h5file.root.anarray.title, "Array title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/anarray", self.h5file) + self.assertIn("/anarray2", self.h5file) + self.assertEqual(self.h5file.root.anarray2.title, "Array title") + + def test03(self): + """Checking rename_node (over Tables)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.rename_node("/table", "table2") + + # Now undo the past operation + self.h5file.undo() + + # Check that table2 does not exist in the object tree + self.assertIn("/table", self.h5file) + table = self.h5file.root.table + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertIsNone(table.cols.var4.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertNotIn("/table2", self.h5file) + self.assertEqual(self.h5file.root.table.title, "Indexed") + + # Redo the operation + self.h5file.redo() + + # Check that table2 has come back to life in a sane state + self.assertNotIn("/table", self.h5file) + self.assertIn("/table2", self.h5file) + self.assertEqual(self.h5file.root.table2.title, "Indexed") + table = self.h5file.root.table2 + self.assertIsNotNone(table.cols.var1.index) + 
self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertIsNone(table.cols.var4.index) + + +class MoveNodeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests for move_node operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, "anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + # Create a table in root + populateTable(self.h5file.root, "table") + + def test00(self): + """Checking move_node (over Leaf)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.move_node("/anarray", "/agroup/agroup3") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does not exist in the object tree + self.assertIn("/anarray", self.h5file) + self.assertNotIn("/agroup/agroup3/anarray", self.h5file) + self.assertEqual(self.h5file.root.anarray.title, "Array title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/anarray", self.h5file) + self.assertIn("/agroup/agroup3/anarray", self.h5file) + self.assertEqual( + self.h5file.root.agroup.agroup3.anarray.title, "Array title" + ) + + def test01(self): + """Checking move_node (over Groups with children)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.move_node("/agroup", "/agroup2", "agroup3") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does not exist in the object tree + self.assertIn("/agroup", self.h5file) + self.assertNotIn("/agroup2/agroup3", self.h5file) + + # Check that children are reachable + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/agroup", self.h5file) + self.assertIn("/agroup2/agroup3", self.h5file) + self.assertEqual( + self.h5file.root.agroup2.agroup3._v_title, "Group title" + ) + + # Check that children are reachable + self.assertIn("/agroup2/agroup3/anarray1", self.h5file) + self.assertIn("/agroup2/agroup3/anarray2", self.h5file) + self.assertIn("/agroup2/agroup3/agroup3", 
self.h5file) + + def test01b(self): + """Checking move_node (over Groups with children 2)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.move_node("/agroup", "/", "agroup3") + self.h5file.move_node("/agroup3", "/agroup2", "agroup4") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does not exist in the object tree + self.assertIn("/agroup", self.h5file) + self.assertNotIn("/agroup2/agroup4", self.h5file) + + # Check that children are reachable + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/agroup", self.h5file) + self.assertIn("/agroup2/agroup4", self.h5file) + self.assertEqual( + self.h5file.root.agroup2.agroup4._v_title, "Group title" + ) + + # Check that children are reachable + self.assertIn("/agroup2/agroup4/anarray1", self.h5file) + self.assertIn("/agroup2/agroup4/anarray2", self.h5file) + self.assertIn("/agroup2/agroup4/agroup3", self.h5file) + + def test02(self): + """Checking move_node (over Leaves)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.move_node("/anarray", "/agroup2", "anarray2") + + # Now undo the past operation + self.h5file.undo() + + # Check that otherarray does not exist in the object tree + self.assertIn("/anarray", self.h5file) + self.assertNotIn("/agroup2/anarray2", self.h5file) + self.assertEqual(self.h5file.root.anarray.title, "Array title") + + # Redo the operation + self.h5file.redo() + + # Check that otherarray has come back to life in a sane state + self.assertNotIn("/anarray", self.h5file) + self.assertIn("/agroup2/anarray2", self.h5file) + self.assertEqual( + self.h5file.root.agroup2.anarray2.title, "Array title" + ) + + def test03(self): + """Checking move_node (over Tables)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.move_node("/table", "/agroup2", "table2") + + # Now undo the past operation + self.h5file.undo() + + # Check that table2 does not exist in the object tree + self.assertIn("/table", self.h5file) + self.assertNotIn("/agroup2/table2", self.h5file) + table = self.h5file.root.table + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertIsNone(table.cols.var4.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertEqual(self.h5file.root.table.title, "Indexed") + + # Redo the operation + self.h5file.redo() + + # Check that table2 has come back to life in a sane state + self.assertNotIn("/table", self.h5file) + self.assertIn("/agroup2/table2", self.h5file) + self.assertEqual(self.h5file.root.agroup2.table2.title, "Indexed") + table = 
self.h5file.root.agroup2.table2 + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertIsNone(table.cols.var4.index) + + +class RemoveNodeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test for remove_node operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, "anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + # Create a table in root + populateTable(self.h5file.root, "table") + + def test00(self): + """Checking remove_node (over Leaf)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Delete an existing array + self.h5file.remove_node("/anarray") + + # Now undo the past operation + self.h5file.undo() + + # Check that it does exist in the object tree + self.assertIn("/anarray", self.h5file) + self.assertEqual(self.h5file.root.anarray.title, "Array title") + + # Redo the operation + self.h5file.redo() + + # Check that array has gone again + self.assertNotIn("/anarray", self.h5file) + + def test00b(self): + """Checking remove_node (over several Leaves)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00b..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Delete a couple of arrays + self.h5file.remove_node("/anarray") + self.h5file.remove_node("/agroup/anarray2") + + # Now undo the past operation + self.h5file.undo() + + # Check that arrays has come into life + self.assertIn("/anarray", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertEqual(self.h5file.root.anarray.title, "Array title") + self.assertEqual( + self.h5file.root.agroup.anarray2.title, "Array title 2" + ) + + # Redo the operation + self.h5file.redo() + + # Check that arrays has disappeared again + self.assertNotIn("/anarray", self.h5file) + self.assertNotIn("/agroup/anarray2", self.h5file) + + def test00c(self): + """Checking remove_node (over Tables)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00c..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Create a new array + self.h5file.remove_node("/table") + + # Now undo the past operation + self.h5file.undo() + + # Check that table2 does not exist in the object tree + self.assertIn("/table", self.h5file) + table = self.h5file.root.table + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertIsNone(table.cols.var4.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertEqual(self.h5file.root.table.title, "Indexed") + + # Redo the operation + self.h5file.redo() + + # Check that table2 has come back to life in a sane state + self.assertNotIn("/table", self.h5file) + + def test01(self): + """Checking remove_node (over Groups with children)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Delete a group recursively + self.h5file.remove_node("/agroup", recursive=1) + + # Now undo the past operation + self.h5file.undo() + + # Check that parent and children has come into life in a sane state + self.assertIn("/agroup", self.h5file) + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + + # Redo the operation + self.h5file.redo() + + # Check that parent and children are not reachable + self.assertNotIn("/agroup", self.h5file) + self.assertNotIn("/agroup/anarray1", self.h5file) + self.assertNotIn("/agroup/anarray2", self.h5file) + self.assertNotIn("/agroup/agroup3", self.h5file) + + def test01b(self): + """Checking remove_node (over Groups with children 2)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b..." 
% self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # Remove a couple of groups + self.h5file.remove_node("/agroup", recursive=1) + self.h5file.remove_node("/agroup2") + + # Now undo the past operation + self.h5file.undo() + + # Check that they does exist in the object tree + self.assertIn("/agroup", self.h5file) + self.assertIn("/agroup2", self.h5file) + + # Check that children are reachable + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + + # Redo the operation + self.h5file.redo() + + # Check that groups does not exist again + self.assertNotIn("/agroup", self.h5file) + self.assertNotIn("/agroup2", self.h5file) + + # Check that children are not reachable + self.assertNotIn("/agroup/anarray1", self.h5file) + self.assertNotIn("/agroup/anarray2", self.h5file) + self.assertNotIn("/agroup/agroup3", self.h5file) + + +class CopyNodeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests for copy_node and copy_children operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, "anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + # Create a table in root + populateTable(self.h5file.root, "table") + + def test00_copyLeaf(self): + """Checking copy_node 
(over Leaves)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_copyLeaf..." % self.__class__.__name__) + + # Enable undo/redo. + self.h5file.enable_undo() + + # /anarray => /agroup/agroup3/ + new_node = self.h5file.copy_node("/anarray", "/agroup/agroup3") + + # Undo the copy. + self.h5file.undo() + + # Check that the copied node does not exist in the object tree. + self.assertNotIn("/agroup/agroup3/anarray", self.h5file) + + # Redo the copy. + self.h5file.redo() + + # Check that the copied node exists again in the object tree. + self.assertIn("/agroup/agroup3/anarray", self.h5file) + self.assertIs(self.h5file.root.agroup.agroup3.anarray, new_node) + + def test00b_copyTable(self): + """Checking copy_node (over Tables)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00b_copyTable..." % self.__class__.__name__) + + # open the do/undo + self.h5file.enable_undo() + + # /table => /agroup/agroup3/ + warnings.filterwarnings("ignore", category=UserWarning) + table = self.h5file.copy_node( + "/table", "/agroup/agroup3", propindexes=True + ) + warnings.filterwarnings("default", category=UserWarning) + self.assertIn("/agroup/agroup3/table", self.h5file) + + table = self.h5file.root.agroup.agroup3.table + self.assertEqual(table.title, "Indexed") + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertIsNone(table.cols.var4.index) + + # Now undo the past operation + self.h5file.undo() + table = self.h5file.root.table + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertIsNone(table.cols.var4.index) + 
self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + + # Check that the copied node does not exist in the object tree. + self.assertNotIn("/agroup/agroup3/table", self.h5file) + + # Redo the operation + self.h5file.redo() + + # Check that table has come back to life in a sane state + self.assertIn("/table", self.h5file) + self.assertIn("/agroup/agroup3/table", self.h5file) + table = self.h5file.root.agroup.agroup3.table + self.assertEqual(table.title, "Indexed") + self.assertIsNotNone(table.cols.var1.index) + self.assertIsNotNone(table.cols.var2.index) + self.assertIsNotNone(table.cols.var3.index) + self.assertEqual(table.cols.var1.index.nelements, minRowIndex) + self.assertEqual(table.cols.var2.index.nelements, minRowIndex) + self.assertEqual(table.cols.var3.index.nelements, minRowIndex) + self.assertIsNone(table.cols.var4.index) + + def test01_copyGroup(self): + """Copying a group (recursively).""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_copyGroup..." % self.__class__.__name__) + + # Enable undo/redo. + self.h5file.enable_undo() + + # /agroup => /acopy + new_node = self.h5file.copy_node( + "/agroup", newname="acopy", recursive=True + ) + + # Undo the copy. + self.h5file.undo() + + # Check that the copied node does not exist in the object tree. + self.assertNotIn("/acopy", self.h5file) + self.assertNotIn("/acopy/anarray1", self.h5file) + self.assertNotIn("/acopy/anarray2", self.h5file) + self.assertNotIn("/acopy/agroup3", self.h5file) + + # Redo the copy. + self.h5file.redo() + + # Check that the copied node exists again in the object tree. 
+ self.assertIn("/acopy", self.h5file) + self.assertIn("/acopy/anarray1", self.h5file) + self.assertIn("/acopy/anarray2", self.h5file) + self.assertIn("/acopy/agroup3", self.h5file) + self.assertIs(self.h5file.root.acopy, new_node) + + def test02_copyLeafOverwrite(self): + """Copying a leaf, overwriting destination.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test02_copyLeafOverwrite..." + % self.__class__.__name__ + ) + + # Enable undo/redo. + self.h5file.enable_undo() + + # /anarray => /agroup/agroup + oldNode = self.h5file.root.agroup + new_node = self.h5file.copy_node( + "/anarray", newname="agroup", overwrite=True + ) + + # Undo the copy. + self.h5file.undo() + + # Check that the copied node does not exist in the object tree. + # Check that the overwritten node exists again in the object tree. + self.assertIs(self.h5file.root.agroup, oldNode) + + # Redo the copy. + self.h5file.redo() + + # Check that the copied node exists again in the object tree. + # Check that the overwritten node does not exist in the object tree. + self.assertIs(self.h5file.root.agroup, new_node) + + def test03_copyChildren(self): + """Copying the children of a group""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test03_copyChildren..." % self.__class__.__name__ + ) + + # Enable undo/redo. + self.h5file.enable_undo() + + # /agroup/* => /agroup/ + self.h5file.copy_children("/agroup", "/agroup2", recursive=True) + + # Undo the copy. + self.h5file.undo() + + # Check that the copied nodes do not exist in the object tree. + self.assertNotIn("/agroup2/anarray1", self.h5file) + self.assertNotIn("/agroup2/anarray2", self.h5file) + self.assertNotIn("/agroup2/agroup3", self.h5file) + + # Redo the copy. + self.h5file.redo() + + # Check that the copied nodes exist again in the object tree. 
+ self.assertIn("/agroup2/anarray1", self.h5file) + self.assertIn("/agroup2/anarray2", self.h5file) + self.assertIn("/agroup2/agroup3", self.h5file) + + +class ComplexTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests for a mix of all operations""" + + def setUp(self): + super().setUp() + + h5file = self.h5file + root = h5file.root + + # Create an array + h5file.create_array(root, "array", [1, 2], title="Title example") + + # Create another array object + h5file.create_array(root, "anarray", [1], "Array title") + + # Create a group object + group = h5file.create_group(root, "agroup", "Group title") + + # Create a couple of objects there + h5file.create_array(group, "anarray1", [2], "Array title 1") + h5file.create_array(group, "anarray2", [2], "Array title 2") + + # Create a lonely group in first level + h5file.create_group(root, "agroup2", "Group title 2") + + # Create a new group in the second level + h5file.create_group(group, "agroup3", "Group title 3") + + def test00(self): + """Mix of create_array, create_group, renameNone, move_node, + remove_node, copy_node and copy_children.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." % self.__class__.__name__) + + # Enable undo/redo. 
+ self.h5file.enable_undo() + + # Create an array + self.h5file.create_array( + self.h5file.root, "anarray3", [1], "Array title 3" + ) + # Create a group + self.h5file.create_group(self.h5file.root, "agroup3", "Group title 3") + + # /anarray => /agroup/agroup3/ + new_node = self.h5file.copy_node("/anarray3", "/agroup/agroup3") + new_node = self.h5file.copy_children( + "/agroup", "/agroup3", recursive=1 + ) + + # rename anarray + self.h5file.rename_node("/anarray", "anarray4") + + # Move anarray + new_node = self.h5file.copy_node("/anarray3", "/agroup") + + # Remove anarray4 + self.h5file.remove_node("/anarray4") + + # Undo the actions + self.h5file.undo() + self.assertNotIn("/anarray4", self.h5file) + self.assertNotIn("/anarray3", self.h5file) + self.assertNotIn("/agroup/agroup3/anarray3", self.h5file) + self.assertNotIn("/agroup3", self.h5file) + self.assertNotIn("/anarray4", self.h5file) + self.assertIn("/anarray", self.h5file) + + # Redo the actions + self.h5file.redo() + + # Check that the copied node exists again in the object tree. + self.assertIn("/agroup/agroup3/anarray3", self.h5file) + self.assertIn("/agroup/anarray3", self.h5file) + self.assertIn("/agroup3/agroup3/anarray3", self.h5file) + self.assertNotIn("/agroup3/anarray3", self.h5file) + self.assertIs(self.h5file.root.agroup.anarray3, new_node) + self.assertNotIn("/anarray", self.h5file) + self.assertNotIn("/anarray4", self.h5file) + + def test01(self): + """Test with multiple generations (Leaf case)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." % self.__class__.__name__) + + # Enable undo/redo. 
+ self.h5file.enable_undo() + + # remove /anarray + self.h5file.remove_node("/anarray") + + # Create an array in the same place + self.h5file.create_array( + self.h5file.root, "anarray", [2], "Array title 2" + ) + # remove the array again + self.h5file.remove_node("/anarray") + + # Create an array + self.h5file.create_array( + self.h5file.root, "anarray", [3], "Array title 3" + ) + # remove the array again + self.h5file.remove_node("/anarray") + + # Create an array + self.h5file.create_array( + self.h5file.root, "anarray", [4], "Array title 4" + ) + # Undo the actions + self.h5file.undo() + + # Check that /anarray is in the correct state before redoing + self.assertEqual(self.h5file.root.anarray.title, "Array title") + self.assertEqual(self.h5file.root.anarray[:], [1]) + + # Redo the actions + self.h5file.redo() + self.assertEqual(self.h5file.root.anarray.title, "Array title 4") + self.assertEqual(self.h5file.root.anarray[:], [4]) + + def test02(self): + """Test with multiple generations (Group case)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02..." % self.__class__.__name__) + + # Enable undo/redo. 
+ self.h5file.enable_undo() + + # remove /agroup + self.h5file.remove_node("/agroup2") + + # Create a group in the same place + self.h5file.create_group(self.h5file.root, "agroup2", "Group title 22") + + # remove the group + self.h5file.remove_node("/agroup2") + + # Create a group + self.h5file.create_group(self.h5file.root, "agroup2", "Group title 3") + + # remove the group + self.h5file.remove_node("/agroup2") + + # Create a group + self.h5file.create_group(self.h5file.root, "agroup2", "Group title 4") + + # Create a child group + self.h5file.create_group( + self.h5file.root.agroup2, "agroup5", "Group title 5" + ) + + # Undo the actions + self.h5file.undo() + + # Check that /agroup is in the state before enabling do/undo + self.assertEqual(self.h5file.root.agroup2._v_title, "Group title 2") + self.assertIn("/agroup2", self.h5file) + + # Redo the actions + self.h5file.redo() + self.assertEqual(self.h5file.root.agroup2._v_title, "Group title 4") + self.assertEqual( + self.h5file.root.agroup2.agroup5._v_title, "Group title 5" + ) + + def test03(self): + """Test with multiple generations (Group case, recursive remove)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03..." % self.__class__.__name__) + + # Enable undo/redo. 
+ self.h5file.enable_undo() + + # remove /agroup + self.h5file.remove_node("/agroup", recursive=1) + + # Create a group in the same place + self.h5file.create_group(self.h5file.root, "agroup", "Group title 2") + + # remove the group + self.h5file.remove_node("/agroup") + + # Create a group + self.h5file.create_group(self.h5file.root, "agroup", "Group title 3") + + # remove the group + self.h5file.remove_node("/agroup") + + # Create a group + self.h5file.create_group(self.h5file.root, "agroup", "Group title 4") + + # Create a child group + self.h5file.create_group( + self.h5file.root.agroup, "agroup5", "Group title 5" + ) + # Undo the actions + self.h5file.undo() + + # Check that /agroup is in the state before enabling do/undo + self.assertIn("/agroup", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title") + self.assertIn("/agroup/anarray1", self.h5file) + self.assertIn("/agroup/anarray2", self.h5file) + self.assertIn("/agroup/agroup3", self.h5file) + self.assertNotIn("/agroup/agroup5", self.h5file) + + # Redo the actions + self.h5file.redo() + self.assertIn("/agroup", self.h5file) + self.assertEqual(self.h5file.root.agroup._v_title, "Group title 4") + self.assertIn("/agroup/agroup5", self.h5file) + self.assertEqual( + self.h5file.root.agroup.agroup5._v_title, "Group title 5" + ) + + def test03b(self): + """Test with multiple generations (Group case, recursive remove, + case 2)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03b..." % self.__class__.__name__) + + # Enable undo/redo. 
+ self.h5file.enable_undo() + + # Create a new group with a child + self.h5file.create_group(self.h5file.root, "agroup3", "Group title 3") + self.h5file.create_group( + self.h5file.root.agroup3, "agroup4", "Group title 4" + ) + + # remove /agroup3 + self.h5file.remove_node("/agroup3", recursive=1) + + # Create a group in the same place + self.h5file.create_group(self.h5file.root, "agroup3", "Group title 4") + + # Undo the actions + self.h5file.undo() + + # Check that /agroup is in the state before enabling do/undo + self.assertNotIn("/agroup3", self.h5file) + + # Redo the actions + self.h5file.redo() + self.assertEqual(self.h5file.root.agroup3._v_title, "Group title 4") + self.assertIn("/agroup3", self.h5file) + self.assertNotIn("/agroup/agroup4", self.h5file) + + +class AttributesTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests for operation on attributes""" + + def setUp(self): + super().setUp() + + # Create an array. + array = self.h5file.create_array("/", "array", [1, 2]) + + # Set some attributes on it. + attrs = array.attrs + attrs.attr_1 = 10 + attrs.attr_2 = 20 + attrs.attr_3 = 30 + + def test00_setAttr(self): + """Setting a nonexistent attribute""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_setAttr..." % self.__class__.__name__) + + array = self.h5file.root.array + attrs = array.attrs + + self.h5file.enable_undo() + setattr(attrs, "attr_0", 0) + self.assertIn("attr_0", attrs) + self.assertEqual(attrs.attr_0, 0) + self.h5file.undo() + self.assertNotIn("attr_0", attrs) + self.h5file.redo() + self.assertIn("attr_0", attrs) + self.assertEqual(attrs.attr_0, 0) + + def test01_setAttrExisting(self): + """Setting an existing attribute""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_setAttrExisting..." 
+ % self.__class__.__name__ + ) + + array = self.h5file.root.array + attrs = array.attrs + + self.h5file.enable_undo() + setattr(attrs, "attr_1", 11) + self.assertIn("attr_1", attrs) + self.assertEqual(attrs.attr_1, 11) + self.h5file.undo() + self.assertIn("attr_1", attrs) + self.assertEqual(attrs.attr_1, 10) + self.h5file.redo() + self.assertIn("attr_1", attrs) + self.assertEqual(attrs.attr_1, 11) + + def test02_delAttr(self): + """Removing an attribute""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_delAttr..." % self.__class__.__name__) + + array = self.h5file.root.array + attrs = array.attrs + + self.h5file.enable_undo() + delattr(attrs, "attr_1") + self.assertNotIn("attr_1", attrs) + self.h5file.undo() + self.assertIn("attr_1", attrs) + self.assertEqual(attrs.attr_1, 10) + self.h5file.redo() + self.assertNotIn("attr_1", attrs) + + def test03_copyNodeAttrs(self): + """Copying an attribute set""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test03_copyNodeAttrs..." % self.__class__.__name__ + ) + + rattrs = self.h5file.root._v_attrs + rattrs.attr_0 = 0 + rattrs.attr_1 = 100 + + array = self.h5file.root.array + attrs = array.attrs + + self.h5file.enable_undo() + attrs._f_copy(self.h5file.root) + self.assertEqual(rattrs.attr_0, 0) + self.assertEqual(rattrs.attr_1, 10) + self.assertEqual(rattrs.attr_2, 20) + self.assertEqual(rattrs.attr_3, 30) + self.h5file.undo() + self.assertEqual(rattrs.attr_0, 0) + self.assertEqual(rattrs.attr_1, 100) + self.assertNotIn("attr_2", rattrs) + self.assertNotIn("attr_3", rattrs) + self.h5file.redo() + self.assertEqual(rattrs.attr_0, 0) + self.assertEqual(rattrs.attr_1, 10) + self.assertEqual(rattrs.attr_2, 20) + self.assertEqual(rattrs.attr_3, 30) + + def test04_replaceNode(self): + """Replacing a node with a rewritten attribute""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_replaceNode..." 
% self.__class__.__name__) + + array = self.h5file.root.array + attrs = array.attrs + + self.h5file.enable_undo() + attrs.attr_1 = 11 + self.h5file.remove_node("/array") + arr = self.h5file.create_array("/", "array", [1]) + arr.attrs.attr_1 = 12 + self.h5file.undo() + self.assertIn("attr_1", self.h5file.root.array.attrs) + self.assertEqual(self.h5file.root.array.attrs.attr_1, 10) + self.h5file.redo() + self.assertIn("attr_1", self.h5file.root.array.attrs) + self.assertEqual(self.h5file.root.array.attrs.attr_1, 12) + + +class NotLoggedTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test not logged nodes.""" + + class NotLoggedArray(tb.node.NotLoggedMixin, tb.Array): + pass + + def test00_hierarchy(self): + """Performing hierarchy operations on a not logged node.""" + + self.h5file.create_group("/", "tgroup") + self.h5file.enable_undo() + + # Node creation is not undone. + arr = self.NotLoggedArray( + self.h5file.root, "test", [1], self._getMethodName() + ) + self.h5file.undo() + self.assertIn("/test", self.h5file) + + # Node movement is not undone. + arr.move("/tgroup") + self.h5file.undo() + self.assertIn("/tgroup/test", self.h5file) + + # Node removal is not undone. + arr.remove() + self.h5file.undo() + self.assertNotIn("/tgroup/test", self.h5file) + + def test01_attributes(self): + """Performing attribute operations on a not logged node.""" + + arr = self.NotLoggedArray( + self.h5file.root, "test", [1], self._getMethodName() + ) + self.h5file.enable_undo() + + # Attribute creation is not undone. + arr._v_attrs.foo = "bar" + self.h5file.undo() + self.assertEqual(arr._v_attrs.foo, "bar") + + # Attribute change is not undone. + arr._v_attrs.foo = "baz" + self.h5file.undo() + self.assertEqual(arr._v_attrs.foo, "baz") + + # Attribute removal is not undone. 
+ del arr._v_attrs.foo + self.h5file.undo() + self.assertRaises(AttributeError, getattr, arr._v_attrs, "foo") + + +class CreateParentsTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test the ``createparents`` flag.""" + + def setUp(self): + super().setUp() + g1 = self.h5file.create_group("/", "g1") + self.h5file.create_group(g1, "g2") + + def existing(self, paths): + """Return a set of the existing paths in `paths`.""" + return frozenset(path for path in paths if path in self.h5file) + + def basetest(self, doit, pre, post): + pre() + self.h5file.enable_undo() + + paths = ["/g1", "/g1/g2", "/g1/g2/g3", "/g1/g2/g3/g4"] + for newpath in paths: + before = self.existing(paths) + doit(newpath) + after = self.existing(paths) + self.assertTrue(after.issuperset(before)) + + self.h5file.undo() + post(newpath) + after = self.existing(paths) + self.assertEqual(after, before) + + def test00_create(self): + """Test creating a node.""" + + def pre(): + pass + + def doit(newpath): + self.h5file.create_array(newpath, "array", [1], createparents=True) + self.assertIn(tb.path.join_path(newpath, "array"), self.h5file) + + def post(newpath): + self.assertNotIn(tb.path.join_path(newpath, "array"), self.h5file) + + self.basetest(doit, pre, post) + + def test01_move(self): + """Test moving a node.""" + + def pre(): + self.h5file.create_array("/", "array", [1]) + + def doit(newpath): + self.h5file.move_node("/array", newpath, createparents=True) + self.assertNotIn("/array", self.h5file) + self.assertIn(tb.path.join_path(newpath, "array"), self.h5file) + + def post(newpath): + self.assertIn("/array", self.h5file) + self.assertNotIn(tb.path.join_path(newpath, "array"), self.h5file) + + self.basetest(doit, pre, post) + + def test02_copy(self): + """Test copying a node.""" + + def pre(): + self.h5file.create_array("/", "array", [1]) + + def doit(newpath): + self.h5file.copy_node("/array", newpath, createparents=True) + self.assertIn(tb.path.join_path(newpath, "array"), 
self.h5file) + + def post(newpath): + self.assertNotIn(tb.path.join_path(newpath, "array"), self.h5file) + + self.basetest(doit, pre, post) + + def test03_copyChildren(self): + """Test copying the children of a group.""" + + def pre(): + g = self.h5file.create_group("/", "group") + self.h5file.create_array(g, "array1", [1]) + self.h5file.create_array(g, "array2", [1]) + + def doit(newpath): + self.h5file.copy_children("/group", newpath, createparents=True) + self.assertIn(tb.path.join_path(newpath, "array1"), self.h5file) + self.assertIn(tb.path.join_path(newpath, "array2"), self.h5file) + + def post(newpath): + self.assertNotIn(tb.path.join_path(newpath, "array1"), self.h5file) + self.assertNotIn(tb.path.join_path(newpath, "array2"), self.h5file) + + self.basetest(doit, pre, post) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # uncomment this only for testing purposes + + for n in range(niter): + theSuite.addTest(common.make_suite(BasicTestCase)) + theSuite.addTest(common.make_suite(PersistenceTestCase)) + theSuite.addTest(common.make_suite(CreateArrayTestCase)) + theSuite.addTest(common.make_suite(CreateGroupTestCase)) + theSuite.addTest(common.make_suite(RenameNodeTestCase)) + theSuite.addTest(common.make_suite(MoveNodeTestCase)) + theSuite.addTest(common.make_suite(RemoveNodeTestCase)) + theSuite.addTest(common.make_suite(CopyNodeTestCase)) + theSuite.addTest(common.make_suite(AttributesTestCase)) + theSuite.addTest(common.make_suite(ComplexTestCase)) + theSuite.addTest(common.make_suite(NotLoggedTestCase)) + theSuite.addTest(common.make_suite(CreateParentsTestCase)) + if common.heavy: + pass + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_earray.py b/venv/Lib/site-packages/tables/tests/test_earray.py new file mode 100644 index 0000000..62e2543 
def populateFile(self):
    """Create the ``/earray1`` node and append ``nappends`` chunks to it.

    Besides writing the data, this stores on the instance the values the
    test methods read back later: ``rowshape`` (shape of one appended
    chunk, as a list), ``objsize`` (``length`` times the number of
    elements in one chunk) and ``extdim`` (the enlargeable dimension
    reported by the array).
    """
    source = self.obj
    is_string = self.type == "string"

    # An explicit atom is only built when the array is not seeded from
    # an initial object.
    if source is not None:
        atom = None
    elif is_string:
        atom = tb.StringAtom(itemsize=self.length)
    else:
        atom = tb.Atom.from_type(self.type)

    earray = self.h5file.create_earray(
        self.rootgroup,
        "earray1",
        atom=atom,
        shape=self.shape,
        title=self.__class__.__name__,
        filters=tb.Filters(
            complevel=self.compress,
            complib=self.complib,
            shuffle=self.shuffle,
            fletcher32=self.fletcher32,
        ),
        expectedrows=1,
        obj=source,
    )
    earray.flavor = self.flavor

    # Shape of a single chunk, derived from the shape of the new array.
    chunk_shape = list(earray.shape)
    if source is not None:
        chunk_shape[0] = 0
    self.rowshape = chunk_shape

    # Element count of one chunk; zero-length dimensions are skipped and
    # the enlargeable one is accounted for through ``chunksize``.
    nelements = self.length
    for dim in chunk_shape:
        if dim != 0:
            nelements *= dim
    self.extdim = earray.extdim
    self.objsize = nelements * self.chunksize
    chunk_shape[earray.extdim] = self.chunksize

    if is_string:
        chunk = np.ndarray(
            buffer=b"a" * self.objsize,
            shape=chunk_shape,
            dtype=f"S{earray.atom.itemsize}",
        )
    else:
        chunk = np.arange(self.objsize, dtype=earray.atom.dtype.base)
        chunk.shape = chunk_shape

    if common.verbose:
        shown = chunk if self.flavor == "numpy" else repr(chunk)
        print("Object to append -->", shown)

    # Numeric chunks are scaled by the append index; string chunks are
    # appended unchanged every time.
    for index in range(self.nappends):
        earray.append(chunk if is_string else chunk * index)
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + if self.reopen: + self._reopen() + earray = self.h5file.get_node("/earray1") + + # Choose a small value for buffer size + earray.nrowsinbuf = 3 + if common.verbose: + print("EArray descr:", repr(earray)) + print("shape of read array ==>", earray.shape) + print("reopening?:", self.reopen) + + # Build the array to do comparisons + if self.type == "string": + object_ = np.ndarray( + buffer=b"a" * self.objsize, + shape=self.rowshape, + dtype="S%s" % earray.atom.itemsize, + ) + else: + object_ = np.arange(self.objsize, dtype=earray.atom.dtype.base) + object_.shape = self.rowshape + object_ = object_.swapaxes(earray.extdim, 0) + + if self.obj is not None: + initialrows = len(self.obj) + else: + initialrows = 0 + + shape = self._get_shape() + + # Read all the array + for idx, row in enumerate(earray): + if idx < initialrows: + self.assertTrue( + common.allequal( + row, np.asarray(self.obj[idx]), self.flavor + ) + ) + continue + + chunk = int((earray.nrow - initialrows) % self.chunksize) + if chunk == 0: + if self.type == "string": + object__ = object_ + else: + i = int(earray.nrow - initialrows) + object__ = object_ * (i // self.chunksize) + + object = object__[chunk] + # The next adds much more verbosity + if common.verbose and 0: + print("number of row ==>", earray.nrow) + if hasattr(object, "shape"): + print("shape should look as:", object.shape) + print("row in earray ==>", repr(row)) + print("Should look like ==>", repr(object)) + + self.assertEqual( + initialrows + self.nappends * self.chunksize, earray.nrows + ) + self.assertTrue(common.allequal(row, object, self.flavor)) + if hasattr(row, "shape"): + self.assertEqual(len(row.shape), len(shape) - 1) + else: + # Scalar case + self.assertEqual(len(shape), 1) + + # Check filters: + if self.compress != earray.filters.complevel and common.verbose: + print("Error in compress. 
Class:", self.__class__.__name__) + print("self, earray:", self.compress, earray.filters.complevel) + self.assertEqual(earray.filters.complevel, self.compress) + if self.compress > 0 and tb.which_lib_version(self.complib): + # Some libraries like Blosc support different compressors, + # specified after ":". + self.assertEqual( + earray.filters.complib.split(":")[0], self.complib + ) + if self.shuffle != earray.filters.shuffle and common.verbose: + print("Error in shuffle. Class:", self.__class__.__name__) + print("self, earray:", self.shuffle, earray.filters.shuffle) + self.assertEqual(self.shuffle, earray.filters.shuffle) + if self.fletcher32 != earray.filters.fletcher32 and common.verbose: + print("Error in fletcher32. Class:", self.__class__.__name__) + print( + "self, earray:", self.fletcher32, earray.filters.fletcher32 + ) + self.assertEqual(self.fletcher32, earray.filters.fletcher32) + + def test02_sssEArray(self): + """Checking enlargeable array iterator with (start, stop, step)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_sssEArray..." 
def test03_readEArray(self):
    """Checking read() of enlargeable arrays.

    Rebuilds in memory the data written by ``populateFile`` and compares
    the ``(start, stop, step)`` selection of that reference with what
    ``earray.read()`` returns for the same arguments.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test03_readEArray..." % self.__class__.__name__)

    # This conversion made just in case indices are numpy scalars
    if self.start is not None:
        self.start = int(self.start)
    if self.stop is not None:
        self.stop = int(self.stop)
    if self.step is not None:
        self.step = int(self.step)

    # Create an instance of an HDF5 Table
    if self.reopen:
        self._reopen()
    earray = self.h5file.get_node("/earray1")

    # Choose a small value for buffer size
    earray.nrowsinbuf = 3
    if common.verbose:
        print("EArray descr:", repr(earray))
        print("shape of read array ==>", earray.shape)
        print("reopening?:", self.reopen)

    is_string = self.type == "string"

    # One appended chunk, with the enlargeable axis moved to the front
    if is_string:
        chunk = np.ndarray(
            buffer=b"a" * self.objsize,
            shape=self.rowshape,
            dtype=f"S{earray.atom.itemsize}",
        )
    else:
        chunk = np.arange(self.objsize, dtype=earray.atom.dtype.base)
        chunk.shape = self.rowshape
    chunk = chunk.swapaxes(earray.extdim, 0)

    initialrows = len(self.obj) if self.obj is not None else 0

    # Work on a copy so the fixture's ``rowshape`` is left untouched.
    # Every append adds ``chunksize`` rows on top of the initial ones,
    # i.e. the same total that is asserted against ``earray.nrows`` below.
    fullshape = list(self.rowshape)
    fullshape[self.extdim] = (
        fullshape[self.extdim] * self.nappends + initialrows
    )
    full_dtype = f"S{earray.atom.itemsize}" if is_string else self.dtype
    full = np.empty(shape=fullshape, dtype=full_dtype)
    full = full.swapaxes(0, self.extdim)

    if initialrows:
        full[0:initialrows] = self.obj

    for i in range(self.nappends):
        j = initialrows + i * self.chunksize
        full[j : j + self.chunksize] = chunk if is_string else chunk * i

    if self.nappends:
        # do a copy() so the expected data owns its buffer instead of
        # being a strided view of ``full``
        expected = full[self.start : self.stop : self.step].copy()
        # Swap the axes again to have normal ordering
        if self.flavor == "numpy":
            expected = expected.swapaxes(0, self.extdim)
    else:
        expected = np.empty(shape=self.shape, dtype=self.dtype)

    # Read all the array
    try:
        row = earray.read(self.start, self.stop, self.step)
    except IndexError:
        row = np.empty(shape=self.shape, dtype=self.dtype)

    if common.verbose:
        if hasattr(expected, "shape"):
            print("shape should look as:", expected.shape)
        print("Object read ==>", repr(row))
        print("Should look like ==>", repr(expected))

    self.assertEqual(
        initialrows + self.nappends * self.chunksize, earray.nrows
    )
    self.assertTrue(common.allequal(row, expected, self.flavor))

    shape = self._get_shape()
    if hasattr(row, "shape"):
        self.assertEqual(len(row.shape), len(shape))
        if self.flavor == "numpy":
            self.assertEqual(row.itemsize, earray.atom.itemsize)
    else:
        # Scalar case
        self.assertEqual(len(shape), 1)
shape=self.rowshape, + dtype="S%s" % earray.atom.itemsize, + ) + else: + object_ = np.arange(self.objsize, dtype=earray.atom.dtype.base) + object_.shape = self.rowshape + object_ = object_.swapaxes(earray.extdim, 0) + + if self.obj is not None: + initialrows = len(self.obj) + else: + initialrows = 0 + + rowshape = self.rowshape + rowshape[self.extdim] *= self.nappends + initialrows + if self.type == "string": + object__ = np.empty( + shape=rowshape, dtype=f"S{earray.atom.itemsize}" + ) + else: + object__ = np.empty(shape=rowshape, dtype=self.dtype) + + object__ = object__.swapaxes(0, self.extdim) + + if initialrows: + object__[0:initialrows] = self.obj + + for i in range(self.nappends): + j = initialrows + i * self.chunksize + if self.type == "string": + object__[j : j + self.chunksize] = object_ + else: + object__[j : j + self.chunksize] = object_ * i + + stop = self.stop + + if self.nappends: + # stop == None means read only the element designed by start + # (in read() contexts) + if self.stop is None: + if self.start == -1: # corner case + stop = earray.nrows + else: + stop = self.start + 1 + # Protection against number of elements less than existing + # if rowshape[self.extdim] < self.stop or self.stop == 0: + if rowshape[self.extdim] < stop: + # self.stop == 0 means last row only in read() + # and not in [::] slicing notation + stop = rowshape[self.extdim] + # do a copy() in order to ensure that len(object._data) + # actually do a measure of its length + # object = object__[self.start:stop:self.step].copy() + object = object__[self.start : self.stop : self.step].copy() + # Swap the axes again to have normal ordering + if self.flavor == "numpy": + object = object.swapaxes(0, self.extdim) + else: + object = np.empty(shape=self.shape, dtype=self.dtype) + + # Read all the array + try: + row = np.empty(earray.shape, dtype=earray.atom.dtype) + slice_obj = [slice(None)] * len(earray.shape) + # slice_obj[earray.maindim] = slice(self.start, stop, self.step) + 
slice_obj[earray.maindim] = slice(self.start, self.stop, self.step) + row = row[tuple(slice_obj)].copy() + earray.read(self.start, self.stop, self.step, out=row) + except IndexError: + row = np.empty(shape=self.shape, dtype=self.dtype) + + if common.verbose: + if hasattr(object, "shape"): + print("shape should look as:", object.shape) + print("Object read ==>", repr(row)) + print("Should look like ==>", repr(object)) + + self.assertEqual( + initialrows + self.nappends * self.chunksize, earray.nrows + ) + self.assertTrue(common.allequal(row, object, self.flavor)) + + shape = self._get_shape() + if hasattr(row, "shape"): + self.assertEqual(len(row.shape), len(shape)) + if self.flavor == "numpy": + self.assertEqual(row.itemsize, earray.atom.itemsize) + else: + # Scalar case + self.assertEqual(len(shape), 1) + + def test04_getitemEArray(self): + """Checking enlargeable array __getitem__ special method.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test04_getitemEArray..." 
% self.__class__.__name__ + ) + + if not hasattr(self, "slices"): + # If there is not a slices attribute, create it + # This conversion made just in case indices are numpy scalars + if self.start is not None: + self.start = int(self.start) + if self.stop is not None: + self.stop = int(self.stop) + if self.step is not None: + self.step = int(self.step) + self.slices = (slice(self.start, self.stop, self.step),) + + # Create an instance of an HDF5 Table + if self.reopen: + self._reopen() + earray = self.h5file.get_node("/earray1") + + # Choose a small value for buffer size + # earray.nrowsinbuf = 3 # this does not really change the chunksize + if common.verbose: + print("EArray descr:", repr(earray)) + print("shape of read array ==>", earray.shape) + print("reopening?:", self.reopen) + + # Build the array to do comparisons + if self.type == "string": + object_ = np.ndarray( + buffer=b"a" * self.objsize, + shape=self.rowshape, + dtype=f"S{earray.atom.itemsize}", + ) + else: + object_ = np.arange(self.objsize, dtype=earray.atom.dtype.base) + object_.shape = self.rowshape + + object_ = object_.swapaxes(earray.extdim, 0) + + if self.obj is not None: + initialrows = len(self.obj) + else: + initialrows = 0 + + rowshape = self.rowshape + rowshape[self.extdim] *= self.nappends + initialrows + if self.type == "string": + object__ = np.empty( + shape=rowshape, dtype=f"S{earray.atom.itemsize}" + ) + else: + object__ = np.empty(shape=rowshape, dtype=self.dtype) + # Additional conversion for the numpy case + object__ = object__.swapaxes(0, earray.extdim) + + if initialrows: + object__[0:initialrows] = self.obj + + for i in range(self.nappends): + j = initialrows + i * self.chunksize + if self.type == "string": + object__[j : j + self.chunksize] = object_ + else: + object__[j : j + self.chunksize] = object_ * i + + if self.nappends: + # Swap the axes again to have normal ordering + if self.flavor == "numpy": + object__ = object__.swapaxes(0, self.extdim) + else: + 
def test05_setitemEArray(self):
    """Checking enlargeable array __setitem__ special method.

    Applies the same modification to an in-memory reference and to the
    on-disk array (through ``wslice`` when the test case defines it,
    otherwise through ``slices``) and compares the two afterwards.
    """

    if self.__class__.__name__ == "Ellipsis6EArrayTestCase":
        # We have a problem with test design here, but I think
        # it is not worth the effort to solve it
        # F.Alted 2004-10-27
        return

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test05_setitemEArray..." % self.__class__.__name__
        )

    if not hasattr(self, "slices"):
        # If there is not a slices attribute, create it
        # This conversion made just in case indices are numpy scalars
        if self.start is not None:
            self.start = int(self.start)
        if self.stop is not None:
            self.stop = int(self.stop)
        if self.step is not None:
            self.step = int(self.step)
        self.slices = (slice(self.start, self.stop, self.step),)

    # Create an instance of an HDF5 Table
    if self.reopen:
        self._reopen(mode="a")
    earray = self.h5file.get_node("/earray1")

    if common.verbose:
        print("EArray descr:", repr(earray))
        print("shape of read array ==>", earray.shape)
        print("reopening?:", self.reopen)

    is_string = self.type == "string"

    # One appended chunk, with the enlargeable axis moved to the front
    if is_string:
        chunk = np.ndarray(
            buffer=b"a" * self.objsize,
            shape=self.rowshape,
            dtype=f"S{earray.atom.itemsize}",
        )
    else:
        chunk = np.arange(self.objsize, dtype=earray.atom.dtype.base)
        chunk.shape = self.rowshape
    chunk = chunk.swapaxes(earray.extdim, 0)

    initialrows = len(self.obj) if self.obj is not None else 0

    # Work on a copy so the fixture's ``rowshape`` is left untouched.
    # Every append adds ``chunksize`` rows on top of the initial ones,
    # i.e. the same total that is asserted against ``earray.nrows`` below.
    fullshape = list(self.rowshape)
    fullshape[self.extdim] = (
        fullshape[self.extdim] * self.nappends + initialrows
    )
    full_dtype = f"S{earray.atom.itemsize}" if is_string else self.dtype
    full = np.empty(shape=fullshape, dtype=full_dtype)
    full = full.swapaxes(0, earray.extdim)

    for i in range(self.nappends):
        j = initialrows + i * self.chunksize
        full[j : j + self.chunksize] = chunk if is_string else chunk * i

    if initialrows:
        full[0:initialrows] = self.obj

    if self.nappends:
        # Swap the axes again to have normal ordering.  Other flavors
        # keep the current layout: the original called swapaxes() there
        # without using its result, which had no effect.
        if self.flavor == "numpy":
            full = full.swapaxes(0, self.extdim)
        # do a copy() so the expected data owns its buffer instead of
        # being a view of ``full``
        expected = full.__getitem__(self.slices).copy()
    else:
        expected = np.empty(shape=self.shape, dtype=self.dtype)

    if self.flavor == "numpy":
        expected = np.asarray(expected)

    # Modify the reference and the earray in the same way
    if is_string:
        if hasattr(self, "wslice"):
            expected[self.wslice] = "xXx"
            earray[self.wslice] = "xXx"
        elif sum(expected[self.slices].shape) != 0:
            expected[:] = "xXx"
            if expected.size > 0:
                earray[self.slices] = expected
    else:
        if hasattr(self, "wslice"):
            expected[self.wslice] = expected[self.wslice] * 2 + 3
            earray[self.wslice] = earray[self.wslice] * 2 + 3
        elif sum(expected[self.slices].shape) != 0:
            expected = expected * 2 + 3
            if np.prod(expected.shape) > 0:
                earray[self.slices] = earray[self.slices] * 2 + 3

    # Read all the array; the single guarded read keeps the IndexError
    # fallback reachable.
    try:
        row = earray.__getitem__(self.slices)
    except IndexError:
        print("IndexError!")
        row = np.empty(shape=self.shape, dtype=self.dtype)

    if common.verbose:
        print("Object read:\n", repr(row))
        print("Should look like:\n", repr(expected))
        if hasattr(expected, "shape"):
            print("Original object shape:", self.shape)
            print("Shape read:", row.shape)
            print("shape should look as:", expected.shape)

    self.assertEqual(
        initialrows + self.nappends * self.chunksize, earray.nrows
    )
    self.assertTrue(common.allequal(row, expected, self.flavor))
    if not hasattr(row, "shape"):
        # Scalar case
        self.assertEqual(len(self.shape), 1)
class Basic2WriteTestCase(BasicTestCase):
    """1-D int32 case that writes through a strided ``wslice``."""

    # Element type, given both as a type name and as a dtype string
    type = "int32"
    dtype = "i4"

    # Geometry of the array and of the appended data
    shape = (0,)
    chunksize = 5
    nappends = 10

    # Read step and write selection (a range of elements, built from the
    # two values defined just above)
    step = 1
    wslice = slice(chunksize - 2, nappends, 2)

    # This case does not reopen files
    reopen = 0
@common.unittest.skipIf(
    not common.lzo_avail, "LZO compression library not available"
)
class SlicesEArrayTestCase(BasicTestCase):
    """Two-dimensional slicing of an LZO-compressed int32 array."""

    # Geometry of the array and of the appended data
    type = "int32"
    shape = (2, 0)
    chunksize = 5
    nappends = 2

    # Compression settings
    compress = 1
    complib = "lzo"

    # Selection used by the __getitem__/__setitem__ tests
    slices = (slice(1, 2, 1), slice(1, 3, 1))
@common.unittest.skipIf(
    not common.blosc_avail, "BLOSC compression library not available"
)
class Slices3EArrayTestCase(BasicTestCase):
    """Partial multidimensional slicing of a Blosc-compressed 4-D array."""

    compress = 1  # To show the chunks id DEBUG is on
    complib = "blosc"
    type = "int32"
    shape = (2, 3, 4, 0)
    chunksize = 5
    nappends = 20
    # This is the only selection that was ever in effect: the class used to
    # assign ``slices`` three times and each assignment overwrote the
    # previous one.  The first of the overwritten variants,
    # (slice(1, 2, 1), slice(0, None, None), slice(1, 4, 2)), was annotated
    # as not working.
    slices = (slice(1, 2, 1), slice(0, 4, None), slice(1, 4, 2))
class ZlibShuffleTestCase(BasicTestCase):
    """Zlib-compressed, shuffled array read with a start > stop range."""

    # Filters under test
    complib = "zlib"
    compress = 1
    shuffle = 1

    # start is greater than stop, i.e. no rows read
    step = 10
    stop = 1
    start = 3
class StringComprTestCase(BasicTestCase):
    """String array stored with compression enabled."""

    type = "string"
    length = 20
    shape = (20, 0, 10)
    # shape = (20,0,10,20)
    # ``BasicTestCase`` builds its filters from ``compress``; the attribute
    # used to be spelled ``compr``, which nothing reads, so this case ran
    # uncompressed.
    compress = 1
    # shuffle = 1  # this should have no effect on chars
    chunksize = 50
    nappends = 10
    start = -1
    stop = 100
    step = 20
self.chunkshape[0] * 10) + self.assertEqual(self.array.size_on_disk, 10 * 1000 * 10 * 4) + self.assertEqual(self.array.size_in_memory, 10 * 1000 * 10 * 4) + + # add 10 chunks of data in two appends + def test_no_compression_multiple_appends(self): + complevel = 0 + self.create_array(complevel) + self.array.append([tuple(range(10))] * self.chunkshape[0] * 5) + self.array.append([tuple(range(10))] * self.chunkshape[0] * 5) + self.assertEqual(self.array.size_on_disk, 10 * 1000 * 10 * 4) + self.assertEqual(self.array.size_in_memory, 10 * 1000 * 10 * 4) + + def test_with_compression(self): + complevel = 1 + self.create_array(complevel) + self.array.append([tuple(range(10))] * self.chunkshape[0] * 10) + file_size = Path(self.h5fname).stat().st_size + self.assertTrue( + abs(self.array.size_on_disk - file_size) <= self.hdf_overhead + ) + self.assertEqual(self.array.size_in_memory, 10 * 1000 * 10 * 4) + self.assertLess(self.array.size_on_disk, self.array.size_in_memory) + + +class OffsetStrideTestCase(common.TempFileMixin, common.PyTablesTestCase): + mode = "w" + compress = 0 + complib = "zlib" # Default compression library + + def setUp(self): + super().setUp() + self.rootgroup = self.h5file.root + + def test01a_String(self): + """Checking earray with offset numpy strings appends.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_StringAtom..." 
% self.__class__.__name__) + + earray = self.h5file.create_earray( + root, + "strings", + atom=tb.StringAtom(itemsize=3), + shape=(0, 2, 2), + title="Array of strings", + ) + a = np.array([[["a", "b"], ["123", "45"], ["45", "123"]]], dtype="S3") + earray.append(a[:, 1:]) + a = np.array([[["s", "a"], ["ab", "f"], ["s", "abc"], ["abc", "f"]]]) + earray.append(a[:, 2:]) + + # Read all the rows: + row = earray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", earray._v_pathname, ":", earray.nrows) + print("Second row in earray ==>", row[1].tolist()) + + self.assertEqual(earray.nrows, 2) + self.assertEqual(row[0].tolist(), [[b"123", b"45"], [b"45", b"123"]]) + self.assertEqual(row[1].tolist(), [[b"s", b"abc"], [b"abc", b"f"]]) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 2) + + def test01b_String(self): + """Checking earray with strided numpy strings appends.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_StringAtom..." 
% self.__class__.__name__) + + earray = self.h5file.create_earray( + root, + "strings", + atom=tb.StringAtom(itemsize=3), + shape=(0, 2, 2), + title="Array of strings", + ) + a = np.array([[["a", "b"], ["123", "45"], ["45", "123"]]], dtype="S3") + earray.append(a[:, ::2]) + a = np.array([[["s", "a"], ["ab", "f"], ["s", "abc"], ["abc", "f"]]]) + earray.append(a[:, ::2]) + + # Read all the rows: + row = earray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", earray._v_pathname, ":", earray.nrows) + print("Second row in earray ==>", row[1].tolist()) + + self.assertEqual(earray.nrows, 2) + self.assertEqual(row[0].tolist(), [[b"a", b"b"], [b"45", b"123"]]) + self.assertEqual(row[1].tolist(), [[b"s", b"a"], [b"s", b"abc"]]) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 2) + + def test02a_int(self): + """Checking earray with offset NumPy ints appends.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02a_int..." 
% self.__class__.__name__) + + # Create a string atom + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Int32Atom(), + shape=(0, 3), + title="array of ints", + ) + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (0, 0, 0)], dtype="int32" + ) + earray.append(a[2:]) # Create an offset + a = np.array([(1, 1, 1), (-1, 0, 0)], dtype="int32") + earray.append(a[1:]) # Create an offset + + # Read all the rows: + row = earray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", earray._v_pathname, ":", earray.nrows) + print("Third row in vlarray ==>", row[2]) + + self.assertEqual(earray.nrows, 3) + self.assertTrue( + common.allequal(row[0], np.array([1, 1, 1], dtype="int32")) + ) + self.assertTrue( + common.allequal(row[1], np.array([0, 0, 0], dtype="int32")) + ) + self.assertTrue( + common.allequal(row[2], np.array([-1, 0, 0], dtype="int32")) + ) + + def test02b_int(self): + """Checking earray with strided NumPy ints appends.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02b_int..." 
% self.__class__.__name__) + + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Int32Atom(), + shape=(0, 3), + title="array of ints", + ) + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="int32" + ) + earray.append(a[::3]) # Create an offset + a = np.array([(1, 1, 1), (-1, 0, 0)], dtype="int32") + earray.append(a[::2]) # Create an offset + + # Read all the rows: + row = earray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", earray._v_pathname, ":", earray.nrows) + print("Third row in vlarray ==>", row[2]) + + self.assertEqual(earray.nrows, 3) + self.assertTrue( + common.allequal(row[0], np.array([0, 0, 0], dtype="int32")) + ) + self.assertTrue( + common.allequal(row[1], np.array([3, 3, 3], dtype="int32")) + ) + self.assertTrue( + common.allequal(row[2], np.array([1, 1, 1], dtype="int32")) + ) + + def test03a_int(self): + """Checking earray with byteswapped appends (ints)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03a_int..." 
% self.__class__.__name__) + + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Int32Atom(), + shape=(0, 3), + title="array of ints", + ) + # Add a native ordered array + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="int32" + ) + earray.append(a) + # Change the byteorder of the array + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + # Add a byteswapped array + earray.append(a) + + # Read all the rows: + native = earray[:4, :] + swapped = earray[4:, :] + if common.verbose: + print("Native rows:", native) + print("Byteorder native rows:", native.dtype.byteorder) + print("Swapped rows:", swapped) + print("Byteorder swapped rows:", swapped.dtype.byteorder) + + self.assertTrue(common.allequal(native, swapped)) + + def test03b_float(self): + """Checking earray with byteswapped appends (floats)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03b_float..." % self.__class__.__name__) + + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Float64Atom(), + shape=(0, 3), + title="array of floats", + ) + # Add a native ordered array + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="float64" + ) + earray.append(a) + # Change the byteorder of the array + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + # Add a byteswapped array + earray.append(a) + + # Read all the rows: + native = earray[:4, :] + swapped = earray[4:, :] + if common.verbose: + print("Native rows:", native) + print("Byteorder native rows:", native.dtype.byteorder) + print("Swapped rows:", swapped) + print("Byteorder swapped rows:", swapped.dtype.byteorder) + + self.assertTrue(common.allequal(native, swapped)) + + def test04a_int(self): + """Checking earray with byteswapped appends (2, ints)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04a_int..." 
% self.__class__.__name__) + + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Int32Atom(), + shape=(0, 3), + title="array of ints", + byteorder=byteorder, + ) + # Add a native ordered array + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="int32" + ) + earray.append(a) + # Change the byteorder of the array + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + # Add a byteswapped array + earray.append(a) + + # Read all the rows: + native = earray[:4, :] + swapped = earray[4:, :] + if common.verbose: + print( + "Byteorder native rows:", + tb.utils.byteorders[native.dtype.byteorder], + ) + print("Byteorder earray on-disk:", earray.byteorder) + + self.assertEqual( + tb.utils.byteorders[native.dtype.byteorder], sys.byteorder + ) + self.assertEqual(earray.byteorder, byteorder) + self.assertTrue(common.allequal(native, swapped)) + + def test04b_int(self): + """Checking earray with byteswapped appends (2, ints, reopen)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04b_int..." 
% self.__class__.__name__) + + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Int32Atom(), + shape=(0, 3), + title="array of ints", + byteorder=byteorder, + ) + self._reopen(mode="a") + earray = self.h5file.get_node("/EAtom") + # Add a native ordered array + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="int32" + ) + earray.append(a) + # Change the byteorder of the array + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + # Add a byteswapped array + earray.append(a) + + # Read all the rows: + native = earray[:4, :] + swapped = earray[4:, :] + if common.verbose: + print( + "Byteorder native rows:", + tb.utils.byteorders[native.dtype.byteorder], + ) + print("Byteorder earray on-disk:", earray.byteorder) + + self.assertEqual( + tb.utils.byteorders[native.dtype.byteorder], sys.byteorder + ) + self.assertEqual(earray.byteorder, byteorder) + self.assertTrue(common.allequal(native, swapped)) + + def test04c_float(self): + """Checking earray with byteswapped appends (2, floats)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04c_float..." 
% self.__class__.__name__) + + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Float64Atom(), + shape=(0, 3), + title="array of floats", + byteorder=byteorder, + ) + # Add a native ordered array + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="float64" + ) + earray.append(a) + # Change the byteorder of the array + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + # Add a byteswapped array + earray.append(a) + + # Read all the rows: + native = earray[:4, :] + swapped = earray[4:, :] + if common.verbose: + print( + "Byteorder native rows:", + tb.utils.byteorders[native.dtype.byteorder], + ) + print("Byteorder earray on-disk:", earray.byteorder) + + self.assertEqual( + tb.utils.byteorders[native.dtype.byteorder], sys.byteorder + ) + self.assertEqual(earray.byteorder, byteorder) + self.assertTrue(common.allequal(native, swapped)) + + def test04d_float(self): + """Checking earray with byteswapped appends (2, floats, reopen)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04d_float..." 
% self.__class__.__name__) + + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + earray = self.h5file.create_earray( + root, + "EAtom", + atom=tb.Float64Atom(), + shape=(0, 3), + title="array of floats", + byteorder=byteorder, + ) + self._reopen(mode="a") + earray = self.h5file.get_node("/EAtom") + # Add a native ordered array + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (3, 3, 3)], dtype="float64" + ) + earray.append(a) + # Change the byteorder of the array + a = a.byteswap() + a = a.view(a.dtype.newbyteorder()) + # Add a byteswapped array + earray.append(a) + + # Read all the rows: + native = earray[:4, :] + swapped = earray[4:, :] + if common.verbose: + print( + "Byteorder native rows:", + tb.utils.byteorders[native.dtype.byteorder], + ) + print("Byteorder earray on-disk:", earray.byteorder) + + self.assertEqual( + tb.utils.byteorders[native.dtype.byteorder], sys.byteorder + ) + self.assertEqual(earray.byteorder, byteorder) + self.assertTrue(common.allequal(native, swapped)) + + +class CopyTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01_copy(self): + """Checking EArray.copy() method.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_copy..." 
% self.__class__.__name__) + + # Create an EArray + atom = tb.Int16Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.append(np.array([[456, 2], [3, 457]], dtype="int16")) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.atom.itemsize, array2.atom.itemsize) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + + def test02_copy(self): + """Checking EArray.copy() method (where specified)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_copy..." 
% self.__class__.__name__) + + # Create an EArray + atom = tb.Int16Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.append(np.array([[456, 2], [3, 457]], dtype="int16")) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + group1 = self.h5file.create_group("/", "group1") + array2 = array1.copy(group1, "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.group1.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + # print("dirs-->", dir(array1), dir(array2)) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.atom.itemsize, array2.atom.itemsize) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + + def test03a_copy(self): + """Checking EArray.copy() method (python flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03b_copy..." 
% self.__class__.__name__) + + atom = tb.Int16Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.flavor = "python" + array1.append(((456, 2), (3, 457))) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all elements are equal + self.assertEqual(array1.read(), array2.read()) + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) # Very important here! + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.atom.itemsize, array2.atom.itemsize) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + + def test03b_copy(self): + """Checking EArray.copy() method (python string flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03d_copy..." 
% self.__class__.__name__) + + atom = tb.StringAtom(itemsize=3) + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.flavor = "python" + array1.append([["456", "2"], ["3", "457"]]) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all elements are equal + self.assertEqual(array1.read(), array2.read()) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) # Very important here! + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.atom.itemsize, array2.atom.itemsize) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + + def test03c_copy(self): + """Checking EArray.copy() method (String flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03e_copy..." 
% self.__class__.__name__) + + atom = tb.StringAtom(itemsize=4) + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.flavor = "numpy" + array1.append(np.array([["456", "2"], ["3", "457"]], dtype="S4")) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all elements are equal + self.assertTrue(common.allequal(array1.read(), array2.read())) + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.extdim, array2.extdim) + self.assertEqual(array1.flavor, array2.flavor) # Very important here! + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(array1.atom.itemsize, array2.atom.itemsize) + self.assertEqual(array1.title, array2.title) + self.assertEqual(str(array1.atom), str(array2.atom)) + + def test04_copy(self): + """Checking EArray.copy() method (checking title copying)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_copy..." 
% self.__class__.__name__) + + # Create an EArray + atom = tb.Int16Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.append(np.array([[456, 2], [3, 457]], dtype="int16")) + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", title="title array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + # Assert user attributes + if common.verbose: + print("title of destination array-->", array2.title) + self.assertEqual(array2.title, "title array2") + + def test05_copy(self): + """Checking EArray.copy() method (user attributes copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_copy..." 
% self.__class__.__name__) + + # Create an EArray + atom = tb.Int16Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.append(np.array([[456, 2], [3, 457]], dtype="int16")) + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=1) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(array2.attrs.attr1, "attr1") + self.assertEqual(array2.attrs.attr2, 2) + + def test05b_copy(self): + """Checking EArray.copy() method (user attributes not copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05b_copy..." 
% self.__class__.__name__) + + # Create an Array + atom = tb.Int16Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + array1.append(np.array([[456, 2], [3, 457]], dtype="int16")) + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=0) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(hasattr(array2.attrs, "attr1"), 0) + self.assertEqual(hasattr(array2.attrs, "attr2"), 0) + + +class CloseCopyTestCase(CopyTestCase): + close = 1 + + +class OpenCopyTestCase(CopyTestCase): + close = 0 + + +class CopyIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + nrowsinbuf = 2 + + def test01_index(self): + """Checking EArray.copy() method with indexes.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_index..." 
% self.__class__.__name__) + + # Create an EArray + atom = tb.Int32Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + r = np.arange(200, dtype="int32") + r.shape = (100, 2) + array1.append(r) + + # Select a different buffer size: + array1.nrowsinbuf = self.nrowsinbuf + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + self.assertTrue(common.allequal(r2, array2.read())) + + # Assert the number of rows in array + if common.verbose: + print("nrows in array2-->", array2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], array2.nrows) + + def test02_indexclosef(self): + """Checking EArray.copy() method with indexes (close file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_indexclosef..." 
% self.__class__.__name__) + + # Create an EArray + atom = tb.Int32Atom() + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + r = np.arange(200, dtype="int32") + r.shape = (100, 2) + array1.append(r) + + # Select a different buffer size: + array1.nrowsinbuf = self.nrowsinbuf + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + # Close and reopen the file + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + self.assertTrue(common.allequal(r2, array2.read())) + + # Assert the number of rows in array + if common.verbose: + print("nrows in array2-->", array2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], array2.nrows) + + +class CopyIndex1TestCase(CopyIndexTestCase): + nrowsinbuf = 1 + start = 0 + stop = 7 + step = 1 + + +class CopyIndex2TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + start = 0 + stop = -1 + step = 1 + + +class CopyIndex3TestCase(CopyIndexTestCase): + nrowsinbuf = 3 + start = 1 + stop = 7 + step = 1 + + +class CopyIndex4TestCase(CopyIndexTestCase): + nrowsinbuf = 4 + start = 0 + stop = 6 + step = 1 + + +class CopyIndex5TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + start = 3 + stop = 7 + step = 1 + + +class CopyIndex6TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + start = 3 + stop = 6 + step = 2 + + +class CopyIndex7TestCase(CopyIndexTestCase): + start = 0 + stop = 7 + step = 10 + + +class CopyIndex8TestCase(CopyIndexTestCase): + start = 6 + stop = -1 # Negative values means starting from the end + step = 1 + + +class CopyIndex9TestCase(CopyIndexTestCase): + 
start = 3 + stop = 4 + step = 1 + + +class CopyIndex10TestCase(CopyIndexTestCase): + nrowsinbuf = 1 + start = 3 + stop = 4 + step = 2 + + +class CopyIndex11TestCase(CopyIndexTestCase): + start = -3 + stop = -1 + step = 2 + + +class CopyIndex12TestCase(CopyIndexTestCase): + start = -1 # Should point to the last element + stop = None # None should mean the last element (including it) + step = 1 + + +class TruncateTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + # Create an EArray + atom = tb.Int16Atom(dflt=3) + array1 = self.h5file.create_earray( + self.h5file.root, + "array1", + atom=atom, + shape=(0, 2), + title="title array1", + ) + # Add a couple of rows + array1.append(np.array([[456, 2], [3, 457]], dtype="int16")) + + def test00_truncate(self): + """Checking EArray.truncate() method (truncating to 0 rows)""" + + array1 = self.h5file.root.array1 + # Truncate to 0 elements + array1.truncate(0) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertTrue( + common.allequal( + array1[:], np.array([], dtype="int16").reshape(0, 2) + ) + ) + + def test01_truncate(self): + """Checking EArray.truncate() method (truncating to 1 rows)""" + + array1 = self.h5file.root.array1 + # Truncate to 1 element + array1.truncate(1) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertTrue( + common.allequal(array1.read(), np.array([[456, 2]], dtype="int16")) + ) + + def test02_truncate(self): + """Checking EArray.truncate() method (truncating to == self.nrows)""" + + array1 = self.h5file.root.array1 + # Truncate to 2 elements + array1.truncate(2) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = 
self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertTrue( + common.allequal( + array1.read(), np.array([[456, 2], [3, 457]], dtype="int16") + ) + ) + + def test03_truncate(self): + """Checking EArray.truncate() method (truncating to > self.nrows)""" + + array1 = self.h5file.root.array1 + # Truncate to 4 elements + array1.truncate(4) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertEqual(array1.nrows, 4) + # Check the original values + self.assertTrue( + common.allequal( + array1[:2], np.array([[456, 2], [3, 457]], dtype="int16") + ) + ) + # Check that the added rows have the default values + self.assertTrue( + common.allequal( + array1[2:], np.array([[3, 3], [3, 3]], dtype="int16") + ) + ) + + +class TruncateOpenTestCase(TruncateTestCase): + close = 0 + + +class TruncateCloseTestCase(TruncateTestCase): + close = 1 + + +# The next test should be run only in **common.heavy** mode +class Rows64bitsTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "a" + narows = 1000 * 1000 # each numpy object will have 1 million entries + # narows = 1000 # for testing only + nanumber = 1000 * 3 # That should account for more than 2**31-1 + + def setUp(self): + super().setUp() + + # Create an EArray + array = self.h5file.create_earray( + self.h5file.root, + "array", + atom=tb.Int8Atom(), + shape=(0,), + filters=tb.Filters(complib="lzo", complevel=1), + # Specifying expectedrows takes more + # CPU, but less disk + expectedrows=self.narows * self.nanumber, + ) + + # Fill the array + na = np.arange(self.narows, dtype="int8") + for i in range(self.nanumber): + array.append(na) + + def test01_basiccheck(self): + """Some basic checks for earrays exceeding 2**31 rows""" + + array = self.h5file.root.array + + if self.close: + if common.verbose: + # Check how many entries there 
are in the array + print("Before closing") + print("Entries:", array.nrows, type(array.nrows)) + print("Entries:", array.nrows / (1000 * 1000), "Millions") + print("Shape:", array.shape) + + # Close the file + self._reopen() + + array = self.h5file.root.array + if common.verbose: + print("After re-open") + + # Check how many entries there are in the array + if common.verbose: + print("Entries:", array.nrows, type(array.nrows)) + print("Entries:", array.nrows / (1000 * 1000), "Millions") + print("Shape:", array.shape) + print("Last 10 elements-->", array[-10:]) + stop = self.narows % 256 + if stop > 127: + stop -= 256 + start = stop - 10 + print("Should look like-->", np.arange(start, stop, dtype="int8")) + + nrows = self.narows * self.nanumber + # check nrows + self.assertEqual(array.nrows, nrows) + # Check shape + self.assertEqual(array.shape, (nrows,)) + # check the 10 first elements + self.assertTrue( + common.allequal(array[:10], np.arange(10, dtype="int8")) + ) + # check the 10 last elements + stop = self.narows % 256 + if stop > 127: + stop -= 256 + start = stop - 10 + self.assertTrue( + common.allequal(array[-10:], np.arange(start, stop, dtype="int8")) + ) + + +class Rows64bitsTestCase1(Rows64bitsTestCase): + close = 0 + + +class Rows64bitsTestCase2(Rows64bitsTestCase): + close = 1 + + +# Test for appending zero-sized arrays +class ZeroSizedTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "a" + + def setUp(self): + super().setUp() + + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom(), shape=(3, 0) + ) + # Append a single row + ea.append([[1], [2], [3]]) + + def test01_canAppend(self): + """Appending zero length array.""" + + fileh = self.h5file + ea = fileh.root.test + arr = np.empty(shape=(3, 0), dtype="int32") + ea.append(arr) + self.assertEqual(ea.nrows, 1, "The number of rows should be 1.") + + def test02_appendWithWrongShape(self): + """Appending zero length array with wrong dimension.""" + + 
fileh = self.h5file + ea = fileh.root.test + arr = np.empty(shape=(3, 0, 3), dtype="int32") + self.assertRaises(ValueError, ea.append, arr) + + +# Test for dealing with multidimensional atoms +class MDAtomTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01a_append(self): + """Append a row to a (unidimensional) EArray with a MD tables.Atom.""" + + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom((2, 2)), shape=(0,) + ) + if self.reopen: + self._reopen("a") + ea = self.h5file.root.test + # Append one row + ea.append([[[1, 3], [4, 5]]]) + self.assertEqual(ea.nrows, 1) + if common.verbose: + print("First row-->", ea[0]) + self.assertTrue( + common.allequal(ea[0], np.array([[1, 3], [4, 5]], "i4")) + ) + + def test01b_append(self): + """Append several rows to a (unidimensional) EArray with a MD + tables.Atom.""" + + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom((2, 2)), shape=(0,) + ) + if self.reopen: + self._reopen("a") + ea = self.h5file.root.test + # Append three rows + ea.append([[[1]], [[2]], [[3]]]) # Simple broadcast + self.assertEqual(ea.nrows, 3) + if common.verbose: + print("Third row-->", ea[2]) + self.assertTrue( + common.allequal(ea[2], np.array([[3, 3], [3, 3]], "i4")) + ) + + def test02a_append(self): + """Append a row to a (multidimensional) EArray with a + MD tables.Atom.""" + + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom((2,)), shape=(0, 3) + ) + if self.reopen: + self._reopen("a") + ea = self.h5file.root.test + # Append one row + ea.append([[[1, 3], [4, 5], [7, 9]]]) + self.assertEqual(ea.nrows, 1) + if common.verbose: + print("First row-->", ea[0]) + self.assertTrue( + common.allequal(ea[0], np.array([[1, 3], [4, 5], [7, 9]], "i4")) + ) + + def test02b_append(self): + """Append several rows to a (multidimensional) EArray with a MD + tables.Atom.""" + + # Create an EArray + ea = self.h5file.create_earray( + "/", 
"test", atom=tb.Int32Atom((2,)), shape=(0, 3) + ) + if self.reopen: + self._reopen("a") + ea = self.h5file.root.test + # Append three rows + ea.append( + [ + [[1, -3], [4, -5], [-7, 9]], + [[-1, 3], [-4, 5], [7, -8]], + [[-2, 3], [-5, 5], [7, -9]], + ] + ) + self.assertEqual(ea.nrows, 3) + if common.verbose: + print("Third row-->", ea[2]) + self.assertTrue( + common.allequal(ea[2], np.array([[-2, 3], [-5, 5], [7, -9]], "i4")) + ) + + def test03a_MDMDMD(self): + """Complex append of a MD array in a MD EArray with a + MD tables.Atom.""" + + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom((2, 4)), shape=(0, 2, 3) + ) + if self.reopen: + self._reopen("a") + ea = self.h5file.root.test + # Append three rows + # The shape of the atom should be added at the end of the arrays + a = np.arange(2 * 3 * 2 * 4, dtype="i4").reshape((2, 3, 2, 4)) + ea.append([a * 1, a * 2, a * 3]) + self.assertEqual(ea.nrows, 3) + if common.verbose: + print("Third row-->", ea[2]) + self.assertTrue(common.allequal(ea[2], a * 3)) + + def test03b_MDMDMD(self): + """Complex append of a MD array in a MD EArray with a MD atom (II).""" + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom((2, 4)), shape=(2, 0, 3) + ) + if self.reopen: + self._reopen("a") + ea = self.h5file.root.test + # Append three rows + # The shape of the atom should be added at the end of the arrays + a = np.arange(2 * 3 * 2 * 4, dtype="i4").reshape((2, 1, 3, 2, 4)) + ea.append(a * 1) + ea.append(a * 2) + ea.append(a * 3) + self.assertEqual(ea.nrows, 3) + if common.verbose: + print("Third row-->", ea[:, 2, ...]) + self.assertTrue( + common.allequal(ea[:, 2, ...], a.reshape((2, 3, 2, 4)) * 3) + ) + + def test03c_MDMDMD(self): + """Complex append of a MD array in a MD EArray with a MD atom (III).""" + # Create an EArray + ea = self.h5file.create_earray( + "/", "test", atom=tb.Int32Atom((2, 4)), shape=(2, 3, 0) + ) + if self.reopen: + self._reopen("a") + ea = 
self.h5file.root.test + # Append three rows + # The shape of the atom should be added at the end of the arrays + a = np.arange(2 * 3 * 2 * 4, dtype="i4").reshape((2, 3, 1, 2, 4)) + ea.append(a * 1) + ea.append(a * 2) + ea.append(a * 3) + self.assertEqual(ea.nrows, 3) + if common.verbose: + print("Third row-->", ea[:, :, 2, ...]) + self.assertTrue( + common.allequal(ea[:, :, 2, ...], a.reshape((2, 3, 2, 4)) * 3) + ) + + +class MDAtomNoReopen(MDAtomTestCase): + reopen = False + + +class MDAtomReopen(MDAtomTestCase): + reopen = True + + +class AccessClosedTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self.array = self.h5file.create_earray( + self.h5file.root, "array", atom=tb.Int32Atom(), shape=(0, 10) + ) + self.array.append(np.zeros((10, 10))) + + def test_read(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.read) + + def test_getitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__getitem__, 0) + + def test_setitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__setitem__, 0, 0) + + def test_append(self): + self.h5file.close() + self.assertRaises( + tb.ClosedNodeError, self.array.append, np.zeros((10, 10)) + ) + + +class TestCreateEArrayArgs(common.TempFileMixin, common.PyTablesTestCase): + obj = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + where = "/" + name = "earray" + atom = tb.Atom.from_dtype(obj.dtype) + shape = (0,) + obj.shape[1:] + title = "title" + filters = None + expectedrows = 1000 + chunkshape = (1, 2) + byteorder = None + createparents = False + + def test_positional_args_01(self): + self.h5file.create_earray( + self.where, + self.name, + self.atom, + self.shape, + self.title, + self.filters, + self.expectedrows, + self.chunkshape, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + 
self.assertEqual(ptarr.nrows, 0) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + + def test_positional_args_02(self): + ptarr = self.h5file.create_earray( + self.where, + self.name, + self.atom, + self.shape, + self.title, + self.filters, + self.expectedrows, + self.chunkshape, + ) + ptarr.append(self.obj) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_positional_args_obj(self): + self.h5file.create_earray( + self.where, + self.name, + None, + None, + self.title, + self.filters, + self.expectedrows, + self.chunkshape, + self.byteorder, + self.createparents, + self.obj, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj(self): + self.h5file.create_earray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + 
self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_shape_01(self): + ptarr = self.h5file.create_earray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + atom=self.atom, + shape=self.shape, + ) + ptarr.append(self.obj) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_shape_02(self): + ptarr = self.h5file.create_earray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + atom=self.atom, + shape=self.shape, + ) + # ptarr.append(self.obj) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.shape) + self.assertEqual(ptarr.nrows, 0) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + + def test_kwargs_obj_atom(self): + ptarr = self.h5file.create_earray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + atom=self.atom, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + 
self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_shape(self): + ptarr = self.h5file.create_earray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + shape=self.shape, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_shape(self): + ptarr = self.h5file.create_earray( + self.where, + self.name, + title=self.title, + chunkshape=self.chunkshape, + obj=self.obj, + atom=self.atom, + shape=self.shape, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, self.obj.shape) + self.assertEqual(ptarr.nrows, self.obj.shape[0]) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertEqual(ptarr.chunkshape, self.chunkshape) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_error(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + # shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_earray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + ) + + def test_kwargs_obj_shape_error(self): + # atom = tables.Atom.from_dtype(np.dtype('complex')) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_earray, + self.where, + self.name, + title=self.title, + obj=self.obj, + shape=shape, + ) + + def test_kwargs_obj_atom_shape_error_01(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + # 
shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_earray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + shape=self.shape, + ) + + def test_kwargs_obj_atom_shape_error_02(self): + # atom = tables.Atom.from_dtype(np.dtype('complex')) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_earray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=self.atom, + shape=shape, + ) + + def test_kwargs_obj_atom_shape_error_03(self): + atom = tb.Atom.from_dtype(np.dtype("complex")) + shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_earray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + shape=shape, + ) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # uncomment this only for testing purposes + + # theSuite.addTest(make_suite(BasicWriteTestCase)) + # theSuite.addTest(make_suite(Rows64bitsTestCase1)) + # theSuite.addTest(make_suite(Rows64bitsTestCase2)) + for n in range(niter): + theSuite.addTest(common.make_suite(BasicWriteTestCase)) + theSuite.addTest(common.make_suite(Basic2WriteTestCase)) + theSuite.addTest(common.make_suite(Basic3WriteTestCase)) + theSuite.addTest(common.make_suite(Basic4WriteTestCase)) + theSuite.addTest(common.make_suite(Basic5WriteTestCase)) + theSuite.addTest(common.make_suite(Basic6WriteTestCase)) + theSuite.addTest(common.make_suite(Basic7WriteTestCase)) + theSuite.addTest(common.make_suite(Basic8WriteTestCase)) + theSuite.addTest(common.make_suite(EmptyEArrayTestCase)) + theSuite.addTest(common.make_suite(Empty2EArrayTestCase)) + theSuite.addTest(common.make_suite(SlicesEArrayTestCase)) + theSuite.addTest(common.make_suite(Slices2EArrayTestCase)) + theSuite.addTest(common.make_suite(EllipsisEArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis2EArrayTestCase)) + 
theSuite.addTest(common.make_suite(Ellipsis3EArrayTestCase)) + theSuite.addTest(common.make_suite(ZlibComprTestCase)) + theSuite.addTest(common.make_suite(ZlibShuffleTestCase)) + theSuite.addTest(common.make_suite(BloscComprTestCase)) + theSuite.addTest(common.make_suite(BloscShuffleTestCase)) + theSuite.addTest(common.make_suite(Blosc2SlicesOptEArrayTestCase)) + theSuite.addTest(common.make_suite(Blosc2ComprTestCase)) + theSuite.addTest(common.make_suite(Blosc2FletcherTestCase)) + theSuite.addTest(common.make_suite(Blosc2CrossChunkTestCase)) + theSuite.addTest(common.make_suite(Blosc2CrossChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2InnerCrossChunkTestCase)) + theSuite.addTest(common.make_suite(Blosc2InnerCrossChunkOptTestCase)) + theSuite.addTest(common.make_suite(Blosc2PastLastChunkTestCase)) + theSuite.addTest(common.make_suite(LZOComprTestCase)) + theSuite.addTest(common.make_suite(LZOShuffleTestCase)) + theSuite.addTest(common.make_suite(Bzip2ComprTestCase)) + theSuite.addTest(common.make_suite(Bzip2ShuffleTestCase)) + theSuite.addTest(common.make_suite(FloatTypeTestCase)) + theSuite.addTest(common.make_suite(ComplexTypeTestCase)) + theSuite.addTest(common.make_suite(StringTestCase)) + theSuite.addTest(common.make_suite(String2TestCase)) + theSuite.addTest(common.make_suite(StringComprTestCase)) + theSuite.addTest(common.make_suite(SizeOnDiskInMemoryPropertyTestCase)) + theSuite.addTest(common.make_suite(OffsetStrideTestCase)) + theSuite.addTest(common.make_suite(Fletcher32TestCase)) + theSuite.addTest(common.make_suite(AllFiltersTestCase)) + theSuite.addTest(common.make_suite(CloseCopyTestCase)) + theSuite.addTest(common.make_suite(OpenCopyTestCase)) + theSuite.addTest(common.make_suite(CopyIndex1TestCase)) + theSuite.addTest(common.make_suite(CopyIndex2TestCase)) + theSuite.addTest(common.make_suite(CopyIndex3TestCase)) + theSuite.addTest(common.make_suite(CopyIndex4TestCase)) + theSuite.addTest(common.make_suite(CopyIndex5TestCase)) + 
theSuite.addTest(common.make_suite(TruncateOpenTestCase)) + theSuite.addTest(common.make_suite(TruncateCloseTestCase)) + theSuite.addTest(common.make_suite(ZeroSizedTestCase)) + theSuite.addTest(common.make_suite(MDAtomNoReopen)) + theSuite.addTest(common.make_suite(MDAtomReopen)) + theSuite.addTest(common.make_suite(AccessClosedTestCase)) + theSuite.addTest(common.make_suite(TestCreateEArrayArgs)) + if common.heavy: + theSuite.addTest(common.make_suite(Slices3EArrayTestCase)) + theSuite.addTest(common.make_suite(Slices4EArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis4EArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis5EArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis6EArrayTestCase)) + theSuite.addTest(common.make_suite(Ellipsis7EArrayTestCase)) + theSuite.addTest(common.make_suite(MD3WriteTestCase)) + theSuite.addTest(common.make_suite(MD5WriteTestCase)) + theSuite.addTest(common.make_suite(MD6WriteTestCase)) + theSuite.addTest(common.make_suite(MD7WriteTestCase)) + theSuite.addTest(common.make_suite(MD10WriteTestCase)) + theSuite.addTest(common.make_suite(CopyIndex6TestCase)) + theSuite.addTest(common.make_suite(CopyIndex7TestCase)) + theSuite.addTest(common.make_suite(CopyIndex8TestCase)) + theSuite.addTest(common.make_suite(CopyIndex9TestCase)) + theSuite.addTest(common.make_suite(CopyIndex10TestCase)) + theSuite.addTest(common.make_suite(CopyIndex11TestCase)) + theSuite.addTest(common.make_suite(CopyIndex12TestCase)) + theSuite.addTest(common.make_suite(Rows64bitsTestCase1)) + theSuite.addTest(common.make_suite(Rows64bitsTestCase2)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_enum.py b/venv/Lib/site-packages/tables/tests/test_enum.py new file mode 100644 index 0000000..bb3a2cc --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_enum.py @@ -0,0 +1,745 @@ 
+"""Test module for enumerated types under PyTables.""" + +import operator +import itertools + +import numpy as np + +import tables as tb +from tables.tests import common + + +class CreateColTestCase(common.PyTablesTestCase): + """Test creating enumerated column descriptions.""" + + def _createCol(self, enum, dflt, base="uint32", shape=()): + """Create and check an enumerated column description.""" + + enumcol = tb.EnumCol(enum, dflt, base=base, shape=shape) + sameEnum = tb.Enum(enum) + self.assertEqual(enumcol.type, "enum") + self.assertEqual(enumcol.dtype.base.name, enumcol.base.type) + # To avoid 'LongInt' vs 'Int' issues + # self.assertEqual(enumcol.dflt, sameEnum[dflt]) + self.assertEqual(int(enumcol.dflt), int(sameEnum[dflt])) + self.assertEqual(enumcol.dtype.shape, shape) + self.assertEqual(enumcol.enum, sameEnum) + + def test00a_validFromEnum(self): + """Describing an enumerated column from an enumeration.""" + + colors = tb.Enum(["red", "green", "blue"]) + self._createCol(colors, "red") + + def test00b_validFromDict(self): + """Describing an enumerated column from a dictionary.""" + + colors = {"red": 4, "green": 2, "blue": 1} + self._createCol(colors, "red") + + def test00c_validFromList(self): + """Describing an enumerated column from a list.""" + + colors = ["red", "green", "blue"] + self._createCol(colors, "red") + + def test00d_invalidFromType(self): + """Describing an enumerated column from an invalid object.""" + + colors = 123 + self.assertRaises(TypeError, self._createCol, colors, "red") + + def test01_invalidDflt(self): + """Describing an enumerated column with an invalid default object.""" + + colors = {"red": 4, "green": 2, "blue": 1} + self.assertRaises(KeyError, self._createCol, colors, "black") + + def test02a_validDtypeBroader(self): + """Describing an enumerated column with a broader type.""" + + colors = {"red": 4, "green": 2, "blue": 1} + self._createCol(colors, "red", "int64") + + def test02b_invalidDtypeTooNarrow(self): + """Describing 
an enumerated column with a too narrow type.""" + + colors = ["e%d" % i for i in range(300)] + self.assertRaises(TypeError, self._createCol, colors, "e0", "uint8") + + def test03a_validShapeMD(self): + """Describing an enumerated column with multidimensional shape.""" + + colors = ["red", "green", "blue"] + self._createCol(colors, "red", shape=(2,)) + + def test04a_validReprEnum(self): + """Checking the string representation of an enumeration.""" + + colors = tb.Enum(["red", "green", "blue"]) + enumcol = tb.EnumCol(colors, "red", base="uint32", shape=()) + + # needed due to "Hash randomization" (default on python 3.3) + template = ( + "EnumCol(enum=Enum({%s}), dflt='red', base=UInt32Atom(shape=(), " + f"dflt={np.uint32(0)!r}), shape=(), pos=None)" + ) + permitations = [ + template % ", ".join(items) + for items in itertools.permutations( + ("'blue': 2", "'green': 1", "'red': 0") + ) + ] + self.assertIn(repr(enumcol), permitations) + + def test99a_nonIntEnum(self): + """Describing an enumerated column of floats (not implemented).""" + + colors = {"red": 1.0} + self.assertRaises( + NotImplementedError, + self._createCol, + colors, + "red", + base=tb.FloatAtom(), + ) + + def test99b_nonIntDtype(self): + """Describing an enumerated column encoded as floats. + + (not implemented). 
+ + """ + + colors = ["red", "green", "blue"] + self.assertRaises( + NotImplementedError, self._createCol, colors, "red", "float64" + ) + + def test99b_nonScalarEnum(self): + """Describing an enumerated column of non-scalars (not implemented).""" + + colors = {"red": (1, 2, 3)} + self.assertRaises( + NotImplementedError, + self._createCol, + colors, + "red", + base=tb.IntAtom(shape=3), + ) + + +class CreateAtomTestCase(common.PyTablesTestCase): + """Test creating enumerated atoms.""" + + def _createAtom(self, enum, dflt, base="uint32", shape=()): + """Create and check an enumerated atom.""" + + enumatom = tb.EnumAtom(enum, dflt, base=base, shape=shape) + sameEnum = tb.Enum(enum) + self.assertEqual(enumatom.type, "enum") + self.assertEqual(enumatom.dtype.base.name, enumatom.base.type) + self.assertEqual(enumatom.shape, shape) + self.assertEqual(enumatom.enum, sameEnum) + + def test00a_validFromEnum(self): + """Describing an enumerated atom from an enumeration.""" + + colors = tb.Enum(["red", "green", "blue"]) + self._createAtom(colors, "red") + + def test00b_validFromDict(self): + """Describing an enumerated atom from a dictionary.""" + + colors = {"red": 4, "green": 2, "blue": 1} + self._createAtom(colors, "red") + + def test00c_validFromList(self): + """Describing an enumerated atom from a list.""" + + colors = ["red", "green", "blue"] + self._createAtom(colors, "red") + + def test00d_invalidFromType(self): + """Describing an enumerated atom from an invalid object.""" + + colors = 123 + self.assertRaises(TypeError, self._createAtom, colors, "red") + + def test02a_validDtypeBroader(self): + """Describing an enumerated atom with a broader type.""" + + colors = {"red": 4, "green": 2, "blue": 1} + self._createAtom(colors, "red", base="int64") + + def test02b_invalidDtypeTooNarrow(self): + """Describing an enumerated atom with a too narrow type.""" + + colors = ["e%d" % i for i in range(300)] + self.assertRaises(TypeError, self._createAtom, colors, "red", "uint8") + + 
def test03a_validShapeMD(self): + """Describing an enumerated atom with multidimensional shape.""" + + colors = ["red", "green", "blue"] + self._createAtom(colors, "red", shape=(2,)) + + def test99a_nonIntEnum(self): + """Describing an enumerated atom of floats (not implemented).""" + + colors = {"red": 1.0} + self.assertRaises( + NotImplementedError, + self._createAtom, + colors, + "red", + base=tb.FloatAtom(), + ) + + def test99b_nonIntDtype(self): + """Describing an enumerated atom encoded as a float. + + (not implemented). + + """ + + colors = ["red", "green", "blue"] + self.assertRaises( + NotImplementedError, self._createAtom, colors, "red", "float64" + ) + + def test99b_nonScalarEnum(self): + """Describing an enumerated atom of non-scalars (not implemented).""" + + colors = {"red": (1, 2, 3)} + self.assertRaises( + NotImplementedError, + self._createAtom, + colors, + "red", + base=tb.IntAtom(shape=3), + ) + + +class EnumTableTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test tables with enumerated columns.""" + + enum = tb.Enum({"red": 4, "green": 2, "blue": 1, "black": 0}) + defaultName = "black" + valueInEnum = enum.red + valueOutOfEnum = 1234 + enumType = "uint16" + + def _description(self, shape=()): + class TestDescription(tb.IsDescription): + rid = tb.IntCol(pos=0) + rcolor = tb.EnumCol( + self.enum, + self.defaultName, + base=self.enumType, + shape=shape, + pos=1, + ) + + return TestDescription + + def test00a_reopen(self): + """Reopening a file with tables using enumerated data.""" + + self.h5file.create_table( + "/", "test", self._description(), title=self._getMethodName() + ) + + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum("rcolor"), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + def test00b_reopenMD(self): + """Reopening a file with tables using enumerated multi-dimensional + data.""" + + self.h5file.create_table( + "/", "test", self._description((2,)), 
title=self._getMethodName() + ) + + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum("rcolor"), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + def test01_rowAppend(self): + """Appending enumerated values using ``row.append()``.""" + + tbl = self.h5file.create_table( + "/", "test", self._description(), title=self._getMethodName() + ) + + appended = [(10, self.valueInEnum), (20, self.valueOutOfEnum)] + + row = tbl.row + + row["rid"] = appended[0][0] + row["rcolor"] = appended[0][1] + row.append() + + row["rid"] = appended[1][0] + self.assertRaises( + ValueError, operator.setitem, row, "rcolor", appended[1][1] + ) + + tbl.flush() + tbl.flavor = "python" + read = tbl.read() + common.verbosePrint( + "* appended value: %s\n" + "* read value: %s\n" % (appended[:-1], read) + ) + self.assertEqual( + appended[:-1], read, "Written and read values differ." + ) + + def test02_append(self): + """Appending enumerated values using ``table.append()``.""" + + tbl = self.h5file.create_table( + "/", "test", self._description(), title=self._getMethodName() + ) + + appended = [(10, self.valueInEnum), (20, self.valueOutOfEnum)] + + tbl.append(appended) + tbl.flush() + tbl.flavor = "python" + read = tbl.read() + common.verbosePrint( + "* appended value: %s\n" "* read value: %s\n" % (appended, read) + ) + self.assertEqual(appended, read, "Written and read values differ.") + + def test03_setitem(self): + """Changing enumerated values using ``table.__setitem__()``.""" + + tbl = self.h5file.create_table( + "/", "test", self._description(), title=self._getMethodName() + ) + + appended = [(10, self.valueInEnum), (20, self.valueInEnum)] + tbl.append(appended) + + written = [(10, self.valueInEnum), (20, self.valueOutOfEnum)] + tbl[:] = written + tbl.flavor = "python" + read = tbl.read() + common.verbosePrint( + "* written value: %s\n" "* read value: %s\n" % (written, read) + ) + self.assertEqual(written, read, "Written and read values 
differ.") + + def test04_multidim(self): + """Appending multi-dimensional enumerated data.""" + + tbl = self.h5file.create_table( + "/", "test", self._description((2,)), title=self._getMethodName() + ) + + appended = [ + (10, (self.valueInEnum, self.valueOutOfEnum)), + (20, (self.valueInEnum, self.valueOutOfEnum)), + ] + + row = tbl.row + row["rid"] = appended[0][0] + self.assertRaises( + ValueError, operator.setitem, row, "rcolor", appended[0][1] + ) + + tbl.append(appended) + tbl.flush() + tbl.flavor = "python" + read = tbl.read() + for x_appended, x_read in zip(appended, read): + self.assertEqual( + x_appended[0], x_read[0], "Written and read values differ." + ) + self.assertEqual( + x_appended[1][0], + x_read[1][0], + "Written and read values differ.", + ) + self.assertEqual( + x_appended[1][1], + x_read[1][1], + "Written and read values differ.", + ) + + def test05_where(self): + """Searching enumerated data.""" + + tbl = self.h5file.create_table( + "/", "test", self._description(), title=self._getMethodName() + ) + + appended = [ + (10, self.valueInEnum), + (20, self.valueInEnum), + (30, self.valueOutOfEnum), + ] + tbl.append(appended) + tbl.flush() + + searched = [ + (row["rid"], row["rcolor"]) + for row in tbl.where("rcolor == v", {"v": self.valueInEnum}) + ] + common.verbosePrint( + "* ``valueInEnum``: %s\n" + "* ``rcolor`` column: ``%s``\n" + "* ``searched``: %s\n" + "* Should look like: %s\n" + % (self.valueInEnum, tbl.cols.rcolor, searched, appended[:-1]) + ) + self.assertEqual( + searched, appended[:-1], "Search returned incorrect results." 
+ ) + + +class EnumEArrayTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test extendable arrays of enumerated values.""" + + enum = tb.Enum({"red": 4, "green": 2, "blue": 1, "black": 0}) + valueInEnum = enum.red + valueOutOfEnum = 1234 + enumType = "uint16" + + def _atom(self, shape=()): + return tb.EnumAtom(self.enum, "red", base=self.enumType, shape=shape) + + def test00a_reopen(self): + """Reopening a file with extendable arrays using enumerated data.""" + + self.h5file.create_earray( + "/", "test", self._atom(), shape=(0,), title=self._getMethodName() + ) + self.h5file.root.test.flavor = "python" + + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + def test00b_reopenMD(self): + """Reopening a file with extendable arrays using enumerated + multi-dimensional data.""" + + self.h5file.create_earray( + "/", + "test", + self._atom(), + shape=(0, 2), + title=self._getMethodName(), + ) + self.h5file.root.test.flavor = "python" + + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + def test_enum_default_persistence_red(self): + dflt = "red" + atom = tb.EnumAtom(self.enum, dflt, base=self.enumType, shape=()) + + self.h5file.create_earray( + "/", "test", atom, shape=(0,), title=self._getMethodName() + ) + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + self.assertEqual( + self.h5file.root.test.atom.dflt, + self.enum[dflt], + "The default value of enumerated type was not restored correctly " + "from disk.", + ) + + def test_enum_default_persistence_green(self): + dflt = "green" + atom = tb.EnumAtom(self.enum, dflt, base=self.enumType, shape=()) + + self.h5file.create_earray( + "/", "test", atom, shape=(0,), title=self._getMethodName() + ) + 
self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + self.assertEqual( + self.h5file.root.test.atom.dflt, + self.enum[dflt], + "The default value of enumerated type was not restored correctly " + "from disk.", + ) + + def test_enum_default_persistence_blue(self): + dflt = "blue" + atom = tb.EnumAtom(self.enum, dflt, base=self.enumType, shape=()) + + self.h5file.create_earray( + "/", "test", atom, shape=(0,), title=self._getMethodName() + ) + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + self.assertEqual( + self.h5file.root.test.atom.dflt, + self.enum[dflt], + "The default value of enumerated type was not restored correctly " + "from disk.", + ) + + def test_enum_default_persistence_black(self): + dflt = "black" + atom = tb.EnumAtom(self.enum, dflt, base=self.enumType, shape=()) + + self.h5file.create_earray( + "/", "test", atom, shape=(0,), title=self._getMethodName() + ) + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + self.assertEqual( + self.h5file.root.test.atom.dflt, + self.enum[dflt], + "The default value of enumerated type was not restored correctly " + "from disk.", + ) + + def test01_append(self): + """Appending scalar elements of enumerated values.""" + + earr = self.h5file.create_earray( + "/", "test", self._atom(), shape=(0,), title=self._getMethodName() + ) + earr.flavor = "python" + + appended = [self.valueInEnum, self.valueOutOfEnum] + + earr.append(appended) + earr.flush() + read = earr.read() + self.assertEqual(appended, read, "Written and read values differ.") + + def test02_appendMD(self): + """Appending multi-dimensional elements of enumerated values.""" + + earr = self.h5file.create_earray( + "/", + "test", + self._atom(), + shape=(0, 
2), + title=self._getMethodName(), + ) + earr.flavor = "python" + + appended = [ + [self.valueInEnum, self.valueOutOfEnum], + [self.valueInEnum, self.valueOutOfEnum], + ] + + earr.append(appended) + earr.flush() + read = earr.read() + self.assertEqual(appended, read, "Written and read values differ.") + + def test03_setitem(self): + """Changing enumerated values using ``earray.__setitem__()``.""" + + earr = self.h5file.create_earray( + "/", "test", self._atom(), shape=(0,), title=self._getMethodName() + ) + earr.flavor = "python" + + appended = (self.valueInEnum, self.valueInEnum) + earr.append(appended) + + written = [self.valueInEnum, self.valueOutOfEnum] + earr[:] = written + read = earr.read() + self.assertEqual(written, read, "Written and read values differ.") + + +class EnumVLArrayTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test variable-length arrays of enumerated values.""" + + enum = tb.Enum({"red": 4, "green": 2, "blue": 1, "black": 0}) + valueInEnum = enum.red + valueOutOfEnum = 1234 + enumType = "uint16" + + def _atom(self, shape=()): + return tb.EnumAtom(self.enum, "red", base=self.enumType, shape=shape) + + def test00a_reopen(self): + """Reopening a file with variable-length arrays using + enumerated data.""" + + self.h5file.create_vlarray( + "/", "test", self._atom(), title=self._getMethodName() + ) + self.h5file.root.test.flavor = "python" + + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + def test00b_reopenMD(self): + """Reopening a file with variable-length arrays using enumerated + multi-dimensional data.""" + + self.h5file.create_vlarray( + "/", "test", self._atom((2,)), title=self._getMethodName() + ) + self.h5file.root.test.flavor = "python" + + self._reopen() + + self.assertEqual( + self.h5file.root.test.get_enum(), + self.enum, + "Enumerated type was not restored correctly from disk.", + ) + + def test01_append(self): + 
"""Appending scalar elements of enumerated values.""" + + vlarr = self.h5file.create_vlarray( + "/", "test", self._atom(), title=self._getMethodName() + ) + vlarr.flavor = "python" + + appended = [ + [ + self.valueInEnum, + ], + [self.valueInEnum, self.valueOutOfEnum], + ] + + vlarr.append(appended[0]) + vlarr.append(appended[1]) + vlarr.flush() + read = vlarr.read() + common.verbosePrint( + "* appended value: %s\n" "* read value: %s\n" % (appended, read) + ) + self.assertEqual(appended, read, "Written and read values differ.") + + def test02_appendMD(self): + """Appending multi-dimensional elements of enumerated values.""" + + vlarr = self.h5file.create_vlarray( + "/", "test", self._atom((2,)), title=self._getMethodName() + ) + vlarr.flavor = "python" + + appended = [ + [ + [self.valueInEnum, self.valueInEnum], + ], + [ + [self.valueInEnum, self.valueOutOfEnum], + [self.valueInEnum, self.valueInEnum], + ], + ] + + vlarr.append(appended[0]) + vlarr.append(appended[1]) + vlarr.flush() + read = vlarr.read() + common.verbosePrint( + "* appended value: %s\n" "* read value: %s\n" % (appended, read) + ) + self.assertEqual(appended, read, "Written and read values differ.") + + def test03_setitem(self): + """Changing enumerated values using ``vlarray.__setitem__()``.""" + + vlarr = self.h5file.create_vlarray( + "/", "test", self._atom(), title=self._getMethodName() + ) + vlarr.flavor = "python" + + appended = (self.valueInEnum, self.valueInEnum) + vlarr.append(appended) + + written = [self.valueInEnum, self.valueOutOfEnum] + vlarr[0] = written + read = vlarr.read() + common.verbosePrint( + "* written value: %s\n" "* read value: %s\n" % (written, read) + ) + self.assertEqual(written, read[0], "Written and read values differ.") + + +def suite(): + """Return a test suite consisting of all the test cases in the module.""" + + # These two are for including Enum's doctests here. 
+ import doctest + + theSuite = common.unittest.TestSuite() + niter = 1 + + # theSuite.addTest(make_suite(EnumTableTestCase)) + for i in range(niter): + theSuite.addTest(doctest.DocTestSuite(tb.misc.enum)) + theSuite.addTest(common.make_suite(CreateColTestCase)) + theSuite.addTest(common.make_suite(CreateAtomTestCase)) + theSuite.addTest(common.make_suite(EnumTableTestCase)) + theSuite.addTest(common.make_suite(EnumEArrayTestCase)) + theSuite.addTest(common.make_suite(EnumVLArrayTestCase)) + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_expression.py b/venv/Lib/site-packages/tables/tests/test_expression.py new file mode 100644 index 0000000..b9103b2 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_expression.py @@ -0,0 +1,1742 @@ +"""Test module for evaluating expressions under PyTables.""" + +import numpy as np +from numpy import testing as npt + +import tables as tb +from tables.tests import common + +# An example of record + + +class Record(tb.IsDescription): + colInt32 = tb.Int32Col() + colInt64 = tb.Int64Col() + colFloat32 = tb.Float32Col() + colFloat64 = tb.Float64Col() + colComplex = tb.ComplexCol(itemsize=16) + + +# Helper functions +def get_sliced_vars(npvars, start, stop, step): + npvars_ = {} + for name, var in npvars.items(): + if hasattr(var, "__len__"): + npvars_[name] = var[start:stop:step] + else: + npvars_[name] = var + return npvars_ + + +def get_sliced_vars2(npvars, start, stop, step, shape, maindim): + npvars_ = {} + slices = [slice(None) for dim in shape] + slices[maindim] = slice(start, stop, step) + for name, var in npvars.items(): + npvars_[name] = var.__getitem__(tuple(slices)) + return npvars_ + + +# Basic tests +class ExprTestCase(common.TempFileMixin, common.PyTablesTestCase): + + # The shape for the variables in expressions + shape = (10, 20) + + def 
setUp(self): + super().setUp() + + # The expression + self.expr = "2 * a*b + c" + # Define the NumPy variables to be used in expression + N = np.prod(self.shape) + self.a = a = np.arange(0, N, dtype="int32").reshape(self.shape) + self.b = b = np.arange(N, 2 * N, dtype="int64").reshape(self.shape) + self.c = c = np.arange(2 * N, 3 * N, dtype="int32").reshape(self.shape) + self.r1 = r1 = np.empty(N, dtype="int64").reshape(self.shape) + self.npvars = { + "a": a, + "b": b, + "c": c, + } + # Define other variables, if needed + root = self.h5file.root + if self.kind == "Array": + self.a = self.h5file.create_array(root, "a", a) + self.b = self.h5file.create_array(root, "b", b) + self.c = self.h5file.create_array(root, "c", c) + self.r1 = self.h5file.create_array(root, "r1", r1) + elif self.kind == "CArray": + self.a = self.h5file.create_carray( + root, "a", atom=tb.Atom.from_dtype(a.dtype), shape=self.shape + ) + self.b = self.h5file.create_carray( + root, "b", atom=tb.Atom.from_dtype(b.dtype), shape=self.shape + ) + self.c = self.h5file.create_carray( + root, "c", atom=tb.Atom.from_dtype(c.dtype), shape=self.shape + ) + self.r1 = self.h5file.create_carray( + root, "r1", atom=tb.Atom.from_dtype(r1.dtype), shape=self.shape + ) + self.a[:] = a + self.b[:] = b + self.c[:] = c + elif self.kind == "EArray": + shape = list(self.shape) + shape[0] = 0 + self.a = self.h5file.create_earray( + root, "a", atom=tb.Atom.from_dtype(a.dtype), shape=shape + ) + self.b = self.h5file.create_earray( + root, "b", atom=tb.Atom.from_dtype(b.dtype), shape=shape + ) + self.c = self.h5file.create_earray( + root, "c", atom=tb.Atom.from_dtype(c.dtype), shape=shape + ) + self.r1 = self.h5file.create_earray( + root, "r1", atom=tb.Atom.from_dtype(r1.dtype), shape=shape + ) + self.a.append(a) + self.b.append(b) + self.c.append(c) + self.r1.append(r1) # Fill with uninitialized values + elif self.kind == "Column": + ra = np.rec.fromarrays( + [a, b, c, r1], + dtype="%si4,%si8,%si4,%si8" % 
((self.shape[1:],) * 4), + ) + t = self.h5file.create_table(root, "t", ra) + self.a = t.cols.f0 + self.b = t.cols.f1 + self.c = t.cols.f2 + self.d = t.cols.f3 + self.vars = { + "a": self.a, + "b": self.b, + "c": self.c, + } + + def test00_simple(self): + """Checking that expression is correctly evaluated.""" + + expr = tb.Expr(self.expr, self.vars) + r1 = expr.eval() + r2 = eval(self.expr, self.npvars) + if common.verbose: + print("Computed expression:", repr(r1)) + print("Should look like:", repr(r2)) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01_out(self): + """Checking that expression is correctly evaluated (`out` param)""" + + expr = tb.Expr(self.expr, self.vars) + expr.set_output(self.r1) + r1 = expr.eval() + if self.kind != "NumPy": + r1 = r1[:] + r2 = eval(self.expr, self.npvars) + if common.verbose: + print("Computed expression:", repr(r1)) + print("Should look like:", repr(r2)) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test02_out(self): + """Checking that expression is correctly evaluated when slice is + outside of data samples (`out` param)""" + expr = tb.Expr(self.expr, self.vars) + # maybe it's better to use the leading dimension instead? 
+ maxshape = max(self.shape) + start, stop, step = (maxshape + 1, maxshape + 2, None) + expr.set_inputs_range(start, stop, step) + r1 = expr.eval() + # create an empty array with the same dtype and shape + zeros = np.zeros(shape=self.shape, dtype=r1.dtype) + r2 = zeros[start:stop:step] + self.assertListEqual(r1.tolist(), r2.tolist()) + if common.verbose: + print("Computed expression:", repr(r1)) + print("Should look like:", repr(r2)) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +class ExprNumPy(ExprTestCase): + kind = "NumPy" + + +class ExprArray(ExprTestCase): + kind = "Array" + + +class ExprCArray(ExprTestCase): + kind = "CArray" + + +class ExprEArray(ExprTestCase): + kind = "EArray" + + +class ExprColumn(ExprTestCase): + kind = "Column" + + +# Test for mixed containers +class MixedContainersTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + # The expression + self.expr = "2 * a*b + c**2+d**2+e-f+g" + + # Create a directory in file for outputs + root = self.h5file.root + outs = self.h5file.create_group(root, "outs") + + # Define the NumPy variables to be used in expression + N = np.prod(self.shape) + + # Initial values for variables + a = np.arange(0, N, dtype="int32").reshape(self.shape) + b = np.arange(N, 2 * N, dtype="int64").reshape(self.shape) + c = np.arange(2 * N, 3 * N, dtype="int32").reshape(self.shape) + d = np.arange(3 * N, 4 * N, dtype="int32").reshape(self.shape) + e = np.arange(4 * N, 5 * N, dtype="int32").reshape(self.shape) + self.f = f = int(3) # a regular python type + self.g = g = np.int16(2) # a NumPy scalar type + + # Original values + self.npvars = {"a": a, "b": b, "c": c, "d": d, "e": e, "f": f, "g": g} + rnda = b.copy() + + # ndarray input and output + self.a = a + self.rnda = rnda + + # Array input and output + self.b = self.h5file.create_array(root, "b", b) + self.rarr = self.b.copy(outs) + + # CArray input and output + self.c = 
self.h5file.create_carray( + root, "c", atom=tb.Atom.from_dtype(c.dtype), shape=self.shape + ) + self.c[:] = c + self.rcarr = self.c.copy(outs) + + # EArray input and output + eshape = list(self.shape) + eshape[0] = 0 + self.d = self.h5file.create_earray( + root, "d", atom=tb.Atom.from_dtype(d.dtype), shape=eshape + ) + self.d.append(d) + self.rearr = self.d.copy(outs) + + # Column input and output + rtype = {} + colshape = self.shape[1:] + for i, col in enumerate((a, b, c, d, e, rnda)): + rtype["f%d" % i] = tb.Col.from_sctype(col.dtype.type, colshape) + t = self.h5file.create_table(root, "t", rtype) + nrows = self.shape[0] + row = t.row + for nrow in range(nrows): + for i, col in enumerate((a, b, c, d, e, rnda)): + row["f%d" % i] = col[nrow] + row.append() + t.flush() + self.e = t.cols.f4 + self.rcol = t.cols.f5 + # Input vars + self.vars = { + "a": self.a, + "b": self.b, + "c": self.c, + "d": self.d, + "e": self.e, + "f": self.f, + "g": self.g, + } + + def test00a_simple(self): + """Checking expressions with mixed objects.""" + + expr = tb.Expr(self.expr, self.vars) + r1 = expr.eval() + r2 = eval(self.expr, self.npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test00b_simple_scalars(self): + """Checking that scalars in expression evaluate correctly.""" + + expr_str = "2 * f + g" + expr = tb.Expr(expr_str, self.vars) + r1 = expr.eval() + r2 = eval(expr_str, self.npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + msg = f"Evaluate is returning a wrong value: {expr_str}\n{r1=}\n{r2=}" + self.assertEqual(r1.shape, r2.shape, msg=msg) + # In something like 2 * np.in16(3) + np.int16(2) the result is still a + # np.int16 in NumPy 2.0, so we shouldn't actually check dtype but just + # the kind + 
self.assertEqual(r1.dtype.kind, r2.dtype.kind, msg=msg) + self.assertEqual(r1, r2, msg=msg) + + def test01a_out(self): + """Checking expressions with mixed objects (`out` param)""" + + expr = tb.Expr(self.expr, self.vars) + for r1 in self.rnda, self.rarr, self.rcarr, self.rearr, self.rcol: + if common.verbose: + print("Checking output container:", type(r1)) + expr.set_output(r1) + r1 = expr.eval() + if not isinstance(r1, type(self.rnda)): + r1 = r1[:] + r2 = eval(self.expr, self.npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01b_out_scalars(self): + """Checking expressions with mixed objects (`out` param, scalars)""" + + if len(self.shape) > 1: + # This test is only meant for undimensional outputs + return + expr_str = "2 * f + g" + expr = tb.Expr(expr_str, self.vars) + for r1 in self.rnda, self.rarr, self.rcarr, self.rearr, self.rcol: + if common.verbose: + print("Checking output container:", type(r1)) + expr.set_output(r1) + r1 = expr.eval() + r1 = r1[()] # convert a 0-dim array into a scalar + r2 = eval(expr_str, self.npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + msg = ( + f"Evaluate is returning a wrong value: " + f"{expr_str}\n{r1=}\n{r2=}" + ) + # On NumPy 2 type promotion is different so don't check type + # strictly here + self.assertTrue( + common.areArraysEqual(r1, r2, check_type=False), msg=msg + ) + self.assertEqual(r1.dtype.kind, r2.dtype.kind) + + def test02a_sss(self): + """Checking mixed objects and start, stop, step (I)""" + + start, stop, step = (self.start, self.stop, 1) + expr = tb.Expr(self.expr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = expr.eval() + npvars = get_sliced_vars(self.npvars, start, stop, step) + r2 = eval(self.expr, npvars) + if 
common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test02b_sss(self): + """Checking mixed objects and start, stop, step (II)""" + + start, stop, step = (0, self.shape[0], self.step) + expr = tb.Expr(self.expr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = expr.eval() + npvars = get_sliced_vars(self.npvars, start, stop, step) + r2 = eval(self.expr, npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test02c_sss(self): + """Checking mixed objects and start, stop, step (III)""" + + start, stop, step = (self.start, self.stop, self.step) + expr = tb.Expr(self.expr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = expr.eval() + npvars = get_sliced_vars(self.npvars, start, stop, step) + r2 = eval(self.expr, npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test03_sss(self): + """Checking start, stop, step as numpy.int64.""" + + start, stop, step = ( + np.int64(i) for i in (self.start, self.stop, self.step) + ) + expr = tb.Expr(self.expr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = expr.eval() + npvars = get_sliced_vars(self.npvars, start, stop, step) + r2 = eval(self.expr, npvars) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +class MixedContainers0(MixedContainersTestCase): + shape = (1,) + start, stop, step = (0, 1, 1) + 
+ +class MixedContainers1(MixedContainersTestCase): + shape = (10,) + start, stop, step = (3, 6, 2) + + +class MixedContainers2(MixedContainersTestCase): + shape = (10, 5) + start, stop, step = (2, 9, 3) + + +class MixedContainers3(MixedContainersTestCase): + shape = (10, 3, 2) + start, stop, step = (2, -1, 1) + + +# Test for unaligned objects +class UnalignedObject(common.PyTablesTestCase): + + def test00_simple(self): + """Checking expressions with unaligned objects.""" + + # Build unaligned arrays + a0 = np.empty(10, dtype="int8") + a1 = np.arange(10, dtype="int32") + a2 = a1.copy() + a3 = a2.copy() + ra = np.rec.fromarrays([a0, a1, a2, a3]) + # The inputs + a = ra["f1"] + b = ra["f2"] + self.assertEqual(a.flags.aligned, False) + self.assertEqual(b.flags.aligned, False) + # The expression + sexpr = "2 * a + b" + expr = tb.Expr(sexpr) + r1 = expr.eval() + r2 = eval(sexpr) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01_md(self): + """Checking expressions with unaligned objects (MD version)""" + + # Build unaligned arrays + a0 = np.empty((10, 4), dtype="int8") + a1 = np.arange(10 * 4, dtype="int32").reshape(10, 4) + a2 = a1.copy() + a3 = a2.copy() + ra = np.rec.fromarrays([a0, a1, a2, a3]) + # The inputs + a = ra["f1"] + b = ra["f2"] + self.assertEqual(a.flags.aligned, False) + self.assertEqual(b.flags.aligned, False) + # The expression + sexpr = "2 * a + b" + expr = tb.Expr(sexpr) + r1 = expr.eval() + r2 = eval(sexpr) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +# Test for non-contiguous objects +class NonContiguousObject(common.PyTablesTestCase): + + def test00_simple(self): + """Checking expressions 
with non-contiguous objects""" + + # Build non-contiguous arrays as inputs + a = np.arange(10, dtype="int32") + b = a[::2] + a = b * 2 + self.assertEqual(b.flags.contiguous, False) + self.assertEqual(b.flags.aligned, True) + # The expression + sexpr = "2 * a + b" + expr = tb.Expr(sexpr) + r1 = expr.eval() + r2 = eval(sexpr) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01a_md(self): + """Checking expressions with non-contiguous objects (MD version, I)""" + + # Build non-contiguous arrays + a = np.arange(10 * 4, dtype="int32").reshape(10, 4) + b = a[::2] + a = b * 2 + self.assertEqual(b.flags.contiguous, False) + self.assertEqual(b.flags.aligned, True) + # The expression + sexpr = "2 * a + b" + expr = tb.Expr(sexpr) + r1 = expr.eval() + r2 = eval(sexpr) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01b_md(self): + """Checking expressions with non-contiguous objects (MD version, II)""" + + # Build non-contiguous arrays + a = np.arange(10 * 4, dtype="int32").reshape(10, 4) + b = a[:, ::2] + a = b * 2 + self.assertEqual(b.flags.contiguous, False) + self.assertEqual(b.flags.aligned, True) + # The expression + sexpr = "2 * a + b" + expr = tb.Expr(sexpr) + r1 = expr.eval() + r2 = eval(sexpr) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +# Test for errors +class ExprError(common.TempFileMixin, common.PyTablesTestCase): + + # The shape for the variables in expressions + shape = (10,) + + def setUp(self): + super().setUp() + + # 
Define the NumPy variables to be used in expression + N = np.prod(self.shape) + self.a = np.arange(N, dtype="int32").reshape(self.shape) + self.b = np.arange(N, dtype="int64").reshape(self.shape) + self.c = np.arange(N, dtype="int32").reshape(self.shape) + self.r1 = np.empty(N, dtype="int64").reshape(self.shape) + + def _test00_shape(self): + """Checking that inconsistent shapes are detected.""" + + self.b = self.b.reshape(self.shape + (1,)) + expr = "a * b + c" + vars_ = { + "a": self.a, + "b": self.b, + "c": self.c, + } + expr = tb.Expr(expr, vars_) + self.assertRaises(ValueError, expr.eval) + + def test02_uint64(self): + """Checking that uint64 arrays in expression are detected.""" + + self.b = self.b.view("uint64") + expr = "a * b + c" + vars_ = { + "a": self.a, + "b": self.b, + "c": self.c, + } + self.assertRaises(NotImplementedError, tb.Expr, expr, vars_) + + def test03_table(self): + """Checking that tables in expression are detected.""" + + class Rec(tb.IsDescription): + col1 = tb.Int32Col() + col2 = tb.Int64Col() + + t = self.h5file.create_table("/", "a", Rec) + expr = "a * b + c" + vars_ = { + "a": t, + "b": self.b, + "c": self.c, + } + self.assertRaises(TypeError, tb.Expr, expr, vars_) + + def test04_nestedcols(self): + """Checking that nested cols in expression are detected.""" + + class Nested(tb.IsDescription): + col1 = tb.Int32Col() + + class col2(tb.IsDescription): + col3 = tb.Int64Col() + + t = self.h5file.create_table("/", "a", Nested) + expr = "a * b + c" + # The next non-nested column should work + a = t.cols.col2.col3 + vars_ = { + "a": a, + "b": self.b, + "c": self.c, + } + expr = tb.Expr(expr, vars_) + r1 = expr.eval() + self.assertIsNotNone(r1) + # But a nested column should not + a = t.cols.col2 + vars_ = { + "a": a, + "b": self.b, + "c": self.c, + } + self.assertRaises(TypeError, tb.Expr, expr, vars_) + + def test05_vlarray(self): + """Checking that VLArrays in expression are detected.""" + + vla = self.h5file.create_vlarray("/", "a", 
tb.Int32Col()) + expr = "a * b + c" + vars_ = { + "a": vla, + "b": self.b, + "c": self.c, + } + self.assertRaises(TypeError, tb.Expr, expr, vars_) + + +# Test for broadcasting arrays +class BroadcastTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_simple(self): + """Checking broadcast in expression.""" + + shapes = (self.shape1, self.shape2, self.shape3) + # Build arrays with different shapes as inputs + a = np.arange(np.prod(shapes[0]), dtype="i4").reshape(shapes[0]) + b = np.arange(np.prod(shapes[1]), dtype="i4").reshape(shapes[1]) + c = np.arange(np.prod(shapes[2]), dtype="i4").reshape(shapes[2]) + root = self.h5file.root + if a.shape[0] > 0: + a1 = self.h5file.create_array(root, "a1", a) + else: + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=a.shape + ) + self.assertIsNotNone(a1) + b1 = self.h5file.create_array(root, "b1", b) + self.assertIsNotNone(b1) + c1 = self.h5file.create_array(root, "c1", c) + self.assertIsNotNone(c1) + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + r1 = expr.eval() + r2 = eval("2 * a + b-c") + if common.verbose: + print("Tested shapes:", self.shape1, self.shape2, self.shape3) + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +class Broadcast0(BroadcastTestCase): + shape1 = (0, 3, 4) + shape2 = (3, 4) + shape3 = (4,) + + +class Broadcast1(BroadcastTestCase): + shape1 = (2, 3, 4) + shape2 = (3, 4) + shape3 = (4,) + + +class Broadcast2(BroadcastTestCase): + shape1 = ( + 3, + 4, + ) + shape2 = (3, 4) + shape3 = (4,) + + +class Broadcast3(BroadcastTestCase): + shape1 = (4,) + shape2 = (3, 4) + shape3 = (4,) + + +class Broadcast4(BroadcastTestCase): + shape1 = (1,) + shape2 = (3, 4) + shape3 = (4,) + + +class Broadcast5(BroadcastTestCase): + shape1 = (1,) + shape2 = (3, 1) + shape3 = (4,) + + +# Test for different length inputs +class 
DiffLengthTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_simple(self): + """Checking different length inputs in expression.""" + + shapes = (list(self.shape1), list(self.shape2), list(self.shape3)) + # Build arrays with different shapes as inputs + a = np.arange(np.prod(shapes[0]), dtype="i4").reshape(shapes[0]) + b = np.arange(np.prod(shapes[1]), dtype="i4").reshape(shapes[1]) + c = np.arange(np.prod(shapes[2]), dtype="i4").reshape(shapes[2]) + # The expression + expr = tb.Expr("2 * a + b-c") + r1 = expr.eval() + # Compute the minimum length for shapes + maxdim = max([len(shape) for shape in shapes]) + minlen = min( + [ + shape[0] + for i, shape in enumerate(shapes) + if len(shape) == maxdim + ] + ) + for i, shape in enumerate(shapes): + if len(shape) == maxdim: + shape[0] = minlen + # Build arrays with the new shapes as inputs + a = np.arange(np.prod(shapes[0]), dtype="i4").reshape(shapes[0]) + self.assertIsNotNone(a) + b = np.arange(np.prod(shapes[1]), dtype="i4").reshape(shapes[1]) + self.assertIsNotNone(b) + c = np.arange(np.prod(shapes[2]), dtype="i4").reshape(shapes[2]) + self.assertIsNotNone(c) + r2 = eval("2 * a + b-c") + if common.verbose: + print("Tested shapes:", self.shape1, self.shape2, self.shape3) + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +class DiffLength0(DiffLengthTestCase): + shape1 = (0,) + shape2 = (10,) + shape3 = (20,) + + +class DiffLength1(DiffLengthTestCase): + shape1 = (3,) + shape2 = (10,) + shape3 = (20,) + + +class DiffLength2(DiffLengthTestCase): + shape1 = (3, 4) + shape2 = (2, 3, 4) + shape3 = (4, 3, 4) + + +class DiffLength3(DiffLengthTestCase): + shape1 = (1, 3, 4) + shape2 = (2, 3, 4) + shape3 = (4, 3, 4) + + +class DiffLength4(DiffLengthTestCase): + shape1 = (0, 3, 4) + shape2 = (2, 3, 4) + shape3 = (4, 3, 4) + + +# Test for different type 
inputs +class TypesTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_bool(self): + """Checking booleans in expression.""" + + # Build arrays with different shapes as inputs + a = np.array([True, False, True]) + b = np.array([False, True, False]) + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + self.assertIsNotNone(a1) + b1 = self.h5file.create_array(root, "b1", b) + self.assertIsNotNone(b1) + expr = tb.Expr("a | b") + r1 = expr.eval() + r2 = eval("a | b") + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01_shortint(self): + """Checking int8,uint8,int16,uint16 and int32 in expression.""" + + for dtype in "int8", "uint8", "int16", "uint16", "int32": + if common.verbose: + print("Checking type:", dtype) + # Build arrays with different shapes as inputs + a = np.array([1, 2, 3], dtype) + b = np.array([3, 4, 5], dtype) + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + b1 = self.h5file.create_array(root, "b1", b) + two = np.int32(2) + self.assertIsInstance(two, np.integer) + expr = tb.Expr("two * a1-b1") + r1 = expr.eval() + a = np.array([1, 2, 3], "int32") + b = np.array([3, 4, 5], "int32") + r2 = eval("two * a-b") + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertEqual(r1.dtype, r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + # Remove created leaves + a1.remove() + b1.remove() + + def test02_longint(self): + """Checking uint32 and int64 in expression.""" + + for dtype in "uint32", "int64": + if common.verbose: + print("Checking type:", dtype) + # Build arrays with different shapes as inputs + a = np.array([1, 2, 3], dtype) + b = np.array([3, 4, 5], dtype) + root = 
self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + b1 = self.h5file.create_array(root, "b1", b) + expr = tb.Expr("2 * a1-b1") + r1 = expr.eval() + a = np.array([1, 2, 3], "int64") + b = np.array([3, 4, 5], "int64") + r2 = eval("2 * a-b") + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertEqual(r1.dtype, r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + # Remove created leaves + a1.remove() + b1.remove() + + def test03_float(self): + """Checking float32 and float64 in expression.""" + + for dtype in "float32", "float64": + if common.verbose: + print("Checking type:", dtype) + # Build arrays with different shapes as inputs + a = np.array([1, 2, 3], dtype) + b = np.array([3, 4, 5], dtype) + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + b1 = self.h5file.create_array(root, "b1", b) + expr = tb.Expr("2 * a1-b1") + r1 = expr.eval() + a = np.array([1, 2, 3], dtype) + b = np.array([3, 4, 5], dtype) + r2 = eval("2 * a-b") + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertEqual(r1.dtype, r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + # Remove created leaves + a1.remove() + b1.remove() + + def test04_complex(self): + """Checking complex64 and complex128 in expression.""" + + for dtype in "complex64", "complex128": + if common.verbose: + print("Checking type:", dtype) + # Build arrays with different shapes as inputs + a = np.array([1, 2j, 3 + 2j], dtype) + b = np.array([3, 4j, 5 + 1j], dtype) + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + b1 = self.h5file.create_array(root, "b1", b) + expr = tb.Expr("2 * a1-b1") + r1 = expr.eval() + a = np.array([1, 2j, 3 + 2j], "complex128") + b = np.array([3, 4j, 5 + 1j], "complex128") + 
r2 = eval("2 * a-b") + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertEqual(r1.dtype, r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + # Remove created leaves + a1.remove() + b1.remove() + + def test05_string(self): + """Checking strings in expression.""" + + # Build arrays with different shapes as inputs + a = np.array(["a", "bd", "cd"], "S") + b = np.array(["a", "bdcd", "ccdc"], "S") + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + self.assertIsNotNone(a1) + b1 = self.h5file.create_array(root, "b1", b) + self.assertIsNotNone(b1) + expr = tb.Expr("(a1 > b'a') | ( b1 > b'b')") + r1 = expr.eval() + r2 = eval("(a > b'a') | ( b > b'b')") + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + +# Test for different functions +class FunctionsTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_simple(self): + """Checking some math functions in expression.""" + + # Build arrays with different shapes as inputs + a = np.array([0.1, 0.2, 0.3]) + b = np.array([0.3, 0.4, 0.5]) + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + self.assertIsNotNone(a1) + b1 = self.h5file.create_array(root, "b1", b) + self.assertIsNotNone(b1) + # The expression + expr = tb.Expr("sin(a1) * sqrt(b1)") + r1 = expr.eval() + r2 = np.sin(a) * np.sqrt(b) + if common.verbose: + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + npt.assert_array_almost_equal_nulp(r1, r2) + + +# Test for EArrays with maindim != 0 +class MaindimTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_simple(self): + """Checking other dimensions than 0 as main dimension.""" + + 
shape = list(self.shape) + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + root = self.h5file.root + shape[self.maindim] = 0 + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + a1.append(a) + b1.append(b) + c1.append(c) + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + r1 = expr.eval() + r2 = eval("2 * a + b-c") + if common.verbose: + print("Tested shape:", shape) + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01_out(self): + """Checking other dimensions than 0 as main dimension (out)""" + + shape = list(self.shape) + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + root = self.h5file.root + shape[self.maindim] = 0 + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + r1 = self.h5file.create_earray( + root, "r1", atom=tb.Int32Col(), shape=shape + ) + a1.append(a) + b1.append(b) + c1.append(c) + r1.append(c) + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + expr.set_output(r1) + expr.eval() + r2 = eval("2 * a + b-c") + if common.verbose: + print("Tested shape:", shape) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + def test02_diff_in_maindims(self): + """Checking different main dimensions in inputs.""" + + shape = 
list(self.shape) + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + root = self.h5file.root + shape2 = shape[:] + shape[self.maindim] = 0 + shape2[0] = 0 + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + self.assertEqual(a1.maindim, self.maindim) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape2 + ) + self.assertEqual(b1.maindim, 0) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + r1 = self.h5file.create_earray( + root, "r1", atom=tb.Int32Col(), shape=shape + ) + a1.append(a) + b1.append(b) + c1.append(c) + r1.append(c) + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + r1 = expr.eval() + r2 = eval("2 * a + b-c") + if common.verbose: + print("Tested shape:", shape) + print("Computed expression:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test03_diff_in_out_maindims(self): + """Checking different maindims in inputs and output.""" + + shape = list(self.shape) + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + root = self.h5file.root + shape2 = shape[:] + shape[self.maindim] = 0 + shape2[0] = 0 + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + self.assertEqual(a1.maindim, self.maindim) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + r1 = self.h5file.create_earray( + root, "r1", atom=tb.Int32Col(), shape=shape2 + ) + self.assertEqual(r1.maindim, 0) + a1.append(a) + b1.append(b) + c1.append(c) + r1.append(c) + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + expr.set_output(r1) + expr.eval() + r2 = eval("2 * a + b-c") + if common.verbose: + 
print("Tested shape:", shape) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + def test04_diff_in_out_maindims_lengths(self): + """Checking different maindims and lengths in inputs and output.""" + + shape = list(self.shape) + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + root = self.h5file.root + shape2 = shape[:] + shape[self.maindim] = 0 + shape2[0] = 0 + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + self.assertEqual(a1.maindim, self.maindim) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + r1 = self.h5file.create_earray( + root, "r1", atom=tb.Int32Col(), shape=shape2 + ) + self.assertEqual(r1.maindim, 0) + a1.append(a) + a1.append(a) + b1.append(b) + b1.append(b) + c1.append(c) + c1.append(c) + r1.append(c) # just once so that output is smaller + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + expr.set_output(r1) + # This should raise an error + self.assertRaises(ValueError, expr.eval) + + +class Maindim0(MaindimTestCase): + maindim = 1 + shape = (1, 2) + + +class Maindim1(MaindimTestCase): + maindim = 1 + shape = (2, 3) + + +class Maindim2(MaindimTestCase): + maindim = 1 + shape = (2, 3, 4) + + +class Maindim3(MaindimTestCase): + maindim = 2 + shape = (2, 3, 4) + + +# Test `append` mode flag in `set_output()` +class AppendModeTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01_append(self): + """Checking append mode in `set_output()`""" + + shape = [3, 2] + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + shape[1] = 0 + root = self.h5file.root + a1 = self.h5file.create_earray( + root, "a1", 
atom=tb.Int32Col(), shape=shape + ) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + r1 = self.h5file.create_earray( + root, "r1", atom=tb.Int32Col(), shape=shape + ) + a1.append(a) + b1.append(b) + c1.append(c) + if not self.append: + r1.append(c) + # The expression + expr = tb.Expr("2 * a1 + b1-c1") + expr.set_output(r1, append_mode=self.append) + expr.eval() + r2 = eval("2 * a + b-c") + if common.verbose: + print("Tested shape:", shape) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + +class AppendModeTrue(AppendModeTestCase): + append = True + + +class AppendModeFalse(AppendModeTestCase): + append = False + + +# Test for `__iter__()` iterator +class iterTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + shape = list(self.shape) + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + c = a.copy() + self.npvars = {"a": a, "b": b, "c": c} + shape[self.maindim] = 0 + root = self.h5file.root + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + c1 = self.h5file.create_earray( + root, "c1", atom=tb.Int32Col(), shape=shape + ) + a1.append(a) + b1.append(b) + c1.append(c) + self.vars = {"a": a1, "b": b1, "c": c1} + # The expression + self.sexpr = "2 * a + b-c" + + def test00_iter(self): + """Checking the __iter__ iterator.""" + + expr = tb.Expr(self.sexpr, self.vars) + r1 = np.array([row for row in expr]) + r2 = eval(self.sexpr, self.npvars) + if common.verbose: + print("Tested shape, maindim:", self.shape, self.maindim) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should 
look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + def test01a_sss(self): + """Checking the __iter__ iterator (with ranges, I)""" + + start, stop, step = self.range_[0], None, None + expr = tb.Expr(self.sexpr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = np.array([row for row in expr]) + npvars = get_sliced_vars2( + self.npvars, start, stop, step, self.shape, self.maindim + ) + r2 = eval(self.sexpr, npvars) + if common.verbose: + print("Tested shape, maindim:", self.shape, self.maindim) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + def test01b_sss(self): + """Checking the __iter__ iterator (with ranges, II)""" + + start, stop, step = self.range_[0], self.range_[2], None + expr = tb.Expr(self.sexpr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = np.array([row for row in expr]) + npvars = get_sliced_vars2( + self.npvars, start, stop, step, self.shape, self.maindim + ) + r2 = eval(self.sexpr, npvars) + if common.verbose: + print("Tested shape, maindim:", self.shape, self.maindim) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + def test01c_sss(self): + """Checking the __iter__ iterator (with ranges, III)""" + + start, stop, step = self.range_ + expr = tb.Expr(self.sexpr, self.vars) + expr.set_inputs_range(start, stop, step) + r1 = np.array([row for row in expr]) + npvars = get_sliced_vars2( + self.npvars, start, stop, step, self.shape, self.maindim + ) + r2 = eval(self.sexpr, npvars) + if common.verbose: + print("Tested shape, maindim:", self.shape, self.maindim) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should 
look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r2), + "Evaluate is returning a wrong value.", + ) + + +class iter0(iterTestCase): + maindim = 0 + shape = (0,) + range_ = (1, 2, 1) + + +class iter1(iterTestCase): + maindim = 0 + shape = (3,) + range_ = (1, 2, 1) + + +class iter2(iterTestCase): + maindim = 0 + shape = (3, 2) + range_ = (0, 3, 2) + + +class iter3(iterTestCase): + maindim = 1 + shape = (3, 2) + range_ = (0, 3, 2) + + +class iter4(iterTestCase): + maindim = 2 + shape = (3, 2, 1) + range_ = (1, 3, 2) + + +class iter5(iterTestCase): + maindim = 2 + shape = (1, 2, 5) + range_ = (0, 4, 2) + + +# Test for set_output_range +class setOutputRangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_simple(self): + """Checking the range selection for output.""" + + shape = list(self.shape) + start, stop, step = self.range_ + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + r = a.copy() + root = self.h5file.root + a1 = self.h5file.create_array(root, "a1", a) + self.assertIsNotNone(a1) + b1 = self.h5file.create_array(root, "b1", b) + self.assertIsNotNone(b1) + r1 = self.h5file.create_array(root, "r1", r) + # The expression + expr = tb.Expr("a1-b1-1") + expr.set_output(r1) + expr.set_output_range(start, stop, step) + expr.eval() + r2 = eval("a-b-1") + r[start:stop:step] = r2[: len(range(start, stop, step))] + if common.verbose: + print("Tested shape:", shape) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r), r.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r), + "Evaluate is returning a wrong value.", + ) + + def test01_maindim(self): + """Checking the range selection for output (maindim > 0)""" + + shape = list(self.shape) + start, stop, step = self.range_ + # Build input arrays + a = np.arange(np.prod(shape), dtype="i4").reshape(shape) + b = a.copy() + r = a.copy() + shape[self.maindim] = 0 + root = 
self.h5file.root + a1 = self.h5file.create_earray( + root, "a1", atom=tb.Int32Col(), shape=shape + ) + b1 = self.h5file.create_earray( + root, "b1", atom=tb.Int32Col(), shape=shape + ) + r1 = self.h5file.create_earray( + root, "r1", atom=tb.Int32Col(), shape=shape + ) + a1.append(a) + b1.append(b) + r1.append(r) + # The expression + expr = tb.Expr("a1-b1-1") + expr.set_output(r1) + expr.set_output_range(start, stop, step) + expr.eval() + r2 = eval("a-b-1") + lsl = tuple([slice(None)] * self.maindim) + # print "lsl-->", lsl + (slice(start,stop,step),) + lrange = len(range(start, stop, step)) + r.__setitem__( + lsl + (slice(start, stop, step),), + r2.__getitem__(lsl + (slice(0, lrange),)), + ) + if common.verbose: + print("Tested shape:", shape) + print("Computed expression:", repr(r1[:]), r1.dtype) + print("Should look like:", repr(r), r.dtype) + self.assertTrue( + common.areArraysEqual(r1[:], r), + "Evaluate is returning a wrong value.", + ) + + +class setOutputRange0(setOutputRangeTestCase): + maindim = 0 + shape = (10,) + range_ = (0, 1, 2) + + +class setOutputRange1(setOutputRangeTestCase): + maindim = 0 + shape = (10,) + range_ = (0, 10, 2) + + +class setOutputRange2(setOutputRangeTestCase): + maindim = 0 + shape = (10,) + range_ = (1, 10, 2) + + +class setOutputRange3(setOutputRangeTestCase): + maindim = 0 + shape = (10, 1) + range_ = (1, 10, 3) + + +class setOutputRange4(setOutputRangeTestCase): + maindim = 0 + shape = (10, 2) + range_ = (1, 10, 3) + + +class setOutputRange5(setOutputRangeTestCase): + maindim = 0 + shape = (5, 3, 1) + range_ = (1, 5, 1) + + +class setOutputRange6(setOutputRangeTestCase): + maindim = 1 + shape = (2, 5) + range_ = (1, 3, 2) + + +class setOutputRange7(setOutputRangeTestCase): + maindim = 1 + shape = (2, 5, 1) + range_ = (1, 3, 2) + + +class setOutputRange8(setOutputRangeTestCase): + maindim = 2 + shape = (1, 3, 5) + range_ = (1, 5, 2) + + +class setOutputRange9(setOutputRangeTestCase): + maindim = 3 + shape = (1, 3, 4, 5) + 
range_ = (1, 5, 3) + + +# Test for very large inputs +class VeryLargeInputsTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00_simple(self): + """Checking very large inputs.""" + + shape = self.shape + # Use filters so as to not use too much space + if tb.which_lib_version("blosc") is not None: + filters = tb.Filters(complevel=1, complib="blosc", shuffle=False) + elif tb.which_lib_version("lzo") is not None: + filters = tb.Filters(complevel=1, complib="lzo", shuffle=False) + else: + filters = tb.Filters(complevel=1, shuffle=False) + # Build input arrays + root = self.h5file.root + a = self.h5file.create_carray( + root, + "a", + atom=tb.Float64Atom(dflt=3), + shape=shape, + filters=filters, + ) + self.assertIsNotNone(a) + b = self.h5file.create_carray( + root, + "b", + atom=tb.Float64Atom(dflt=2), + shape=shape, + filters=filters, + ) + self.assertIsNotNone(b) + r1 = self.h5file.create_carray( + root, + "r1", + atom=tb.Float64Atom(dflt=3), + shape=shape, + filters=filters, + ) + # The expression + expr = tb.Expr("a * b-6") # Should give 0 + expr.set_output(r1) + expr.eval() + r1 = r1[-10:] # Get the last ten rows + r2 = np.zeros(10, dtype="float64") + if common.verbose: + print("Tested shape:", shape) + print("Ten last rows:", repr(r1), r1.dtype) + print("Should look like:", repr(r2), r2.dtype) + self.assertTrue( + common.areArraysEqual(r1, r2), + "Evaluate is returning a wrong value.", + ) + + def test01_iter(self): + """Checking very large inputs (__iter__ version)""" + + shape = self.shape + if shape[0] >= 2**24: + # The iterator is much slower, so don't run it for + # extremely large arrays. 
+ if common.verbose: + print("Skipping this *very* long test") + return + # Use filters so as to not use too much space + if tb.which_lib_version("lzo") is not None: + filters = tb.Filters(complevel=1, complib="lzo", shuffle=False) + else: + filters = tb.Filters(complevel=1, shuffle=False) + + # Build input arrays + root = self.h5file.root + a = self.h5file.create_carray( + root, "a", atom=tb.Int32Atom(dflt=1), shape=shape, filters=filters + ) + self.assertIsNotNone(a) + b = self.h5file.create_carray( + root, "b", atom=tb.Int32Atom(dflt=2), shape=shape, filters=filters + ) + self.assertIsNotNone(b) + r1 = self.h5file.create_carray( + root, "r1", atom=tb.Int32Atom(dflt=3), shape=shape, filters=filters + ) + # The expression + expr = tb.Expr("a-b + 1") + r1 = sum(expr) # Should give 0 + if common.verbose: + print("Tested shape:", shape) + print("Cummulated sum:", r1) + print("Should look like:", 0) + self.assertEqual(r1, 0, "Evaluate is returning a wrong value.") + + +# The next can go on regular tests, as it should be light enough +class VeryLargeInputs1(VeryLargeInputsTestCase): + shape = (2**20,) # larger than any internal I/O buffers + + +# The next is only meant for 'heavy' mode as it can take more than 1 minute +# on modern machines +class VeryLargeInputs2(VeryLargeInputsTestCase): + shape = (2**32 + 1,) # check that arrays > 32-bit are supported + + +def suite(): + """Return a test suite consisting of all the test cases in the module.""" + + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # uncomment this only for testing purposes + + for i in range(niter): + theSuite.addTest(common.make_suite(ExprNumPy)) + theSuite.addTest(common.make_suite(ExprArray)) + theSuite.addTest(common.make_suite(ExprCArray)) + theSuite.addTest(common.make_suite(ExprEArray)) + theSuite.addTest(common.make_suite(ExprColumn)) + theSuite.addTest(common.make_suite(MixedContainers0)) + theSuite.addTest(common.make_suite(MixedContainers1)) + 
theSuite.addTest(common.make_suite(MixedContainers2)) + theSuite.addTest(common.make_suite(MixedContainers3)) + theSuite.addTest(common.make_suite(UnalignedObject)) + theSuite.addTest(common.make_suite(NonContiguousObject)) + theSuite.addTest(common.make_suite(ExprError)) + theSuite.addTest(common.make_suite(Broadcast0)) + theSuite.addTest(common.make_suite(Broadcast1)) + theSuite.addTest(common.make_suite(Broadcast2)) + theSuite.addTest(common.make_suite(Broadcast3)) + theSuite.addTest(common.make_suite(Broadcast4)) + theSuite.addTest(common.make_suite(Broadcast5)) + theSuite.addTest(common.make_suite(DiffLength0)) + theSuite.addTest(common.make_suite(DiffLength1)) + theSuite.addTest(common.make_suite(DiffLength2)) + theSuite.addTest(common.make_suite(DiffLength3)) + theSuite.addTest(common.make_suite(DiffLength4)) + theSuite.addTest(common.make_suite(TypesTestCase)) + theSuite.addTest(common.make_suite(FunctionsTestCase)) + theSuite.addTest(common.make_suite(Maindim0)) + theSuite.addTest(common.make_suite(Maindim1)) + theSuite.addTest(common.make_suite(Maindim2)) + theSuite.addTest(common.make_suite(Maindim3)) + theSuite.addTest(common.make_suite(AppendModeTrue)) + theSuite.addTest(common.make_suite(AppendModeFalse)) + theSuite.addTest(common.make_suite(iter0)) + theSuite.addTest(common.make_suite(iter1)) + theSuite.addTest(common.make_suite(iter2)) + theSuite.addTest(common.make_suite(iter3)) + theSuite.addTest(common.make_suite(iter4)) + theSuite.addTest(common.make_suite(iter5)) + theSuite.addTest(common.make_suite(setOutputRange0)) + theSuite.addTest(common.make_suite(setOutputRange1)) + theSuite.addTest(common.make_suite(setOutputRange2)) + theSuite.addTest(common.make_suite(setOutputRange3)) + theSuite.addTest(common.make_suite(setOutputRange4)) + theSuite.addTest(common.make_suite(setOutputRange5)) + theSuite.addTest(common.make_suite(setOutputRange6)) + theSuite.addTest(common.make_suite(setOutputRange7)) + 
theSuite.addTest(common.make_suite(setOutputRange8)) + theSuite.addTest(common.make_suite(setOutputRange9)) + theSuite.addTest(common.make_suite(VeryLargeInputs1)) + if common.heavy: + theSuite.addTest(common.make_suite(VeryLargeInputs2)) + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_garbage.py b/venv/Lib/site-packages/tables/tests/test_garbage.py new file mode 100644 index 0000000..2e2331a --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_garbage.py @@ -0,0 +1,53 @@ +"""Test module for detecting uncollectable garbage in PyTables. + +This test module *must* be loaded in the last place. It just checks for +the existence of uncollectable garbage in ``gc.garbage`` after running +all the tests. + +""" + +import gc + +from tables.tests import common + + +class GarbageTestCase(common.PyTablesTestCase): + """Test for uncollectable garbage.""" + + def test00(self): + """Checking for uncollectable garbage.""" + + garbageLen = len(gc.garbage) + if garbageLen == 0: + return # success + + if common.verbose: + classCount = {} + # Count uncollected objects for each class. + for obj in gc.garbage: + objClass = obj.__class__.__name__ + if objClass in classCount: + classCount[objClass] += 1 + else: + classCount[objClass] = 1 + incidence = [ + "``%s``: %d" % (cls, cnt) for (cls, cnt) in classCount.items() + ] + print("Class incidence:", ", ".join(incidence)) + self.fail("Possible leak: %d uncollected objects." 
% garbageLen) + + +def suite(): + """Return a test suite consisting of all the test cases in the module.""" + + theSuite = common.unittest.TestSuite() + theSuite.addTest(common.make_suite(GarbageTestCase)) + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_hdf5compat.py b/venv/Lib/site-packages/tables/tests/test_hdf5compat.py new file mode 100644 index 0000000..d7f2d7e --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_hdf5compat.py @@ -0,0 +1,445 @@ +"""Test module for compatibility with plain HDF files.""" + +import shutil +import tempfile +from pathlib import Path + +import numpy as np + +import tables as tb +from tables.tests import common + + +class PaddedArrayTestCase(common.TestFileMixin, common.PyTablesTestCase): + """Test for H5T_COMPOUND (Table) datatype with padding. + + Regression test for issue gh-734 + + itemsize.h5 was created with h5py with the array `expectedData` (see below) + in the table `/Test`: + 'A' and 'B' are 4 + 4 bytes, with 8 bytes padding. 
+ + $ h5ls -v itemsize.h5 + Test Dataset {3/3} + Location: 1:800 + Links: 1 + Storage: 48 logical bytes, 48 allocated bytes, 100.00% utilization + Type: struct { + "A" +0 native unsigned int + "B" +4 native unsigned int + } 16 bytes + + """ + + h5fname = common.test_filename("itemsize.h5") + + def test(self): + arr = self.h5file.get_node("/Test") + data = arr.read() + expectedData = np.array( + [(1, 11), (2, 12), (3, 13)], + dtype={ + "names": ["A", "B"], + "formats": [" 2 + table.row["var2"] = i % 2 + table.row["var3"] = i + table.row["var4"] = float(self.nrows - i - 1) + table.row.append() + table.flush() + # Index all entries: + for col in table.colinstances.values(): + indexrows = col.create_index(_blocksizes=small_blocksizes) + if common.verbose: + print("Number of written rows:", self.nrows) + print("Number of indexed rows:", indexrows) + + return + + def test00_flushLastRow(self): + """Checking flushing an Index incrementing only the last row.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test00_flushLastRow..." % self.__class__.__name__ + ) + + # Open the HDF5 file in append mode + self.h5file = tb.open_file(self.h5fname, mode="a") + table = self.h5file.root.table + # Add just 3 rows more + for i in range(3): + table.row["var1"] = str(i).encode("ascii") + table.row.append() + table.flush() # redo the indexes + idxcol = table.cols.var1.index + if common.verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Number of elements per slice:", idxcol.slicesize) + print("Chunk size:", idxcol.sorted.chunksize) + print("Elements in last row:", idxcol.indicesLR[-1]) + + # Do a selection + results = [p["var1"] for p in table.where('var1 == b"1"')] + self.assertEqual(len(results), 2) + self.assertEqual(results, [b"1"] * 2) + + def test00_update(self): + """Checking automatic re-indexing after an update operation.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_update..." 
% self.__class__.__name__) + + # Open the HDF5 file in append mode + self.h5file = tb.open_file(self.h5fname, mode="a") + table = self.h5file.root.table + # Modify a couple of columns + for i, row in enumerate(table.where("(var3>1) & (var3<5)")): + row["var1"] = str(i) + row["var3"] = i + row.update() + table.flush() # redo the indexes + idxcol1 = table.cols.var1.index + idxcol3 = table.cols.var3.index + if common.verbose: + print("Dirtyness of var1 col:", idxcol1.dirty) + print("Dirtyness of var3 col:", idxcol3.dirty) + self.assertEqual(idxcol1.dirty, False) + self.assertEqual(idxcol3.dirty, False) + + # Do a couple of selections + results = [p["var1"] for p in table.where('var1 == b"1"')] + self.assertEqual(len(results), 2) + self.assertEqual(results, [b"1"] * 2) + results = [p["var3"] for p in table.where("var3 == 0")] + self.assertEqual(len(results), 2) + self.assertEqual(results, [0] * 2) + + def test01_readIndex(self): + """Checking reading an Index (string flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_readIndex..." % self.__class__.__name__) + + # Open the HDF5 file in read-only mode + self.h5file = tb.open_file(self.h5fname, mode="r") + table = self.h5file.root.table + idxcol = table.cols.var1.index + if common.verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Number of elements per slice:", idxcol.slicesize) + print("Chunk size:", idxcol.sorted.chunksize) + + # Do a selection + results = [p["var1"] for p in table.where('var1 == b"1"')] + self.assertEqual(len(results), 1) + self.assertEqual(results, [b"1"]) + + def test02_readIndex(self): + """Checking reading an Index (bool flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_readIndex..." 
% self.__class__.__name__) + + # Open the HDF5 file in read-only mode + self.h5file = tb.open_file(self.h5fname, mode="r") + table = self.h5file.root.table + idxcol = table.cols.var2.index + if common.verbose: + print("Rows in table:", table.nrows) + print("Max rows in buf:", table.nrowsinbuf) + print("Number of elements per slice:", idxcol.slicesize) + print("Chunk size:", idxcol.sorted.chunksize) + + # Do a selection + results = [p["var2"] for p in table.where("var2 == True")] + if common.verbose: + print("Selected values:", results) + self.assertEqual(len(results), self.nrows // 2) + self.assertEqual(results, [True] * (self.nrows // 2)) + + def test03_readIndex(self): + """Checking reading an Index (int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_readIndex..." % self.__class__.__name__) + + # Open the HDF5 file in read-only mode + self.h5file = tb.open_file(self.h5fname, mode="r") + table = self.h5file.root.table + idxcol = table.cols.var3.index + if common.verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Number of elements per slice:", idxcol.slicesize) + print("Chunk size:", idxcol.sorted.chunksize) + + # Do a selection + results = [p["var3"] for p in table.where("(1 500: + tests.append(self.nrows - 500) + for limit in tests: + handle_a = [0, table.where("(var3 < e)", dict(e=limit))] + handle_b = [0, table.where("(var3 < e)", dict(e=limit))] + + try: + while True: + next(handle_b[1]) + handle_b[0] += 1 + except StopIteration: + for _ in handle_a[1]: + handle_a[0] += 1 + for _ in handle_b[1]: + handle_b[0] += 1 + + self.assertEqual(handle_a[0], limit) + self.assertEqual(handle_b[0], limit) + self.assertEqual( + len(list(table.where("(var3 < e)", dict(e=limit)))), limit + ) + + +small_ss = small_blocksizes[2] + + +class BasicReadTestCase(BasicTestCase): + compress = 0 + complib = "zlib" + shuffle = 0 + fletcher32 = 0 + nrows = small_ss + + +class ZlibReadTestCase(BasicTestCase): + compress = 1 + complib = 
"zlib" + shuffle = 0 + fletcher32 = 0 + nrows = small_ss + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class BloscReadTestCase(BasicTestCase): + compress = 1 + complib = "blosc" + shuffle = 0 + fletcher32 = 0 + nrows = small_ss + + +@common.unittest.skipIf( + not common.lzo_avail, "LZO compression library not available" +) +class LZOReadTestCase(BasicTestCase): + compress = 1 + complib = "lzo" + shuffle = 0 + fletcher32 = 0 + nrows = small_ss + + +@common.unittest.skipIf( + not common.bzip2_avail, "BZIP2 compression library not available" +) +class Bzip2ReadTestCase(BasicTestCase): + compress = 1 + complib = "bzip2" + shuffle = 0 + fletcher32 = 0 + nrows = small_ss + + +class ShuffleReadTestCase(BasicTestCase): + compress = 1 + complib = "zlib" + shuffle = 1 + fletcher32 = 0 + nrows = small_ss + + +class Fletcher32ReadTestCase(BasicTestCase): + compress = 1 + complib = "zlib" + shuffle = 0 + fletcher32 = 1 + nrows = small_ss + + +class ShuffleFletcher32ReadTestCase(BasicTestCase): + compress = 1 + complib = "zlib" + shuffle = 1 + fletcher32 = 1 + nrows = small_ss + + +class OneHalfTestCase(BasicTestCase): + nrows = small_ss + small_ss // 2 + + +class UpperBoundTestCase(BasicTestCase): + nrows = small_ss + 1 + + +class LowerBoundTestCase(BasicTestCase): + nrows = small_ss * 2 - 1 + + +class DeepTableIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + nrows = minRowIndex + + def test01(self): + """Checking the indexing of a table in a 2nd level hierarchy""" + + # Create an instance of an HDF5 Table + group = self.h5file.create_group(self.h5file.root, "agroup") + # Create a table + title = "This is the IndexArray title" + table = self.h5file.create_table( + group, "table", TDescr, title, None, self.nrows + ) + for i in range(self.nrows): + # Fill rows with defaults + table.row.append() + table.flush() + # Index some column + indexrows = table.cols.var1.create_index() + self.assertIsNotNone(indexrows) 
+ idxcol = table.cols.var1.index + # Some sanity checks + self.assertEqual(table.colindexed["var1"], 1) + self.assertIsNotNone(idxcol) + self.assertEqual(idxcol.nelements, self.nrows) + + def test01b(self): + """Checking the indexing of a table in 2nd level + (persistent version)""" + + # Create an instance of an HDF5 Table + group = self.h5file.create_group(self.h5file.root, "agroup") + + # Create a table + title = "This is the IndexArray title" + table = self.h5file.create_table( + group, "table", TDescr, title, None, self.nrows + ) + for i in range(self.nrows): + # Fill rows with defaults + table.row.append() + table.flush() + + # Index some column + indexrows = table.cols.var1.create_index() + self.assertIsNotNone(indexrows) + idxcol = table.cols.var1.index + + # Close and re-open this file + self._reopen(mode="a") + + table = self.h5file.root.agroup.table + idxcol = table.cols.var1.index + # Some sanity checks + self.assertEqual(table.colindexed["var1"], 1) + self.assertIsNotNone(idxcol) + self.assertEqual(idxcol.nelements, self.nrows) + + def test02(self): + """Checking the indexing of a table in a 4th level hierarchy""" + + # Create an instance of an HDF5 Table + group = self.h5file.create_group(self.h5file.root, "agroup") + group = self.h5file.create_group(group, "agroup") + group = self.h5file.create_group(group, "agroup") + + # Create a table + title = "This is the IndexArray title" + table = self.h5file.create_table( + group, "table", TDescr, title, None, self.nrows + ) + for i in range(self.nrows): + # Fill rows with defaults + table.row.append() + table.flush() + + # Index some column + indexrows = table.cols.var1.create_index() + self.assertIsNotNone(indexrows) + idxcol = table.cols.var1.index + + # Some sanity checks + self.assertEqual(table.colindexed["var1"], 1) + self.assertIsNotNone(idxcol) + self.assertEqual(idxcol.nelements, self.nrows) + + def test02b(self): + """Checking the indexing of a table in a 4th level + (persistent version)""" + + # 
Create an instance of an HDF5 Table + group = self.h5file.create_group(self.h5file.root, "agroup") + group = self.h5file.create_group(group, "agroup") + group = self.h5file.create_group(group, "agroup") + + # Create a table + title = "This is the IndexArray title" + table = self.h5file.create_table( + group, "table", TDescr, title, None, self.nrows + ) + for i in range(self.nrows): + # Fill rows with defaults + table.row.append() + table.flush() + + # Index some column + indexrows = table.cols.var1.create_index() + self.assertIsNotNone(indexrows) + idxcol = table.cols.var1.index + + # Close and re-open this file + self._reopen(mode="a") + + table = self.h5file.root.agroup.agroup.agroup.table + idxcol = table.cols.var1.index + + # Some sanity checks + self.assertEqual(table.colindexed["var1"], 1) + self.assertIsNotNone(idxcol) + self.assertEqual(idxcol.nelements, self.nrows) + + def test03(self): + """Checking the indexing of a table in a 100th level hierarchy""" + + # Create an instance of an HDF5 Table + group = self.h5file.root + for i in range(100): + group = self.h5file.create_group(group, "agroup") + + # Create a table + title = "This is the IndexArray title" + table = self.h5file.create_table( + group, "table", TDescr, title, None, self.nrows + ) + for i in range(self.nrows): + # Fill rows with defaults + table.row.append() + table.flush() + + # Index some column + indexrows = table.cols.var1.create_index() + self.assertIsNotNone(indexrows) + idxcol = table.cols.var1.index + + # Some sanity checks + self.assertEqual(table.colindexed["var1"], 1) + self.assertIsNotNone(idxcol) + self.assertEqual(idxcol.nelements, self.nrows) + + +class IndexProps: + def __init__( + self, + auto=tb.index.default_auto_index, + filters=tb.index.default_index_filters, + ): + self.auto = auto + self.filters = filters + + +DefaultProps = IndexProps() +NoAutoProps = IndexProps(auto=False) +ChangeFiltersProps = IndexProps( + filters=tb.Filters( + complevel=6, complib="zlib", 
shuffle=False, fletcher32=False + ) +) + + +class AutomaticIndexingTestCase(common.TempFileMixin, common.PyTablesTestCase): + reopen = 1 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + small_blocksizes = (16, 8, 4, 2) + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + title = "This is the IndexArray title" + root = self.h5file.root + + # Make the chunkshape smaller or equal than small_blocksizes[-1] + chunkshape = (2,) + self.table = self.h5file.create_table( + root, + "table", + TDescr, + title, + None, + self.nrows, + chunkshape=chunkshape, + ) + self.table.autoindex = self.iprops.auto + for colname in self.colsToIndex: + self.table.colinstances[colname].create_index( + _blocksizes=self.small_blocksizes + ) + for i in range(self.nrows): + # Fill rows with defaults + self.table.row.append() + self.table.flush() + if self.reopen: + self._reopen(mode="a") + self.table = self.h5file.root.table + + def test01_attrs(self): + """Checking indexing attributes (part1)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_attrs..." 
% self.__class__.__name__) + + table = self.table + if self.iprops is DefaultProps: + self.assertEqual(table.indexed, 0) + else: + self.assertEqual(table.indexed, 1) + if self.iprops is DefaultProps: + self.assertEqual(table.colindexed["var1"], 0) + self.assertIsNone(table.cols.var1.index) + self.assertEqual(table.colindexed["var2"], 0) + self.assertIsNone(table.cols.var2.index) + self.assertEqual(table.colindexed["var3"], 0) + self.assertIsNone(table.cols.var3.index) + self.assertEqual(table.colindexed["var4"], 0) + self.assertIsNone(table.cols.var4.index) + else: + # Check that the var1, var2 and var3 (and only these) + # has been indexed + self.assertEqual(table.colindexed["var1"], 1) + self.assertIsNotNone(table.cols.var1.index) + self.assertEqual(table.colindexed["var2"], 1) + self.assertIsNotNone(table.cols.var2.index) + self.assertEqual(table.colindexed["var3"], 1) + self.assertIsNotNone(table.cols.var3.index) + self.assertEqual(table.colindexed["var4"], 0) + self.assertIsNone(table.cols.var4.index) + + def test02_attrs(self): + """Checking indexing attributes (part2)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_attrs..." 
% self.__class__.__name__) + + table = self.table + + # Check the policy parameters + if common.verbose: + if table.indexed: + print("index props:", table.autoindex) + else: + print("Table is not indexed") + + # Check non-default values for index saving policy + if self.iprops is NoAutoProps: + self.assertFalse(table.autoindex) + elif self.iprops is ChangeFiltersProps: + self.assertTrue(table.autoindex) + + # Check Index() objects exists and are properly placed + if self.iprops is DefaultProps: + self.assertEqual(table.cols.var1.index, None) + self.assertEqual(table.cols.var2.index, None) + self.assertEqual(table.cols.var3.index, None) + self.assertEqual(table.cols.var4.index, None) + else: + self.assertIsInstance(table.cols.var1.index, tb.index.Index) + self.assertIsInstance(table.cols.var2.index, tb.index.Index) + self.assertIsInstance(table.cols.var3.index, tb.index.Index) + self.assertEqual(table.cols.var4.index, None) + + def test03_counters(self): + """Checking indexing counters""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_counters..." % self.__class__.__name__) + table = self.table + + # Check the counters for indexes + if common.verbose: + if table.indexed: + print("indexedrows:", table._indexedrows) + print("unsavedindexedrows:", table._unsaved_indexedrows) + index = table.cols.var1.index + print("table rows:", table.nrows) + print("computed indexed rows:", index.nrows * index.slicesize) + else: + print("Table is not indexed") + if self.iprops is not DefaultProps: + index = table.cols.var1.index + indexedrows = index.nelements + self.assertEqual(table._indexedrows, indexedrows) + indexedrows = index.nelements + self.assertEqual( + table._unsaved_indexedrows, self.nrows - indexedrows + ) + + def test04_noauto(self): + """Checking indexing counters (non-automatic mode)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_noauto..." 
% self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Check the counters for indexes + if common.verbose: + if table.indexed: + print("indexedrows:", table._indexedrows) + print("unsavedindexedrows:", table._unsaved_indexedrows) + index = table.cols.var1.index + print("computed indexed rows:", index.nelements) + else: + print("Table is not indexed") + + # No unindexated rows should remain + index = table.cols.var1.index + if self.iprops is DefaultProps: + self.assertIsNone(index) + else: + indexedrows = index.nelements + self.assertEqual(table._indexedrows, index.nelements) + self.assertEqual( + table._unsaved_indexedrows, self.nrows - indexedrows + ) + + # Check non-default values for index saving policy + if self.iprops is NoAutoProps: + self.assertFalse(table.autoindex) + elif self.iprops is ChangeFiltersProps: + self.assertTrue(table.autoindex) + + def test05_icounters(self): + """Checking indexing counters (remove_rows)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_icounters..." 
% self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Non indexated rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + unsavedindexedrows = table._unsaved_indexedrows + + # Now, remove some rows: + table.remove_rows(2, 4) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check the counters for indexes + if common.verbose: + if table.indexed: + print("indexedrows:", table._indexedrows) + print("original indexedrows:", indexedrows) + print("unsavedindexedrows:", table._unsaved_indexedrows) + print("original unsavedindexedrows:", unsavedindexedrows) + # index = table.cols.var1.index + print("index dirty:", table.cols.var1.index.dirty) + else: + print("Table is not indexed") + + # Check the counters + self.assertEqual(table.nrows, self.nrows - 2) + if self.iprops is NoAutoProps: + self.assertTrue(table.cols.var1.index.dirty) + + # Check non-default values for index saving policy + if self.iprops is NoAutoProps: + self.assertFalse(table.autoindex) + elif self.iprops is ChangeFiltersProps: + self.assertTrue(table.autoindex) + + def test06_dirty(self): + """Checking dirty flags (remove_rows action)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06_dirty..." 
% self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Now, remove some rows: + table.remove_rows(3, 5) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check the dirty flag for indexes + if common.verbose: + print("auto flag:", table.autoindex) + for colname in table.colnames: + if table.cols._f_col(colname).index: + print( + "dirty flag col %s: %s" + % (colname, table.cols._f_col(colname).index.dirty) + ) + # Check the flags + for colname in table.colnames: + if table.cols._f_col(colname).index: + if not table.autoindex: + self.assertEqual( + table.cols._f_col(colname).index.dirty, True + ) + else: + self.assertEqual( + table.cols._f_col(colname).index.dirty, False + ) + + def test07_noauto(self): + """Checking indexing counters (modify_rows, no-auto mode)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07_noauto..." % self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # No unindexated rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + unsavedindexedrows = table._unsaved_indexedrows + + # Now, modify just one row: + table.modify_rows(3, None, 1, [("asa", 0, 3, 3.1)]) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check the counters for indexes + if common.verbose: + if table.indexed: + print("indexedrows:", table._indexedrows) + print("original indexedrows:", indexedrows) + print("unsavedindexedrows:", table._unsaved_indexedrows) + print("original unsavedindexedrows:", unsavedindexedrows) + index = table.cols.var1.index + print("computed indexed rows:", index.nelements) + else: + print("Table is not indexed") + + # Check the counters + self.assertEqual(table.nrows, self.nrows) + if self.iprops is NoAutoProps: + self.assertTrue(table.cols.var1.index.dirty) + + # Check the dirty flag for indexes + if 
common.verbose: + for colname in table.colnames: + if table.cols._f_col(colname).index: + print( + "dirty flag col %s: %s" + % (colname, table.cols._f_col(colname).index.dirty) + ) + for colname in table.colnames: + if table.cols._f_col(colname).index: + if not table.autoindex: + self.assertEqual( + table.cols._f_col(colname).index.dirty, True + ) + else: + self.assertEqual( + table.cols._f_col(colname).index.dirty, False + ) + + def test07b_noauto(self): + """Checking indexing queries (modify in iterator, no-auto mode)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07b_noauto..." % self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Do a query that uses indexes + res = [row.nrow for row in table.where("(var2 == True) & (var3 > 0)")] + + # Now, modify just one row: + for row in table: + if row.nrow == 3: + row["var1"] = "asa" + row["var2"] = True + row["var3"] = 3 + row["var4"] = 3.1 + row.update() + table.flush() + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + + # Do a query that uses indexes + resq = [row.nrow for row in table.where("(var2 == True) & (var3 > 0)")] + res_ = res + [3] + if common.verbose: + print("AutoIndex?:", table.autoindex) + print("Query results (original):", res) + print("Query results (after modifying table):", resq) + print("Should look like:", res_) + self.assertEqual(res_, resq) + + def test07c_noauto(self): + """Checking indexing queries (append, no-auto mode)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07c_noauto..." 
% self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Do a query that uses indexes + res = [row.nrow for row in table.where("(var2 == True) & (var3 > 0)")] + + # Now, append three rows + table.append([("asa", True, 1, 3.1)]) + table.append([("asb", True, 2, 3.1)]) + table.append([("asc", True, 3, 3.1)]) + table.flush() + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + + # Do a query that uses indexes + resq = [row.nrow for row in table.where("(var2 == True) & (var3 > 0)")] + res_ = res + [table.nrows - 3, table.nrows - 2, table.nrows - 1] + if common.verbose: + print("AutoIndex?:", table.autoindex) + print("Query results (original):", res) + print("Query results (after modifying table):", resq) + print("Should look like:", res_) + self.assertEqual(res_, resq) + + def test08_dirty(self): + """Checking dirty flags (modify_columns)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08_dirty..." 
% self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Non indexed rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + self.assertIsNotNone(indexedrows) + unsavedindexedrows = table._unsaved_indexedrows + self.assertIsNotNone(unsavedindexedrows) + + # Now, modify a couple of rows: + table.modify_columns( + 1, columns=[["asa", "asb"], [1.0, 2.0]], names=["var1", "var4"] + ) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check the counters + self.assertEqual(table.nrows, self.nrows) + if self.iprops is NoAutoProps: + self.assertTrue(table.cols.var1.index.dirty) + + # Check the dirty flag for indexes + if common.verbose: + for colname in table.colnames: + if table.cols._f_col(colname).index: + print( + "dirty flag col %s: %s" + % (colname, table.cols._f_col(colname).index.dirty) + ) + for colname in table.colnames: + if table.cols._f_col(colname).index: + if not table.autoindex: + if colname in ["var1"]: + self.assertEqual( + table.cols._f_col(colname).index.dirty, True + ) + else: + self.assertEqual( + table.cols._f_col(colname).index.dirty, False + ) + else: + self.assertEqual( + table.cols._f_col(colname).index.dirty, False + ) + + def test09a_propIndex(self): + """Checking propagate Index feature in Table.copy() (attrs)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09a_propIndex..." 
% self.__class__.__name__) + table = self.table + + # Don't force a sync in indexes + # table.flush_rows_to_index() + # Non indexed rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + self.assertIsNotNone(indexedrows) + unsavedindexedrows = table._unsaved_indexedrows + self.assertIsNotNone(unsavedindexedrows) + + # Now, remove some rows to make columns dirty + # table.remove_rows(3,5) + # Copy a Table to another location + table2 = table.copy("/", "table2", propindexes=True) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + table2 = self.h5file.root.table2 + + index1 = table.cols.var1.index + index2 = table2.cols.var1.index + if common.verbose: + print("Copied index:", index2) + print("Original index:", index1) + if index1: + print("Elements in copied index:", index2.nelements) + print("Elements in original index:", index1.nelements) + + # Check the counters + self.assertEqual(table.nrows, table2.nrows) + if table.indexed: + self.assertTrue(table2.indexed) + if self.iprops is DefaultProps: + # No index: the index should not exist + self.assertIsNone(index1) + self.assertIsNone(index2) + elif self.iprops is NoAutoProps: + self.assertIsNotNone(index2) + + # Check the dirty flag for indexes + if common.verbose: + for colname in table2.colnames: + if table2.cols._f_col(colname).index: + print( + "dirty flag col %s: %s" + % (colname, table2.cols._f_col(colname).index.dirty) + ) + for colname in table2.colnames: + if table2.cols._f_col(colname).index: + self.assertEqual( + table2.cols._f_col(colname).index.dirty, False + ) + + def test09b_propIndex(self): + """Checking that propindexes=False works""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09b_propIndex..." 
% self.__class__.__name__) + table = self.table + + # Don't force a sync in indexes + # table.flush_rows_to_index() + # Non indexed rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + self.assertIsNotNone(indexedrows) + unsavedindexedrows = table._unsaved_indexedrows + self.assertIsNotNone(unsavedindexedrows) + + # Now, remove some rows to make columns dirty + # table.remove_rows(3,5) + # Copy a Table to another location + table2 = table.copy("/", "table2", propindexes=False) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + table2 = self.h5file.root.table2 + + if common.verbose: + print("autoindex?:", self.iprops.auto) + print("Copied index indexed?:", table2.cols.var1.is_indexed) + print("Original index indexed?:", table.cols.var1.is_indexed) + if self.iprops is DefaultProps: + # No index: the index should not exist + self.assertFalse(table2.cols.var1.is_indexed) + self.assertFalse(table.cols.var1.is_indexed) + elif self.iprops is NoAutoProps: + self.assertFalse(table2.cols.var1.is_indexed) + self.assertTrue(table.cols.var1.is_indexed) + + def test10_propIndex(self): + """Checking propagate Index feature in Table.copy() (values)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10_propIndex..." 
% self.__class__.__name__) + table = self.table + + # Don't force a sync in indexes + # table.flush_rows_to_index() + # Non indexed rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + self.assertIsNotNone(indexedrows) + unsavedindexedrows = table._unsaved_indexedrows + self.assertIsNotNone(unsavedindexedrows) + + # Now, remove some rows to make columns dirty + # table.remove_rows(3,5) + # Copy a Table to another location + table2 = table.copy("/", "table2", propindexes=True) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + table2 = self.h5file.root.table2 + + index1 = table.cols.var3.index + index2 = table2.cols.var3.index + if common.verbose: + print("Copied index:", index2) + print("Original index:", index1) + if index1: + print("Elements in copied index:", index2.nelements) + print("Elements in original index:", index1.nelements) + + def test11_propIndex(self): + """Checking propagate Index feature in Table.copy() (dirty flags)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test11_propIndex..." 
% self.__class__.__name__) + table = self.table + + # Force a sync in indexes + table.flush_rows_to_index() + + # Non indexed rows should remain here + if self.iprops is not DefaultProps: + indexedrows = table._indexedrows + self.assertIsNotNone(indexedrows) + unsavedindexedrows = table._unsaved_indexedrows + self.assertIsNotNone(unsavedindexedrows) + + # Now, modify an indexed column and an unindexed one + # to make the "var1" dirty + table.modify_columns( + 1, columns=[["asa", "asb"], [1.0, 2.0]], names=["var1", "var4"] + ) + + # Copy a Table to another location + table2 = table.copy("/", "table2", propindexes=True) + if self.reopen: + self._reopen(mode="a") + table = self.h5file.root.table + table2 = self.h5file.root.table2 + + index1 = table.cols.var1.index + index2 = table2.cols.var1.index + if common.verbose: + print("Copied index:", index2) + print("Original index:", index1) + if index1: + print("Elements in copied index:", index2.nelements) + print("Elements in original index:", index1.nelements) + + # Check the dirty flag for indexes + if common.verbose: + for colname in table2.colnames: + if table2.cols._f_col(colname).index: + print( + "dirty flag col %s: %s" + % (colname, table2.cols._f_col(colname).index.dirty) + ) + for colname in table2.colnames: + if table2.cols._f_col(colname).index: + if table2.autoindex: + # All the destination columns should be non-dirty because + # the copy removes the dirty state and puts the + # index in a sane state + self.assertEqual( + table2.cols._f_col(colname).index.dirty, False + ) + + +# minRowIndex = 10000 # just if one wants more indexed rows to be checked +class AI1TestCase(AutomaticIndexingTestCase): + # nrows = 10002 + nrows = 102 + reopen = 0 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI2TestCase(AutomaticIndexingTestCase): + # nrows = 10002 + nrows = 102 + reopen = 1 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class 
AI4bTestCase(AutomaticIndexingTestCase): + # nrows = 10012 + nrows = 112 + reopen = 1 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI5TestCase(AutomaticIndexingTestCase): + sbs, bs, ss, cs = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + nrows = ss * 11 - 1 + reopen = 0 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI6TestCase(AutomaticIndexingTestCase): + sbs, bs, ss, cs = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + nrows = ss * 21 + 1 + reopen = 1 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI7TestCase(AutomaticIndexingTestCase): + sbs, bs, ss, cs = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + nrows = ss * 12 - 1 + # nrows = ss * 1-1 # faster test + reopen = 0 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI8TestCase(AutomaticIndexingTestCase): + sbs, bs, ss, cs = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + nrows = ss * 15 + 100 + # nrows = ss * 1 + 100 # faster test + reopen = 1 + iprops = NoAutoProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI9TestCase(AutomaticIndexingTestCase): + sbs, bs, ss, cs = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + nrows = ss + reopen = 0 + iprops = DefaultProps + colsToIndex = [] + + +class AI10TestCase(AutomaticIndexingTestCase): + # nrows = 10002 + nrows = 102 + reopen = 1 + iprops = DefaultProps + colsToIndex = [] + + +class AI11TestCase(AutomaticIndexingTestCase): + # nrows = 10002 + nrows = 102 + reopen = 0 + iprops = ChangeFiltersProps + colsToIndex = ["var1", "var2", "var3"] + + +class AI12TestCase(AutomaticIndexingTestCase): + # nrows = 10002 + nrows = 102 + reopen = 0 + iprops = ChangeFiltersProps + colsToIndex = ["var1", "var2", "var3"] + + +class ManyNodesTestCase(common.TempFileMixin, common.PyTablesTestCase): + opem_kwargs = dict(node_cache_slots=64) + + def test00(self): + """Indexing many nodes in one single session (based on bug #26)""" + + 
IdxRecord = { + "f0": tb.Int8Col(), + "f1": tb.Int8Col(), + "f2": tb.Int8Col(), + } + + for qn in range(5): + for sn in range(5): + qchr = "chr" + str(qn) + name = "chr" + str(sn) + path = "/at/%s/pt" % (qchr) + table = self.h5file.create_table( + path, name, IdxRecord, createparents=1 + ) + table.cols.f0.create_index() + table.cols.f1.create_index() + table.cols.f2.create_index() + table.row.append() + table.flush() + + +class IndexPropsChangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test case for changing index properties in a table.""" + + class MyDescription(tb.IsDescription): + icol = tb.IntCol() + + oldIndexProps = IndexProps() + newIndexProps = IndexProps(auto=False, filters=tb.Filters(complevel=9)) + + def setUp(self): + super().setUp() + + table = self.h5file.create_table("/", "test", self.MyDescription) + table.autoindex = self.oldIndexProps.auto + row = table.row + for i in range(100): + row["icol"] = i % 25 + row.append() + table.flush() + self.table = table + + def test_attributes(self): + """Storing index properties as table attributes.""" + for refprops in [self.oldIndexProps, self.newIndexProps]: + self.assertEqual(self.table.autoindex, refprops.auto) + self.table.autoindex = self.newIndexProps.auto + + def test_copyattrs(self): + """Copying index properties attributes.""" + oldtable = self.table + newtable = oldtable.copy("/", "test2") + self.assertEqual(oldtable.autoindex, newtable.autoindex) + + +class IndexFiltersTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test case for setting index filters.""" + + def setUp(self): + super().setUp() + description = {"icol": tb.IntCol()} + self.table = self.h5file.create_table("/", "test", description) + + def test_createIndex(self): + """Checking input parameters in new indexes.""" + # Different from default. 
+ argfilters = copy.copy(tb.index.default_index_filters) + argfilters.shuffle = not tb.index.default_index_filters.shuffle + + # Different both from default and the previous one. + idxfilters = copy.copy(tb.index.default_index_filters) + idxfilters.shuffle = not tb.index.default_index_filters.shuffle + idxfilters.fletcher32 = not tb.index.default_index_filters.fletcher32 + + icol = self.table.cols.icol + + # First create + icol.create_index(kind="ultralight", optlevel=4) + self.assertEqual(icol.index.kind, "ultralight") + self.assertEqual(icol.index.optlevel, 4) + self.assertEqual(icol.index.filters, tb.index.default_index_filters) + icol.remove_index() + + # Second create + icol.create_index(kind="medium", optlevel=3, filters=argfilters) + self.assertEqual(icol.index.kind, "medium") + self.assertEqual(icol.index.optlevel, 3) + self.assertEqual(icol.index.filters, argfilters) + icol.remove_index() + + def test_reindex(self): + """Checking input parameters in recomputed indexes.""" + icol = self.table.cols.icol + icol.create_index( + kind="full", optlevel=5, filters=tb.Filters(complevel=3) + ) + kind = icol.index.kind + optlevel = icol.index.optlevel + filters = icol.index.filters + icol.reindex() + ni = icol.index + if common.verbose: + print(f"Old parameters: {kind}, {optlevel}, {filters}") + print( + "New parameters: {}, {}, {}".format( + ni.kind, ni.optlevel, ni.filters + ) + ) + self.assertEqual(ni.kind, kind) + self.assertEqual(ni.optlevel, optlevel) + self.assertEqual(ni.filters, filters) + + +class OldIndexTestCase(common.TestFileMixin, common.PyTablesTestCase): + h5fname = common.test_filename("idx-std-1.x.h5") + + def test1_x(self): + """Check that files with 1.x indexes are recognized and warned.""" + + self.assertWarns( + tb.exceptions.OldIndexWarning, self.h5file.get_node, "/table" + ) + + +# Sensible parameters for indexing with small blocksizes +small_blocksizes = (512, 128, 32, 8) + + +class CompletelySortedIndexTestCase( + common.TempFileMixin, 
common.PyTablesTestCase +): + """Test case for testing a complete sort in a table.""" + + nrows = 100 + nrowsinbuf = 11 + + class MyDescription(tb.IsDescription): + rcol = tb.IntCol(pos=1) + icol = tb.IntCol(pos=2) + + def setUp(self): + super().setUp() + table = self.h5file.create_table("/", "table", self.MyDescription) + row = table.row + nrows = self.nrows + for i in range(nrows): + row["rcol"] = i + row["icol"] = nrows - i + row.append() + table.flush() + self.table = table + self.icol = self.table.cols.icol + # A full index with maximum optlevel should always be completely sorted + self.icol.create_csindex(_blocksizes=small_blocksizes) + + def test00_isCompletelySortedIndex(self): + """Testing the Column.is_csi property.""" + + icol = self.icol + self.assertEqual(icol.index.is_csi, True) + icol.remove_index() + # Other kinds than full, should never return a CSI + icol.create_index(kind="medium", optlevel=9) + self.assertEqual(icol.index.is_csi, False) + icol.remove_index() + # As the table is small, lesser optlevels should be able to + # create a completely sorted index too. 
+ icol.create_index(kind="full", optlevel=6) + self.assertEqual(icol.index.is_csi, True) + # Checking a CSI in a sorted copy + self.table.copy("/", "table2", sortby="icol", checkCSI=True) + self.assertEqual(icol.index.is_csi, True) + + def test01_readSorted1(self): + """Testing the Index.read_sorted() method with no arguments.""" + + icol = self.icol + sortedcol = np.sort(icol[:]) + sortedcol2 = icol.index.read_sorted() + if common.verbose: + print("Original sorted column:", sortedcol) + print("The values from the index:", sortedcol2) + self.assertTrue(common.allequal(sortedcol, sortedcol2)) + + def test01_readSorted2(self): + """Testing the Index.read_sorted() method with arguments (I).""" + + icol = self.icol + sortedcol = np.sort(icol[:])[30:55] + sortedcol2 = icol.index.read_sorted(30, 55) + if common.verbose: + print("Original sorted column:", sortedcol) + print("The values from the index:", sortedcol2) + self.assertTrue(common.allequal(sortedcol, sortedcol2)) + + def test01_readSorted3(self): + """Testing the Index.read_sorted() method with arguments (II).""" + + icol = self.icol + sortedcol = np.sort(icol[:])[33:97] + sortedcol2 = icol.index.read_sorted(33, 97) + if common.verbose: + print("Original sorted column:", sortedcol) + print("The values from the index:", sortedcol2) + self.assertTrue(common.allequal(sortedcol, sortedcol2)) + + def test02_readIndices1(self): + """Testing the Index.read_indices() method with no arguments.""" + + icol = self.icol + indicescol = np.argsort(icol[:]).astype("uint64") + indicescol2 = icol.index.read_indices() + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test02_readIndices2(self): + """Testing the Index.read_indices() method with arguments (I).""" + + icol = self.icol + indicescol = np.argsort(icol[:])[30:55].astype("uint64") + indicescol2 = icol.index.read_indices(30, 55) + if 
common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test02_readIndices3(self): + """Testing the Index.read_indices() method with arguments (II).""" + + icol = self.icol + indicescol = np.argsort(icol[:])[33:97].astype("uint64") + indicescol2 = icol.index.read_indices(33, 97) + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test02_readIndices4(self): + """Testing the Index.read_indices() method with arguments (III).""" + + icol = self.icol + indicescol = np.argsort(icol[:])[33:97:2].astype("uint64") + indicescol2 = icol.index.read_indices(33, 97, 2) + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test02_readIndices5(self): + """Testing the Index.read_indices() method with arguments (IV).""" + + icol = self.icol + indicescol = np.argsort(icol[:])[33:55:5].astype("uint64") + indicescol2 = icol.index.read_indices(33, 55, 5) + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test02_readIndices6(self): + """Testing the Index.read_indices() method with step only.""" + + icol = self.icol + indicescol = np.argsort(icol[:])[::3].astype("uint64") + indicescol2 = icol.index.read_indices(step=3) + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test03_getitem1(self): + """Testing the Index.__getitem__() method with no arguments.""" + + icol = self.icol + indicescol = np.argsort(icol[:]).astype("uint64") + 
indicescol2 = icol.index[:] + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test03_getitem2(self): + """Testing the Index.__getitem__() method with start.""" + + icol = self.icol + indicescol = np.argsort(icol[:])[31].astype("uint64") + indicescol2 = icol.index[31] + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test03_getitem3(self): + """Testing the Index.__getitem__() method with start, stop.""" + + icol = self.icol + indicescol = np.argsort(icol[:])[2:16].astype("uint64") + indicescol2 = icol.index[2:16] + if common.verbose: + print("Original indices column:", indicescol) + print("The values from the index:", indicescol2) + self.assertTrue(common.allequal(indicescol, indicescol2)) + + def test04_itersorted1(self): + """Testing the Table.itersorted() method with no arguments.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = np.array( + [row.fetch_all_fields() for row in table.itersorted("icol")], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted2(self): + """Testing the Table.itersorted() method with a start.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[15:] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", start=15) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted3(self): + """Testing the 
Table.itersorted() method with a stop.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[:20] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", stop=20) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted4(self): + """Testing the Table.itersorted() method with a start and stop.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[15:20] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", start=15, stop=20) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted5(self): + """Testing the Table.itersorted() method with a start, stop and + step.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[15:45:4] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", start=15, stop=45, step=4) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted6(self): + """Testing the Table.itersorted() method with a start, stop and + step.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[33:55:5] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", start=33, stop=55, step=5) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, 
sortedtable2)) + + def test04_itersorted7(self): + """Testing the Table.itersorted() method with checkCSI=True.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", checkCSI=True) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted8(self): + """Testing the Table.itersorted() method with a start, stop and + negative step.""" + + # see also gh-252 + table = self.table + sortedtable = np.sort(table[:], order="icol")[55:33:-5] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", start=55, stop=33, step=-5) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted9(self): + """Testing the Table.itersorted() method with a negative step -5.""" + + # see also gh-252 + table = self.table + sortedtable = np.sort(table[:], order="icol")[::-5] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", step=-5) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test04_itersorted10(self): + """Testing the Table.itersorted() method with a negative step -1.""" + + # see also gh-252 + table = self.table + sortedtable = np.sort(table[:], order="icol")[::-1] + sortedtable2 = np.array( + [ + row.fetch_all_fields() + for row in table.itersorted("icol", step=-1) + ], + dtype=table._v_dtype, + ) + if common.verbose: + print("Original sorted table:", 
sortedtable) + print("The values from the iterator:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted1(self): + """Testing the Table.read_sorted() method with no arguments.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = table.read_sorted("icol") + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted2(self): + """Testing the Table.read_sorted() method with a start.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[16:17] + sortedtable2 = table.read_sorted("icol", start=16) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted3(self): + """Testing the Table.read_sorted() method with a start and stop.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[16:33] + sortedtable2 = table.read_sorted("icol", start=16, stop=33) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted4(self): + """Testing the Table.read_sorted() method with a start, stop and + step.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[33:55:5] + sortedtable2 = table.read_sorted("icol", start=33, stop=55, step=5) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted5(self): + """Testing the Table.read_sorted() method with only a step.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[::3] + 
sortedtable2 = table.read_sorted("icol", step=3) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted6(self): + """Testing the Table.read_sorted() method with negative step.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[::-1] + sortedtable2 = table.read_sorted("icol", step=-1) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted7(self): + """Testing the Table.read_sorted() method with negative step (II).""" + + table = self.table + sortedtable = np.sort(table[:], order="icol")[::-2] + sortedtable2 = table.read_sorted("icol", step=-2) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted8(self): + """Testing the Table.read_sorted() method with negative step (III)).""" + + table = self.table + sstart = 100 - 24 - 1 + sstop = 100 - 54 - 1 + sortedtable = np.sort(table[:], order="icol")[sstart:sstop:-1] + sortedtable2 = table.read_sorted("icol", start=24, stop=54, step=-1) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted9(self): + """Testing the Table.read_sorted() method with negative step (IV)).""" + + table = self.table + sstart = 100 - 14 - 1 + sstop = 100 - 54 - 1 + sortedtable = np.sort(table[:], order="icol")[sstart:sstop:-3] + sortedtable2 = table.read_sorted("icol", start=14, stop=54, step=-3) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) 
+ self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted10(self): + """Testing the Table.read_sorted() method with negative step (V)).""" + + table = self.table + sstart = 100 - 24 - 1 + sstop = 100 - 25 - 1 + sortedtable = np.sort(table[:], order="icol")[sstart:sstop:-2] + sortedtable2 = table.read_sorted("icol", start=24, stop=25, step=-2) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05_readSorted11(self): + """Testing the Table.read_sorted() method with start > stop.""" + + table = self.table + sstart = 100 - 137 - 1 + sstop = 100 - 25 - 1 + sortedtable = np.sort(table[:], order="icol")[sstart:sstop:-2] + sortedtable2 = table.read_sorted("icol", start=137, stop=25, step=-2) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05a_readSorted12(self): + """Testing the Table.read_sorted() method with checkCSI (I).""" + + table = self.table + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = table.read_sorted("icol", checkCSI=True) + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test05b_readSorted12(self): + """Testing the Table.read_sorted() method with checkCSI (II).""" + + table = self.table + self.assertRaises( + ValueError, table.read_sorted, "rcol", checkCSI=False + ) + + def test06_copy_sorted1(self): + """Testing the Table.copy(sortby) method with no arguments.""" + + table = self.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol") + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = table2[:] + if 
common.verbose: + print("Original sorted table:", sortedtable) + print("The values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test06_copy_sorted2(self): + """Testing the Table.copy(sortby) method with step=-1.""" + + table = self.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol", step=-1) + sortedtable = np.sort(table[:], order="icol")[::-1] + sortedtable2 = table2[:] + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test06_copy_sorted3(self): + """Testing the Table.copy(sortby) method with only a start.""" + + table = self.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol", start=3) + sortedtable = np.sort(table[:], order="icol")[3:4] + sortedtable2 = table2[:] + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test06_copy_sorted4(self): + """Testing the Table.copy(sortby) method with start, stop.""" + + table = self.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol", start=3, stop=40) + sortedtable = np.sort(table[:], order="icol")[3:40] + sortedtable2 = table2[:] + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test06_copy_sorted5(self): + """Testing the Table.copy(sortby) method with start, stop, step.""" + + table = self.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy( + "/", "table2", sortby="icol", start=3, stop=33, step=5 + ) + sortedtable = np.sort(table[:], 
order="icol")[3:33:5] + sortedtable2 = table2[:] + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test06_copy_sorted6(self): + """Testing the Table.copy(sortby) method after table re-opening.""" + + self._reopen(mode="a") + table = self.h5file.root.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol") + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = table2[:] + if common.verbose: + print("Original sorted table:", sortedtable) + print("The values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test06_copy_sorted7(self): + """Testing the `checkCSI` parameter of Table.copy() (I).""" + + table = self.table + + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol") + self.assertRaises( + ValueError, + table2.copy, + "/", + "table3", + sortby="rcol", + checkCSI=False, + ) + + def test06_copy_sorted8(self): + """Testing the `checkCSI` parameter of Table.copy() (II).""" + + table = self.table + + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol") + self.assertRaises( + ValueError, + table2.copy, + "/", + "table3", + sortby="rcol", + checkCSI=True, + ) + + def test07_isCSI_noelements(self): + """Testing the representation of an index with no elements.""" + + t2 = self.h5file.create_table("/", "t2", self.MyDescription) + irows = t2.cols.rcol.create_csindex() + if common.verbose: + print("repr(t2)-->\n", repr(t2)) + self.assertEqual(irows, 0) + self.assertEqual(t2.colindexes["rcol"].is_csi, False) + + +class ReadSortedIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test case for testing sorted reading in a "full" sorted column.""" + + nrows = 100 + nrowsinbuf = 11 + + class 
MyDescription(tb.IsDescription): + rcol = tb.IntCol(pos=1) + icol = tb.IntCol(pos=2) + + def setUp(self): + super().setUp() + + table = self.h5file.create_table("/", "table", self.MyDescription) + row = table.row + nrows = self.nrows + for i in range(nrows): + row["rcol"] = i + row["icol"] = nrows - i + row.append() + table.flush() + self.table = table + self.icol = self.table.cols.icol + # A full index with maximum optlevel should always be completely sorted + self.icol.create_index( + optlevel=self.optlevel, kind="full", _blocksizes=small_blocksizes + ) + + def test01_readSorted1(self): + """Testing the Table.read_sorted() method with no arguments.""" + + table = self.table + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = table.read_sorted("icol") + if common.verbose: + print("Sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + # Compare with the sorted read table because we have no + # guarantees that read_sorted returns a completely sorted table + self.assertTrue( + common.allequal(sortedtable, np.sort(sortedtable2, order="icol")) + ) + + def test01_readSorted2(self): + """Testing the Table.read_sorted() method with no arguments + (re-open).""" + + self._reopen() + table = self.h5file.root.table + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = table.read_sorted("icol") + if common.verbose: + print("Sorted table:", sortedtable) + print("The values from read_sorted:", sortedtable2) + # Compare with the sorted read table because we have no + # guarantees that read_sorted returns a completely sorted table + self.assertTrue( + common.allequal(sortedtable, np.sort(sortedtable2, order="icol")) + ) + + def test02_copy_sorted1(self): + """Testing the Table.copy(sortby) method.""" + + table = self.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol") + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = np.sort(table2[:], 
order="icol") + if common.verbose: + print("Original table:", table2[:]) + print("The sorted values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + def test02_copy_sorted2(self): + """Testing the Table.copy(sortby) method after table re-opening.""" + + self._reopen(mode="a") + table = self.h5file.root.table + # Copy to another table + table.nrowsinbuf = self.nrowsinbuf + table2 = table.copy("/", "table2", sortby="icol") + sortedtable = np.sort(table[:], order="icol") + sortedtable2 = np.sort(table2[:], order="icol") + if common.verbose: + print("Original table:", table2[:]) + print("The sorted values from copy:", sortedtable2) + self.assertTrue(common.allequal(sortedtable, sortedtable2)) + + +class ReadSortedIndex0(ReadSortedIndexTestCase): + optlevel = 0 + + +class ReadSortedIndex3(ReadSortedIndexTestCase): + optlevel = 3 + + +class ReadSortedIndex6(ReadSortedIndexTestCase): + optlevel = 6 + + +class ReadSortedIndex9(ReadSortedIndexTestCase): + optlevel = 9 + + +class Issue156TestBase(common.TempFileMixin, common.PyTablesTestCase): + # field name in table according to which test_copysort() sorts the table + sort_field = None + + def setUp(self): + super().setUp() + + # create nested table + class Foo(tb.IsDescription): + frame = tb.UInt16Col() + + class Bar(tb.IsDescription): + code = tb.UInt16Col() + + table = self.h5file.create_table( + "/", "foo", Foo, filters=tb.Filters(3, "zlib"), createparents=True + ) + + self.h5file.flush() + + # fill table with 10 random numbers + for k in range(10): + row = table.row + row["frame"] = np.random.randint(0, 2**16 - 1) + row["Bar/code"] = np.random.randint(0, 2**16 - 1) + row.append() + + self.h5file.flush() + + def test_copysort(self): + # copy table + oldNode = self.h5file.get_node("/foo") + + # create completely sorted index on a main column + oldNode.colinstances[self.sort_field].create_csindex() + + # this fails on ade2ba123efd267fd31 + # see gh-156 + new_node = oldNode.copy( 
+ newname="foo2", + overwrite=True, + sortby=self.sort_field, + checkCSI=True, + propindexes=True, + ) + + # check column is sorted + self.assertTrue( + np.all( + new_node.col(self.sort_field) + == sorted(oldNode.col(self.sort_field)) + ) + ) + # check index is available + self.assertIn(self.sort_field, new_node.colindexes) + # check CSI was propagated + self.assertTrue(new_node.colindexes[self.sort_field].is_csi) + + +class Issue156TestCase01(Issue156TestBase): + # sort by field from non nested entry + sort_field = "frame" + + +class Issue156TestCase02(Issue156TestBase): + # sort by field from nested entry + sort_field = "Bar/code" + + +class Issue119Time32ColTestCase(common.TempFileMixin, common.PyTablesTestCase): + """TimeCol not properly indexing.""" + + col_typ = tb.Time32Col + values = [ + 0.93240451618785880, + 0.76322375510776170, + 0.16695030056300875, + 0.91259117097807850, + 0.93977847053454630, + 0.51450406513503090, + 0.24452129962257563, + 0.85475938924825230, + 0.32512326762476930, + 0.75127635627046820, + ] + + def setUp(self): + super().setUp() + + class Descr(tb.IsDescription): + when = self.col_typ(pos=1) + value = tb.Float32Col(pos=2) + + self.table = self.h5file.create_table("/", "test", Descr) + + self.t = 1321031471.0 # 11/11/11 11:11:11 + data = [(self.t + i, item) for i, item in enumerate(self.values)] + self.table.append(data) + self.h5file.flush() + + def test_timecol_issue(self): + tbl = self.table + t = self.t + + wherestr = "(when >= %d) & (when < %d)" % (t, t + 5) + + no_index = tbl.read_where(wherestr) + + tbl.cols.when.create_index(_verbose=False) + with_index = tbl.read_where(wherestr) + + self.assertTrue((no_index == with_index).all()) + + +class Issue119Time64ColTestCase(Issue119Time32ColTestCase): + col_typ = tb.Time64Col + + +class TestIndexingNans(common.TempFileMixin, common.PyTablesTestCase): + def test_issue_282(self): + trMap = {"index": tb.Int64Col(), "values": tb.FloatCol()} + table = self.h5file.create_table("/", 
"table", trMap) + + r = table.row + for i in range(5): + r["index"] = i + r["values"] = np.nan if i == 0 else i + r.append() + table.flush() + + table.cols.values.create_index() + + # retrieve + result = table.read_where("(values >= 0)") + self.assertEqual(len(result), 4) + + def test_issue_327(self): + table = self.h5file.create_table( + "/", + "table", + dict( + index=tb.Int64Col(), + values=tb.FloatCol(shape=()), + values2=tb.FloatCol(shape=()), + ), + ) + + r = table.row + for i in range(5): + r["index"] = i + r["values"] = np.nan if i == 2 or i == 3 else i + r["values2"] = i + r.append() + table.flush() + + table.cols.values.create_index() + table.cols.values2.create_index() + + results2 = table.read_where("(values2 > 0)") + self.assertEqual(len(results2), 4) + + results = table.read_where("(values > 0)") + self.assertEqual(len(results), 2) + + def test_issue_327_b(self): + table = self.h5file.create_table( + "/", + "table", + dict( + index=tb.Int64Col(), + values=tb.FloatCol(shape=()), + values2=tb.FloatCol(shape=()), + ), + ) + + r = table.row + for _ in range(100): + for i in range(5): + r["index"] = i + r["values"] = np.nan if i == 2 or i == 3 else i + r["values2"] = i + r.append() + table.flush() + + table.cols.values.create_index(_blocksizes=small_blocksizes) + table.cols.values2.create_index(_blocksizes=small_blocksizes) + + results2 = table.read_where("(values2 > 0)") + self.assertEqual(len(results2), 400) + + results = table.read_where("(values > 0)") + self.assertEqual(len(results), 200) + + def test_csindex_nans(self): + table = self.h5file.create_table( + "/", + "table", + dict( + index=tb.Int64Col(), + values=tb.FloatCol(shape=()), + values2=tb.FloatCol(shape=()), + ), + ) + + r = table.row + for x in range(100): + for i in range(5): + r["index"] = i + r["values"] = np.nan if i == 2 or i == 3 else i + r["values2"] = i + r.append() + table.flush() + + table.cols.values.create_csindex(_blocksizes=small_blocksizes) + 
table.cols.values2.create_csindex(_blocksizes=small_blocksizes) + + results2 = table.read_where("(values2 > 0)") + self.assertEqual(len(results2), 100 * 4) + + results = table.read_where("(values > 0)") + self.assertEqual(len(results), 100 * 2) + + +def suite(): + theSuite = common.unittest.TestSuite() + + niter = 1 + # heavy = 1 # Uncomment this only for testing purposes! + + for n in range(niter): + theSuite.addTest(common.make_suite(BasicReadTestCase)) + theSuite.addTest(common.make_suite(ZlibReadTestCase)) + theSuite.addTest(common.make_suite(BloscReadTestCase)) + theSuite.addTest(common.make_suite(LZOReadTestCase)) + theSuite.addTest(common.make_suite(Bzip2ReadTestCase)) + theSuite.addTest(common.make_suite(ShuffleReadTestCase)) + theSuite.addTest(common.make_suite(Fletcher32ReadTestCase)) + theSuite.addTest(common.make_suite(ShuffleFletcher32ReadTestCase)) + theSuite.addTest(common.make_suite(OneHalfTestCase)) + theSuite.addTest(common.make_suite(UpperBoundTestCase)) + theSuite.addTest(common.make_suite(LowerBoundTestCase)) + theSuite.addTest(common.make_suite(AI1TestCase)) + theSuite.addTest(common.make_suite(AI2TestCase)) + theSuite.addTest(common.make_suite(AI9TestCase)) + theSuite.addTest(common.make_suite(DeepTableIndexTestCase)) + theSuite.addTest(common.make_suite(IndexPropsChangeTestCase)) + theSuite.addTest(common.make_suite(IndexFiltersTestCase)) + theSuite.addTest(common.make_suite(OldIndexTestCase)) + theSuite.addTest(common.make_suite(CompletelySortedIndexTestCase)) + theSuite.addTest(common.make_suite(ManyNodesTestCase)) + theSuite.addTest(common.make_suite(ReadSortedIndex0)) + theSuite.addTest(common.make_suite(ReadSortedIndex3)) + theSuite.addTest(common.make_suite(ReadSortedIndex6)) + theSuite.addTest(common.make_suite(ReadSortedIndex9)) + theSuite.addTest(common.make_suite(Issue156TestCase01)) + theSuite.addTest(common.make_suite(Issue156TestCase02)) + theSuite.addTest(common.make_suite(Issue119Time32ColTestCase)) + 
theSuite.addTest(common.make_suite(Issue119Time64ColTestCase)) + theSuite.addTest(common.make_suite(TestIndexingNans)) + if common.heavy: + # These are too heavy for normal testing + theSuite.addTest(common.make_suite(AI4bTestCase)) + theSuite.addTest(common.make_suite(AI5TestCase)) + theSuite.addTest(common.make_suite(AI6TestCase)) + theSuite.addTest(common.make_suite(AI7TestCase)) + theSuite.addTest(common.make_suite(AI8TestCase)) + theSuite.addTest(common.make_suite(AI10TestCase)) + theSuite.addTest(common.make_suite(AI11TestCase)) + theSuite.addTest(common.make_suite(AI12TestCase)) + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_indexvalues.py b/venv/Lib/site-packages/tables/tests/test_indexvalues.py new file mode 100644 index 0000000..4d83525 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_indexvalues.py @@ -0,0 +1,3573 @@ +import random +import tempfile +from pathlib import Path + +import numpy as np + +import tables as tb +from tables.tests import common + +# An alias for frozenset +fzset = frozenset + +# To make the values in the tests reproducible +random.seed(19) + +# Sensible parameters for indexing with small blocksizes +small_blocksizes = (16, 8, 4, 2) # The smaller set of parameters... 
+# The size for medium indexes +minRowIndex = 1000 + + +class Small(tb.IsDescription): + var1 = tb.StringCol(itemsize=4, dflt=b"") + var2 = tb.BoolCol(dflt=0) + var3 = tb.IntCol(dflt=0) + var4 = tb.FloatCol(dflt=0) + + +class SelectValuesTestCase(common.TempFileMixin, common.PyTablesTestCase): + compress = 1 + complib = "zlib" + shuffle = 1 + fletcher32 = 0 + chunkshape = 10 + buffersize = 0 + random = 0 + values = None + reopen = False + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + if common.verbose: + print("Checking index kind-->", self.kind) + self.rootgroup = self.h5file.root + self.populateFile() + + def populateFile(self): + # Set a seed for the random generator if needed. + # This is useful when one needs reproducible results. + if self.random and hasattr(self, "seed"): + random.seed(self.seed) + group = self.rootgroup + # Create a table + title = "This is the IndexArray title" + filters = tb.Filters( + complevel=self.compress, + complib=self.complib, + shuffle=self.shuffle, + fletcher32=self.fletcher32, + ) + table1 = self.h5file.create_table( + group, + "table1", + Small, + title, + filters, + self.nrows, + chunkshape=(self.chunkshape,), + ) + table2 = self.h5file.create_table( + group, + "table2", + Small, + title, + filters, + self.nrows, + chunkshape=(self.chunkshape,), + ) + count = 0 + for i in range(0, self.nrows, self.nrep): + for j in range(self.nrep): + if self.random: + k = random.randrange(self.nrows) + elif self.values is not None: + lenvalues = len(self.values) + if i >= lenvalues: + i %= lenvalues + k = self.values[i] + else: + k = i + bk = str(k).encode("ascii") + table1.row["var1"] = bk + table2.row["var1"] = bk + table1.row["var2"] = k % 2 + table2.row["var2"] = k % 2 + table1.row["var3"] = k + table2.row["var3"] = k + table1.row["var4"] = float(self.nrows - k - 1) + table2.row["var4"] = float(self.nrows - k - 1) + table1.row.append() + table2.row.append() + count += 1 + table1.flush() + table2.flush() 
+ if self.buffersize: + # Change the buffersize by default + table1.nrowsinbuf = self.buffersize + # Make sure nrowsinbuf is a multiple of chunkshape + table1.nrowsinbuf -= table1.nrowsinbuf % self.chunkshape + # Index all entries: + for col in table1.colinstances.values(): + indexrows = col.create_index( + kind=self.kind, _blocksizes=self.blocksizes + ) + if common.verbose: + print("Number of written rows:", table1.nrows) + print("Number of indexed rows:", indexrows) + + if self.reopen: + self._reopen(mode="a") # flavor changes + self.table1 = self.h5file.root.table1 + self.table2 = self.h5file.root.table1 + + def test01a(self): + """Checking selecting values from an Index (string flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + # First selection + t1var1 = table1.cols.var1 + results1 = [ + p["var1"] for p in table1.where("(il<=t1var1)&(t1var1<=sl)") + ] + results2 = [p["var1"] for p in table2 if il <= p["var1"] <= sl] + results1.sort() + results2.sort() + if common.verbose: + print("Should look like:", results2) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + t1var1 = table1.cols.var1 + results1 = [ + p["var1"] for p in table1.where("(il<=t1var1)&(t1var1 sl")] + results2 = [p["var1"] for p in table2 if p["var1"] > sl] + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + t1var1 = table1.cols.var1 + 
self.assertIsNotNone(t1var1) + results1 = [p["var1"] for p in table1.where("t1var1 >= sl")] + results2 = [p["var1"] for p in table2 if p["var1"] >= sl] + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test02a(self): + """Checking selecting values from an Index (bool flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Do some selections and check the results + t1var2 = table1.cols.var2 + self.assertIsNotNone(t1var2) + results1 = [p["var2"] for p in table1.where("t1var2 == True")] + results2 = [p["var2"] for p in table2 if p["var2"] is True] + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test02b(self): + """Checking selecting values from an Index (bool flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02b..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Do some selections and check the results + t1var2 = table1.cols.var2 + self.assertIsNotNone(t1var2) + results1 = [p["var2"] for p in table1.where("t1var2 == False")] + results2 = [p["var2"] for p in table2 if p["var2"] is False] + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test03a(self): + """Checking selecting values from an Index (int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03a..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = int(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var3 + self.assertIsNotNone(t1col) + + # First selection + results1 = [p["var3"] for p in table1.where("(il<=t1col)&(t1col<=sl)")] + results2 = [p["var3"] for p in table2 if il <= p["var3"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + results1 = [p["var3"] for p in table1.where("(il<=t1col)&(t1col sl")] + results2 = [p["var3"] for p in table2 if p["var3"] > sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + results1 = [p["var3"] for p in table1.where("t1col >= sl")] + results2 = [p["var3"] for p in table2 if p["var3"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test03c(self): + """Checking selecting values from an Index (long flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03c..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + # il = long(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var3 + self.assertIsNotNone(t1col) + + # First selection + results1 = [p["var3"] for p in table1.where("t1col < sl")] + results2 = [p["var3"] for p in table2 if p["var3"] < sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + results1 = [p["var3"] for p in table1.where("t1col <= sl")] + results2 = [p["var3"] for p in table2 if p["var3"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + results1 = [p["var3"] for p in table1.where("t1col > sl")] + results2 = [p["var3"] for p in table2 if p["var3"] > sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + results1 = [p["var3"] for p in table1.where("t1col >= sl")] + results2 = [p["var3"] for p in table2 if p["var3"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + 
print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test03d(self): + """Checking selecting values from an Index (long and int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03d..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + # il = int(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var3 + self.assertIsNotNone(t1col) + + # First selection + results1 = [p["var3"] for p in table1.where("t1col < sl")] + results2 = [p["var3"] for p in table2 if p["var3"] < sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + results1 = [p["var3"] for p in table1.where("t1col <= sl")] + results2 = [p["var3"] for p in table2 if p["var3"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + results1 = [p["var3"] for p in table1.where("t1col > sl")] + results2 = [p["var3"] for p in table2 if p["var3"] > sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + 
self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + results1 = [p["var3"] for p in table1.where("t1col >= sl")] + results2 = [p["var3"] for p in table2 if p["var3"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test04a(self): + """Checking selecting values from an Index (float flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = float(self.il) + sl = float(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var4 + self.assertIsNotNone(t1col) + + # First selection + results1 = [p["var4"] for p in table1.where("(il<=t1col)&(t1col<=sl)")] + results2 = [p["var4"] for p in table2 if il <= p["var4"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1.sort(), results2.sort()) + + # Second selection + results1 = [p["var4"] for p in table1.where("(il<=t1col)&(t1col sl")] + results2 = [p["var4"] for p in table2 if p["var4"] > sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth 
selection + results1 = [p["var4"] for p in table1.where("t1col >= sl")] + results2 = [p["var4"] for p in table2 if p["var4"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test05a(self): + """Checking get_where_list & itersequence (string, python flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + # First selection + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + table1.flavor = "python" + rowList1 = table1.get_where_list(condition) + results1 = [p["var1"] for p in table1.itersequence(rowList1)] + results2 = [p["var1"] for p in table2 if il <= p["var1"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1.sort(), results2.sort()) + + # Second selection + condition = "(il<=t1col)&(t1col sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # 
Fourth selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + rowList1 = table1.get_where_list(condition) + results1 = [p["var1"] for p in table1.itersequence(rowList1)] + results2 = [p["var1"] for p in table2 if p["var1"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test06a(self): + """Checking get_where_list & itersequence (bool flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Do some selections and check the results + t1var2 = table1.cols.var2 + condition = "t1var2==True" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1var2.pathname]) + ) + table1.flavor = "python" + rowList1 = table1.get_where_list(condition) + results1 = [p["var2"] for p in table1.itersequence(rowList1)] + results2 = [p["var2"] for p in table2 if p["var2"] is True] + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test06b(self): + """Checking get_where_list & itersequence (numpy bool limits & + flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06b..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Do some selections and check the results + t1var2 = table1.cols.var2 + false = np.bool_(False) + self.assertFalse(false) # silence pyflakes + condition = "t1var2==false" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1var2.pathname]) + ) + table1.flavor = "python" + rowList1 = table1.get_where_list(condition) + results1 = [p["var2"] for p in table1.itersequence(rowList1)] + results2 = [p["var2"] for p in table2 if p["var2"] is False] + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test07a(self): + """Checking get_where_list & itersequence (int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = int(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var3 + # First selection + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + table1.flavor = "python" + rowList1 = table1.get_where_list(condition) + results1 = [p["var3"] for p in table1.itersequence(rowList1)] + results2 = [p["var3"] for p in table2 if il <= p["var3"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1.sort(), results2.sort()) + + # Second selection + condition = "(il<=t1col)&(t1col sl] + # sort lists (indexing does not guarantee that rows are returned in + # 
order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + rowList1 = table1.get_where_list(condition) + results1 = [p["var3"] for p in table1.itersequence(rowList1)] + results2 = [p["var3"] for p in table2 if p["var3"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test08a(self): + """Checking get_where_list & itersequence (float flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08a..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = float(self.il) + sl = float(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var4 + # First selection + condition = "(il<=t1col)&(t1col<=sl)" + # results1 = [p["var4"] for p in table1.where(condition)] + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + table1.flavor = "python" + rowList1 = table1.get_where_list(condition) + results1 = [p["var4"] for p in table1.itersequence(rowList1)] + results2 = [p["var4"] for p in table2 if il <= p["var4"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1.sort(), results2.sort()) + + # Second selection + condition = "(il<=t1col)&(t1col sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + rowList1 = table1.get_where_list(condition) + results1 = [p["var4"] for p in table1.itersequence(rowList1)] + results2 = [p["var4"] for p in table2 if p["var4"] >= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + 
self.assertEqual(results1, results2) + + def test09a(self): + """Checking non-indexed where() (string flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09a..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + table1._disable_indexing_in_queries() + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + self.assertIsNotNone(t1col) + + # First selection + condition = "t1col<=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var1"] for p in table1.where(condition, start=2, stop=10) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, 10) if p["var1"] <= sl + ] + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "(il p["var1"] > sl) + ] + if common.verbose: + print("Limits:", il, sl) + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # This selection to be commented out + # condition = 't1col>=sl' + # self.assertTrue(not table1.will_query_use_indexing(condition)) + # results1 = [p['var1'] for p in table1.where(condition,start=2, + # stop=-1,step=1)] + # results2 = [p["var1"] for p in table2.iterrows(2, -1, 1) + # if p["var1"] >= sl] + # if verbose: + # print "Limit:", sl + # print "Selection results (in-kernel):", results1 + # print "Should look like:", results2 + # print "Length results:", len(results1) + # print "Should be:", len(results2) + # self.assertEqual(len(results1), len(results2)) + # self.assertEqual(results1, results2) + + # Fourth selection + # results1 = 
[p['var1'] for p in + # table1.where(condition,start=2,stop=-1,step=3)] + condition = "t1col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=-1, step=3) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, -1, 3) if p["var1"] >= sl + ] + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Re-enable the indexing in queries basically to unnail the + # condition cache and not raising the performance warning + # about some indexes being dirty + table1._enable_indexing_in_queries() + + def test09b(self): + """Checking non-indexed where() (float flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09b..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + table1._disable_indexing_in_queries() + + # Convert the limits to the appropriate type + il = float(self.il) + sl = float(self.sl) + + # Do some selections and check the results + t1col = table1.cols.var4 + self.assertIsNotNone(t1col) + + # First selection + condition = "t1col= sl + ] + if common.verbose: + print("Limit:", sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Re-enable the indexing in queries basically to unnail the + # condition cache and not raising the performance warning + # about some indexes being dirty + table1._enable_indexing_in_queries() + + def test09c(self): + """Check non-indexed where() w/ ranges, changing step + (string flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09c..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + table1._disable_indexing_in_queries() + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + self.assertIsNotNone(t1col) + + # First selection + condition = "t1col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=-1, step=3) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, -1, 3) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "t1col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var1"] + for p in table1.where(condition, start=5, stop=-1, step=10) + ] + results2 = [ + p["var1"] for p in table2.iterrows(5, -1, 10) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + condition = "t1col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var1"] + for p in table1.where(condition, start=5, stop=-3, step=11) + ] + results2 = [ + p["var1"] for p in table2.iterrows(5, -3, 11) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + 
results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + condition = "t1col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=-1, step=300) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, -1, 300) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Re-enable the indexing in queries basically to unnail the + # condition cache and not raising the performance warning + # about some indexes being dirty + table1._enable_indexing_in_queries() + + def test09d(self): + """Checking non-indexed where() w/ ranges, changing step + (int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09d..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + table1._disable_indexing_in_queries() + + # Convert the limits to the appropriate type + il = int(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t3col = table1.cols.var3 + self.assertIsNotNone(t3col) + + # First selection + condition = "t3col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var3"] + for p in table1.where(condition, start=2, stop=-1, step=3) + ] + results2 = [ + p["var3"] for p in table2.iterrows(2, -1, 3) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "t3col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var3"] + for p in table1.where(condition, start=5, stop=-1, step=10) + ] + results2 = [ + p["var3"] for p in table2.iterrows(5, -1, 10) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + condition = "t3col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var3"] + for p in table1.where(condition, start=5, stop=-3, step=11) + ] + results2 = [ + p["var3"] for p in table2.iterrows(5, -3, 11) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if 
common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + condition = "t3col>=sl" + self.assertTrue(not table1.will_query_use_indexing(condition)) + results1 = [ + p["var3"] + for p in table1.where(condition, start=2, stop=-1, step=300) + ] + results2 = [ + p["var3"] for p in table2.iterrows(2, -1, 300) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Re-enable the indexing in queries basically to unnail the + # condition cache and not raising the performance warning + # about some indexes being dirty + table1._enable_indexing_in_queries() + + def test10a(self): + """Checking indexed where() with ranges (string flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10a..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + # First selection + condition = "t1col<=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] for p in table1.where(condition, start=2, stop=10) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, 10) if p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=30, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(2, 30, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat second selection (testing caches) + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=30, step=2) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(2, 30, 2) + if il <= p["var1"] <= sl + ] + # sort lists 
(indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Selection results (indexed):", results1) + print("Should look like:", results2) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + condition = "(il= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test10b(self): + """Checking indexed where() with ranges (int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10b..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = int(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t3col = table1.cols.var3 + # First selection + condition = "t3col<=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t3col.pathname]) + ) + results1 = [ + p["var3"] for p in table1.where(condition, start=2, stop=10) + ] + results2 = [ + p["var3"] for p in table2.iterrows(2, 10) if p["var3"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "(il<=t3col)&(t3col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == 
fzset([t3col.pathname]) + ) + results1 = [ + p["var3"] + for p in table1.where(condition, start=2, stop=30, step=2) + ] + results2 = [ + p["var3"] + for p in table2.iterrows(2, 30, 2) + if il <= p["var3"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + condition = "(il= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test10c(self): + """Checking indexed where() with ranges, changing step (string + flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10c..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + + # First selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=-1, step=3) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, -1, 3) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=5, stop=-1, step=10) + ] + results2 = [ + p["var1"] for p in table2.iterrows(5, -1, 10) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=5, stop=-3, step=11) + ] + results2 = [ + p["var1"] for p in table2.iterrows(5, -3, 11) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # 
order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + condition = "t1col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=-1, step=300) + ] + results2 = [ + p["var1"] for p in table2.iterrows(2, -1, 300) if p["var1"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test10d(self): + """Checking indexed where() with ranges, changing step (int flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10d..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = int(self.il) + sl = int(self.sl) + + # Do some selections and check the results + t3col = table1.cols.var3 + + # First selection + condition = "t3col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t3col.pathname]) + ) + results1 = [ + p["var3"] + for p in table1.where(condition, start=2, stop=-1, step=3) + ] + results2 = [ + p["var3"] for p in table2.iterrows(2, -1, 3) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection + condition = "t3col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t3col.pathname]) + ) + results1 = [ + p["var3"] + for p in table1.where(condition, start=5, stop=-1, step=10) + ] + results2 = [ + p["var3"] for p in table2.iterrows(5, -1, 10) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Third selection + condition = "t3col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t3col.pathname]) + ) + results1 = [ + p["var3"] + for p in table1.where(condition, start=5, stop=-3, step=11) + ] + results2 = [ + p["var3"] for p in table2.iterrows(5, -3, 11) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + 
results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection + condition = "t3col>=sl" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t3col.pathname]) + ) + results1 = [ + p["var3"] + for p in table1.where(condition, start=2, stop=-1, step=300) + ] + results2 = [ + p["var3"] for p in table2.iterrows(2, -1, 300) if p["var3"] >= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test11a(self): + """Checking selecting values from an Index via read_coordinates()""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test11a..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do a selection and check the result + t1var1 = table1.cols.var1 + condition = "(il<=t1var1)&(t1var1<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1var1.pathname]) + ) + coords1 = table1.get_where_list(condition) + table1.flavor = "python" + results1 = table1.read_coordinates(coords1, field="var1") + results2 = [p["var1"] for p in table2 if il <= p["var1"] <= sl] + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test12a(self): + """Checking selecting values after a Table.append() operation.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test12a..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Append more rows in already created indexes + count = 0 + for i in range(0, self.nrows // 2, self.nrep): + for j in range(self.nrep): + if self.random: + k = random.randrange(self.nrows) + elif self.values is not None: + lenvalues = len(self.values) + if i >= lenvalues: + i %= lenvalues + k = self.values[i] + else: + k = i + table1.row["var1"] = str(k) + table2.row["var1"] = str(k) + table1.row["var2"] = k % 2 + table2.row["var2"] = k % 2 + table1.row["var3"] = k + table2.row["var3"] = k + table1.row["var4"] = float(self.nrows - k - 1) + table2.row["var4"] = float(self.nrows - k - 1) + table1.row.append() + table2.row.append() + count += 1 + table1.flush() + table2.flush() + + t1var1 = table1.cols.var1 + t1var2 = table1.cols.var2 + t1var3 = table1.cols.var3 + t1var4 = table1.cols.var4 + self.assertFalse(t1var1.index.dirty) + self.assertFalse(t1var2.index.dirty) + self.assertFalse(t1var3.index.dirty) + self.assertFalse(t1var4.index.dirty) + + # Do some selections and check the results + # First selection: string + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + results1 = [ + p["var1"] for p in table1.where("(il<=t1var1)&(t1var1<=sl)") + ] + results2 = [p["var1"] for p in table2 if il <= p["var1"] <= sl] + results1.sort() + results2.sort() + if common.verbose: + print("Should look like:", results2) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Second selection: bool + results1 = [p["var2"] for p in table1.where("t1var2 == True")] + results2 = [p["var2"] for p in table2 if p["var2"] is True] + t2var1_vals = [p["var1"] for p in table2] + t2var2_vals = [p["var2"] for p in table2] + msg = ( + f"Incorrect results for t1var2[n] == True where\n" + 
f"t2var1_vals={repr(t2var1_vals)}\nt2var2_vals={repr(t2var2_vals)}\n" + f"\n{results1=}\n{results2=}" + ) + self.assertEqual(len(results1), len(results2), msg=msg) + self.assertEqual(results1, results2, msg=msg) + + # Third selection: int + # Convert the limits to the appropriate type + il = int(self.il) + sl = int(self.sl) + + t1var3 = table1.cols.var3 + results1 = [ + p["var3"] for p in table1.where("(il<=t1var3)&(t1var3<=sl)") + ] + results2 = [p["var3"] for p in table2 if il <= p["var3"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Fourth selection: float + # Convert the limits to the appropriate type + il = float(self.il) + sl = float(self.sl) + + # Do some selections and check the results + results1 = [ + p["var4"] for p in table1.where("(il<=t1var4)&(t1var4<=sl)") + ] + results2 = [p["var4"] for p in table2 if il <= p["var4"] <= sl] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1.sort(), results2.sort()) + + def test13a(self): + """Checking repeated queries (checking caches)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13a..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=30, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(2, 30, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection (testing caches) + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=30, step=2) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(2, 30, 2) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test13b(self): + """Checking repeated queries, varying step (checking caches)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13b..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=30, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(2, 30, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection (testing caches) + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=2, stop=30, step=2) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(2, 30, 2) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test13c(self): + """Checking repeated queries, varying start, stop, step.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13c..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] for p in table1.where(condition, start=0, stop=1, step=2) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 1, 2) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection (testing caches) + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] for p in table1.where(condition, start=0, stop=5, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 5, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test13d(self): + """Checking repeated queries, varying start, stop, step (another + twist)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13d..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] for p in table1.where(condition, start=0, stop=1, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 1, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection (testing caches) + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] for p in table1.where(condition, start=0, stop=1, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 1, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test13e(self): + """Checking repeated queries, with varying condition.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13e..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection with a more complex condition + t2col = table1.cols.var2 + condition = "(il<=t1col)&(t1col<=sl)&(t2col==True)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname, t2col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl and p["var2"] is True + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test13f(self): + """Checking repeated queries, with varying condition.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13f..." 
% self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Remove indexes in var2 column + table1.cols.var2.remove_index() + table2.cols.var2.remove_index() + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + t2col = table1.cols.var2 + self.assertIsNotNone(t2col) + condition = "(il<=t1col)&(t1col<=sl)&(t2col==True)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl and p["var2"] is True + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection with a simpler condition + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat again with the original condition, but with a constant + constant = True + condition = "(il<=t1col)&(t1col<=sl)&(t2col==constant)" + 
self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl and p["var2"] == constant + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + def test13g(self): + """Checking repeated queries, with different limits.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13g..." % self.__class__.__name__) + + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + # Convert the limits to the appropriate type + il = str(self.il).encode("ascii") + sl = str(self.sl).encode("ascii") + + # Do some selections and check the results + t1col = table1.cols.var1 + condition = "(il<=t1col)&(t1col<=sl)" + self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + # Repeat the selection with different limits + il, sl = ( + str(self.il + 1).encode("ascii"), + str(self.sl - 2).encode("ascii"), + ) + t2col = table1.cols.var2 + self.assertIsNotNone(t2col) + condition = "(il<=t1col)&(t1col<=sl)" + 
self.assertTrue( + table1.will_query_use_indexing(condition) + == fzset([t1col.pathname]) + ) + results1 = [ + p["var1"] + for p in table1.where(condition, start=0, stop=10, step=1) + ] + results2 = [ + p["var1"] + for p in table2.iterrows(0, 10, 1) + if il <= p["var1"] <= sl + ] + # sort lists (indexing does not guarantee that rows are returned in + # order) + results1.sort() + results2.sort() + if common.verbose: + print("Limits:", il, sl) + print("Length results:", len(results1)) + print("Should be:", len(results2)) + self.assertEqual(len(results1), len(results2)) + self.assertEqual(results1, results2) + + +class SV1aTestCase(SelectValuesTestCase): + blocksizes = small_blocksizes + chunkshape = 1 + buffersize = 2 + ss = blocksizes[2] + nrows = ss + reopen = 0 + nrep = ss + il = 0 + sl = ss + + +class SV1bTestCase(SV1aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = blocksizes[2] // 2**9 + buffersize = chunkshape * 5 + + +class SV2aTestCase(SelectValuesTestCase): + blocksizes = small_blocksizes + chunkshape = 2 + buffersize = 2 + ss = blocksizes[2] + nrows = ss * 2 - 1 + reopen = 1 + nrep = 1 + il = 0 + sl = 2 + + +class SV2bTestCase(SV2aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = blocksizes[2] // 2**7 + buffersize = chunkshape * 20 + + +class SV3aTestCase(SelectValuesTestCase): + blocksizes = small_blocksizes + chunkshape = 2 + buffersize = 3 + ss = blocksizes[2] + nrows = ss * 5 - 1 + reopen = 1 + nrep = 3 + il = 0 + sl = 3 + + +class SV3bTestCase(SV3aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + # chunkshape = 4 + # buffersize = 16 + chunkshape = 3 + buffersize = 9 + + +class SV4aTestCase(SelectValuesTestCase): + blocksizes = small_blocksizes + buffersize = 10 + ss = blocksizes[2] + nrows = ss * 3 + reopen = 0 + nrep = 1 + # il = nrows-cs + il = 0 + sl = nrows + + +class SV4bTestCase(SV4aTestCase): + blocksizes = 
tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = 500 + buffersize = 1000 + + +class SV5aTestCase(SelectValuesTestCase): + blocksizes = small_blocksizes + ss = blocksizes[2] + nrows = ss * 5 + reopen = 0 + nrep = 1 + il = 0 + sl = nrows + + +class SV5bTestCase(SV5aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + + +class SV6aTestCase(SelectValuesTestCase): + blocksizes = small_blocksizes + ss = blocksizes[2] + nrows = ss * 5 + 1 + reopen = 0 + cs = blocksizes[3] + nrep = cs + 1 + il = -1 + sl = nrows + + +class SV6bTestCase(SV6aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + + +class SV7aTestCase(SelectValuesTestCase): + random = 1 + blocksizes = small_blocksizes + ss = blocksizes[2] + nrows = ss * 5 + 3 + reopen = 0 + cs = blocksizes[3] + nrep = cs - 1 + il = -10 + sl = nrows + + +class SV7bTestCase(SV7aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + + +class SV8aTestCase(SelectValuesTestCase): + random = 0 + chunkshape = 1 + blocksizes = small_blocksizes + ss = blocksizes[2] + nrows = ss * 5 - 3 + reopen = 0 + cs = blocksizes[3] + nrep = cs - 1 + il = 10 + sl = nrows - 10 + + +class SV8bTestCase(SV8aTestCase): + random = 0 + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + + +class SV9aTestCase(SelectValuesTestCase): + random = 1 + blocksizes = small_blocksizes + ss = blocksizes[2] + nrows = ss * 5 + 11 + reopen = 0 + cs = blocksizes[3] + nrep = cs - 1 + il = 10 + sl = nrows - 10 + + +class SV9bTestCase(SV9aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + + +class SV10aTestCase(SelectValuesTestCase): + random = 1 + blocksizes = small_blocksizes + chunkshape = 1 + buffersize = 1 + ss = blocksizes[2] + nrows = ss + reopen = 0 + nrep = ss + il = 0 + sl = ss + + +class SV10bTestCase(SV10aTestCase): + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = 5 + buffersize = 6 + + +class 
SV11aTestCase(SelectValuesTestCase): + # This checks a special case that failed. It was discovered in a + # random test above (SV10a). It is explicitely put here as a way + # to always check that specific case. + values = [1, 7, 6, 7, 0, 7, 4, 4, 9, 5] + blocksizes = small_blocksizes + chunkshape = 1 + buffersize = 1 + ss = blocksizes[2] + nrows = ss + reopen = 0 + nrep = ss + il = 0 + sl = ss + + +class SV11bTestCase(SelectValuesTestCase): + # This checks a special case that failed. It was discovered in a + # random test above (SV10a). It is explicitely put here as a way + # to always check that specific case. + values = [1, 7, 6, 7, 0, 7, 4, 4, 9, 5] + chunkshape = 2 + buffersize = 2 + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + ss = blocksizes[2] + nrows = ss + reopen = 0 + nrep = ss + il = 0 + sl = ss + + +class SV12aTestCase(SelectValuesTestCase): + # This checks a special case that failed. It was discovered in a + # random test above (SV10b). It is explicitely put here as a way + # to always check that specific case. + # values = [0, 7, 0, 6, 5, 1, 6, 7, 0, 0] + values = [4, 4, 1, 5, 2, 0, 1, 4, 3, 9] + blocksizes = small_blocksizes + chunkshape = 1 + buffersize = 1 + ss = blocksizes[2] + nrows = ss + reopen = 0 + nrep = ss + il = 0 + sl = ss + + +class SV12bTestCase(SelectValuesTestCase): + # This checks a special case that failed. It was discovered in a + # random test above (SV10b). It is explicitely put here as a way + # to always check that specific case. 
+ # values = [0, 7, 0, 6, 5, 1, 6, 7, 0, 0] + values = [4, 4, 1, 5, 2, 0, 1, 4, 3, 9] + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = 2 + buffersize = 2 + ss = blocksizes[2] + nrows = ss + reopen = 1 + nrep = ss + il = 0 + sl = ss + + +class SV13aTestCase(SelectValuesTestCase): + values = [0, 7, 0, 6, 5, 1, 6, 7, 0, 0] + blocksizes = small_blocksizes + chunkshape = 3 + buffersize = 5 + ss = blocksizes[2] + nrows = ss + reopen = 0 + nrep = ss + il = 0 + sl = ss + + +class SV13bTestCase(SelectValuesTestCase): + values = [0, 7, 0, 6, 5, 1, 6, 7, 0, 0] + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = 5 + buffersize = 10 + ss = blocksizes[2] + nrows = ss + reopen = 1 + nrep = ss + il = 0 + sl = ss + + +class SV14aTestCase(SelectValuesTestCase): + values = [1, 7, 6, 7, 0, 7, 4, 4, 9, 5] + blocksizes = small_blocksizes + chunkshape = 2 + buffersize = 5 + ss = blocksizes[2] + nrows = ss + reopen = 0 + cs = blocksizes[3] + nrep = cs + il = -5 + sl = 500 + + +class SV14bTestCase(SelectValuesTestCase): + values = [1, 7, 6, 7, 0, 7, 4, 4, 9, 5] + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + chunkshape = 9 + buffersize = 10 + ss = blocksizes[2] + nrows = ss + reopen = 1 + nrep = 9 + il = 0 + cs = blocksizes[3] + sl = ss - cs + 1 + + +class SV15aTestCase(SelectValuesTestCase): + # Test that checks for case where there are not valid values in + # the indexed part, but they exist in the non-indexed region. + # At least, test01b takes account of that + random = 1 + # Both values of seed below triggers a fail in indexing code + # seed = 1885 + seed = 183 + blocksizes = small_blocksizes + ss = blocksizes[2] + nrows = ss * 5 + 1 + reopen = 0 + cs = blocksizes[3] + nrep = cs - 1 + il = -10 + sl = nrows + + +class SV15bTestCase(SelectValuesTestCase): + # Test that checks for case where there are not valid values in + # the indexed part, but they exist in the non-indexed region. 
+ # At least, test01b takes account of that + random = 1 + # Both values of seed below triggers a fail in indexing code + seed = 1885 + # seed = 183 + blocksizes = tb.idxutils.calc_chunksize(minRowIndex, memlevel=1) + ss = blocksizes[2] + nrows = ss * 5 + 1 + reopen = 1 + cs = blocksizes[3] + nrep = cs - 1 + il = -10 + sl = nrows + + +class LastRowReuseBuffers(common.PyTablesTestCase): + # Test that checks for possible reuse of buffers coming + # from last row in the sorted part of indexes + nelem = 1221 + np.random.seed(1) + random.seed(1) + + class Record(tb.IsDescription): + id1 = tb.Int16Col() + + def setUp(self): + super().setUp() + self.h5fname = tempfile.mktemp(".h5") + self.h5file = None + + def tearDown(self): + if self.h5file is not None: + self.h5file.close() + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + super().tearDown() + + def test00_lrucache(self): + self.h5file = tb.open_file(self.h5fname, "w", node_cache_slots=64) + ta = self.h5file.create_table( + "/", "table", self.Record, filters=tb.Filters(1) + ) + id1 = np.random.randint(0, 2**15, self.nelem) + ta.append([id1]) + + ta.cols.id1.create_index() + + for i in range(self.nelem): + nrow = random.randrange(self.nelem) + value = id1[nrow] + idx = ta.get_where_list("id1 == %s" % value) + self.assertGreater(len(idx), 0, f"idx--> {idx} {i} {nrow} {value}") + self.assertTrue( + nrow in idx, f"nrow not found: {idx} != {nrow}, {value}" + ) + + def test01_nocache(self): + self.h5file = tb.open_file(self.h5fname, "w", node_cache_slots=0) + ta = self.h5file.create_table( + "/", "table", self.Record, filters=tb.Filters(1) + ) + id1 = np.random.randint(0, 2**15, self.nelem) + ta.append([id1]) + + ta.cols.id1.create_index() + + for i in range(self.nelem): + nrow = random.randrange(self.nelem) + value = id1[nrow] + idx = ta.get_where_list("id1 == %s" % value) + self.assertGreater(len(idx), 0, f"idx--> {idx} {i} {nrow} {value}") + self.assertTrue( + nrow in idx, f"nrow not found: {idx} != 
{nrow}, {value}" + ) + + def test02_dictcache(self): + self.h5file = tb.open_file(self.h5fname, "w", node_cache_slots=-64) + ta = self.h5file.create_table( + "/", "table", self.Record, filters=tb.Filters(1) + ) + id1 = np.random.randint(0, 2**15, self.nelem) + ta.append([id1]) + + ta.cols.id1.create_index() + + for i in range(self.nelem): + nrow = random.randrange(self.nelem) + value = id1[nrow] + idx = ta.get_where_list("id1 == %s" % value) + self.assertGreater(len(idx), 0, f"idx--> {idx} {i} {nrow} {value}") + self.assertTrue( + nrow in idx, f"nrow not found: {idx} != {nrow}, {value}" + ) + + +normal_tests = ( + "SV1aTestCase", + "SV2aTestCase", + "SV3aTestCase", +) + +heavy_tests = ( + # The next are too hard to be in the 'normal' suite + "SV1bTestCase", + "SV2bTestCase", + "SV3bTestCase", + "SV4aTestCase", + "SV5aTestCase", + "SV6aTestCase", + "SV7aTestCase", + "SV8aTestCase", + "SV9aTestCase", + "SV10aTestCase", + "SV11aTestCase", + "SV12aTestCase", + "SV13aTestCase", + "SV14aTestCase", + "SV15aTestCase", + # This are properly heavy + "SV4bTestCase", + "SV5bTestCase", + "SV6bTestCase", + "SV7bTestCase", + "SV8bTestCase", + "SV9bTestCase", + "SV10bTestCase", + "SV11bTestCase", + "SV12bTestCase", + "SV13bTestCase", + "SV14bTestCase", + "SV15bTestCase", +) + + +# Base classes for the different type indexes. +class UltraLightITableMixin: + kind = "ultralight" + + +class LightITableMixin: + kind = "light" + + +class MediumITableMixin: + kind = "medium" + + +class FullITableMixin: + kind = "full" + + +# Parameters for indexed queries. +ckinds = ["UltraLight", "Light", "Medium", "Full"] +testlevels = ["Normal", "Heavy"] + +# Indexed queries: ``[ULMF]I[NH]SVXYTestCase``, where: +# +# 1. U is for 'UltraLight', L for 'Light', M for 'Medium', F for 'Full' indexes +# 2. 
N is for 'Normal', H for 'Heavy' tests + + +def iclassdata(): + for ckind in ckinds: + for ctest in normal_tests + heavy_tests: + classname = f"{ckind[0]}I{testlevels[common.heavy][0]}{ctest}" + # Uncomment the next one and comment the past one if one + # don't want to include the methods (testing purposes only) + # cbasenames = ( '%sITableMixin' % ckind, "object") + cbasenames = ("%sITableMixin" % ckind, ctest) + classdict = dict(heavy=bool(ctest in heavy_tests)) + yield (classname, cbasenames, classdict) + + +# Create test classes. +for cname, cbasenames, cdict in iclassdata(): + cbases = tuple(eval(cbase) for cbase in cbasenames) + class_ = type(cname, cbases, cdict) + exec("%s = class_" % cname) + + +# Test case for issue #319 +class BuffersizeMultipleChunksize( + common.TempFileMixin, common.PyTablesTestCase +): + open_mode = "w" + + def test01(self): + np.random.seed(2) + n = 700_000 + cs = 50_000 + nchunks = n // cs + + arr = np.zeros( + (n,), dtype=[("index", "i8"), ("o", "i8"), ("value", "f8")] + ) + arr["index"] = np.arange(n) + arr["o"] = np.random.randint(-20_000, -15_000, size=n) + arr["value"] = np.random.randn(n) + + node = self.h5file.create_group("/", "foo") + table = self.h5file.create_table( + node, + "table", + dict( + index=tb.Int64Col(), + o=tb.Int64Col(), + value=tb.FloatCol(shape=()), + ), + expectedrows=10_000_000, + ) + + table.append(arr) + + self._reopen("a") + + v1 = np.unique(arr["o"])[0] + v2 = np.unique(arr["o"])[1] + res = np.array([v1, v2]) + selector = f"((o == {v1}) | (o == {v2}))" + if common.verbose: + print("selecting values: %s" % selector) + + table = self.h5file.root.foo.table + + result = np.unique(table.read_where(selector)["o"]) + np.testing.assert_almost_equal(result, res) + if common.verbose: + print("select entire table:") + print(f"result: {result}\texpected: {res}") + + if common.verbose: + print("index the column o") + table.cols.o.create_index() # this was triggering the issue + + if common.verbose: + 
print("select via chunks") + for i in range(nchunks): + result = table.read_where( + selector, start=i * cs, stop=(i + 1) * cs + ) + result = np.unique(result["o"]) + np.testing.assert_almost_equal(np.unique(result), res) + if common.verbose: + print(f"result: {result}\texpected: {res}") + + +# Test case for issue #441 +class SideEffectNumPyQuicksort(common.PyTablesTestCase): + + def test01(self): + bug_file = common.test_filename("bug-idx.h5") + tmp_file = tempfile.mktemp(".h5") + tb.copy_file(bug_file, tmp_file) + h5 = tb.open_file(tmp_file, "a") + o = h5.root.table + vals = o.cols.path[:] + npvals = set(np.where(vals == 6)[0]) + + # Setting the chunkshape is critical for reproducing the bug + t = o.copy(newname="table2", chunkshape=2730) + t.cols.path.create_index() + indexed = {r.nrow for r in t.where("path == 6")} + + if common.verbose: + diffs = sorted(npvals - indexed) + print("ndiff:", len(diffs), diffs) + self.assertEqual(len(npvals), len(indexed)) + + h5.close() + if Path(tmp_file).is_file(): + Path(tmp_file).unlink() + + +# ----------------------------- + + +def suite(): + theSuite = common.unittest.TestSuite() + + niter = 1 + + for n in range(niter): + for cdata in iclassdata(): + class_ = eval(cdata[0]) + if not class_.heavy: + suite_ = common.make_suite(class_) + theSuite.addTest(suite_) + elif common.heavy: + suite_ = common.make_suite(class_) + theSuite.addTest(suite_) + theSuite.addTest(common.make_suite(LastRowReuseBuffers)) + theSuite.addTest(common.make_suite(BuffersizeMultipleChunksize)) + theSuite.addTest(common.make_suite(SideEffectNumPyQuicksort)) + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_large_tables.py b/venv/Lib/site-packages/tables/tests/test_large_tables.py new file mode 100644 index 0000000..9b8c6c4 --- /dev/null +++ 
b/venv/Lib/site-packages/tables/tests/test_large_tables.py @@ -0,0 +1,110 @@ +import sys + +import numpy as np + +import tables as tb +from tables.tests import common + + +class LargeTable(tb.IsDescription): + time = tb.Int32Col() + + +class BasicTestCase(common.TempFileMixin, common.PyTablesTestCase): + # file = "test.h5" + open_mode = "w" + title = "This is the table title" + dim1, dim2, dim3 = 24, 721, 1440 + nrows = dim1 * dim2 * dim3 # rows for a day + chunkshape = nrows + complib = "blosc2" # default + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.populateFile() + self.h5file.close() + + def populateFile(self): + group = self.h5file.root + table = self.h5file.create_table( + group, + "table", + LargeTable, + "Large table", + tb.Filters(complevel=1, complib=self.complib), + chunkshape=self.chunkshape, + ) + + # Structured NumPy buffer for every day + self.day_block = day_block = np.empty(self.nrows, dtype=table.dtype) + day_block["time"] = np.arange(self.nrows) + + # Append groups of rows ("days") so that we have more than 2**31 + # (see https://github.com/PyTables/PyTables/issues/995) + self.ndays = ndays = 90 + self.assertTrue(ndays * self.nrows > 2**31) + if common.verbose: + print(f"Writing {ndays} days...") + for day in range(ndays): + table.append(day_block) + table.flush() + + def test00_values(self): + """Check that written values are correct.""" + + self.h5file = tb.open_file(self.h5fname) + table = self.h5file.root.table + nrows = self.nrows + day_block = self.day_block + if common.verbose: + print(f"Checking {self.ndays} days...") + for nday in range(self.ndays): + day_block2 = table[nday * nrows : (nday + 1) * nrows] + self.assertEqual( + np.sum(day_block2["time"] == day_block["time"]), + nrows, + f"Values differ in day {nday}", + ) + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class BloscTestCase(BasicTestCase): + title = "Blosc table" + complib = 
"blosc" + + +@common.unittest.skipIf( + not common.blosc2_avail, "BLOSC2 compression library not available" +) +class Blosc2TestCase(BasicTestCase): + title = "Blosc2 table" + complib = "blosc2" + + +class ZlibTestCase(BasicTestCase): + title = "Zlib table" + complib = "zlib" + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # Uncomment this only for testing purposes + + for n in range(niter): + theSuite.addTest(common.make_suite(BloscTestCase)) + theSuite.addTest(common.make_suite(Blosc2TestCase)) + if common.heavy: + theSuite.addTest(common.make_suite(ZlibTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_links.py b/venv/Lib/site-packages/tables/tests/test_links.py new file mode 100644 index 0000000..ed1f977 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_links.py @@ -0,0 +1,654 @@ +"""Test module for different kind of links under PyTables.""" + +import re +import tempfile +from pathlib import Path + +import tables as tb +from tables.tests import common + + +# Test for hard links +class HardLinkTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self._createFile() + + def _createFile(self): + self.h5file.create_array("/", "arr1", [1, 2]) + group1 = self.h5file.create_group("/", "group1") + arr2 = self.h5file.create_array(group1, "arr2", [1, 2, 3]) + lgroup1 = self.h5file.create_hard_link("/", "lgroup1", "/group1") + self.assertIsNotNone(lgroup1) + larr1 = self.h5file.create_hard_link(group1, "larr1", "/arr1") + self.assertIsNotNone(larr1) + larr2 = self.h5file.create_hard_link("/", "larr2", arr2) + self.assertIsNotNone(larr2) + + def test00_create(self): + """Creating hard links.""" + + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.lgroup1, hardlink=True + ) + 
self._checkEqualityLeaf( + self.h5file.root.arr1, self.h5file.root.group1.larr1, hardlink=True + ) + self._checkEqualityLeaf( + self.h5file.root.lgroup1.arr2, + self.h5file.root.larr2, + hardlink=True, + ) + + def test01_open(self): + """Opening a file with hard links.""" + + self._reopen() + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.lgroup1, hardlink=True + ) + self._checkEqualityLeaf( + self.h5file.root.arr1, self.h5file.root.group1.larr1, hardlink=True + ) + self._checkEqualityLeaf( + self.h5file.root.lgroup1.arr2, + self.h5file.root.larr2, + hardlink=True, + ) + + def test02_removeLeaf(self): + """Removing a hard link to a Leaf.""" + + # First delete the initial link + self.h5file.root.arr1.remove() + self.assertNotIn("/arr1", self.h5file) + # The second link should still be there + if common.verbose: + print("Remaining link:", self.h5file.root.group1.larr1) + self.assertIn("/group1/larr1", self.h5file) + # Remove the second link + self.h5file.root.group1.larr1.remove() + self.assertNotIn("/group1/larr1", self.h5file) + + def test03_removeGroup(self): + """Removing a hard link to a Group.""" + + if common.verbose: + print("Original object tree:", self.h5file) + # First delete the initial link + self.h5file.root.group1._f_remove(force=True) + self.assertNotIn("/group1", self.h5file) + # The second link should still be there + if common.verbose: + print("Remaining link:", self.h5file.root.lgroup1) + print("Object tree:", self.h5file) + self.assertIn("/lgroup1", self.h5file) + # Remove the second link + self.h5file.root.lgroup1._g_remove(recursive=True) + self.assertNotIn("/lgroup1", self.h5file) + if common.verbose: + print("Final object tree:", self.h5file) + + +# Test for soft links +class SoftLinkTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self._createFile() + + def _createFile(self): + self.h5file.create_array("/", "arr1", [1, 2]) + group1 = self.h5file.create_group("/", 
"group1") + arr2 = self.h5file.create_array(group1, "arr2", [1, 2, 3]) + lgroup1 = self.h5file.create_soft_link("/", "lgroup1", "/group1") + self.assertIsNotNone(lgroup1) + larr1 = self.h5file.create_soft_link(group1, "larr1", "/arr1") + self.assertIsNotNone(larr1) + larr2 = self.h5file.create_soft_link("/", "larr2", arr2) + self.assertIsNotNone(larr2) + + def test00_create(self): + """Creating soft links.""" + + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.lgroup1() + ) + self._checkEqualityLeaf( + self.h5file.root.arr1, self.h5file.root.group1.larr1() + ) + self._checkEqualityLeaf( + self.h5file.root.lgroup1().arr2, self.h5file.root.larr2() + ) + + def test01_open(self): + """Opening a file with soft links.""" + + self._reopen() + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.lgroup1() + ) + self._checkEqualityLeaf( + self.h5file.root.arr1, self.h5file.root.group1.larr1() + ) + self._checkEqualityLeaf( + self.h5file.root.lgroup1().arr2, self.h5file.root.larr2() + ) + + def test02_remove(self): + """Removing a soft link.""" + + # First delete the referred link + self.h5file.root.arr1.remove() + self.assertNotIn("/arr1", self.h5file) + # The soft link should still be there (but dangling) + if common.verbose: + print("Dangling link:", self.h5file.root.group1.larr1) + self.assertIn("/group1/larr1", self.h5file) + # Remove the soft link itself + self.h5file.root.group1.larr1.remove() + self.assertNotIn("/group1/larr1", self.h5file) + + def test03_copy(self): + """Copying a soft link.""" + + # Copy the link into another location + root = self.h5file.root + lgroup1 = root.lgroup1 + lgroup2 = lgroup1.copy("/", "lgroup2") + self.assertIn("/lgroup1", self.h5file) + self.assertIn("/lgroup2", self.h5file) + self.assertIn("lgroup2", root._v_children) + self.assertIn("lgroup2", root._v_links) + if common.verbose: + print("Copied link:", lgroup2) + # Remove the first link + lgroup1.remove() + self._checkEqualityGroup( + 
self.h5file.root.group1, self.h5file.root.lgroup2() + ) + + def test03_overwrite(self): + """Overwrite a soft link.""" + + # Copy the link into another location + root = self.h5file.root + lgroup1 = root.lgroup1 + lgroup2 = lgroup1.copy("/", "lgroup2") + lgroup2 = lgroup1.copy("/", "lgroup2", overwrite=True) + self.assertIn("/lgroup1", self.h5file) + self.assertIn("/lgroup2", self.h5file) + self.assertIn("lgroup2", root._v_children) + self.assertIn("lgroup2", root._v_links) + if common.verbose: + print("Copied link:", lgroup2) + # Remove the first link + lgroup1.remove() + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.lgroup2() + ) + + def test04_move(self): + """Moving a soft link.""" + + # Move the link into another location + lgroup1 = self.h5file.root.lgroup1 + group2 = self.h5file.create_group("/", "group2") + lgroup1.move(group2, "lgroup2") + lgroup2 = self.h5file.root.group2.lgroup2 + if common.verbose: + print("Moved link:", lgroup2) + self.assertNotIn("/lgroup1", self.h5file) + self.assertIn("/group2/lgroup2", self.h5file) + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.group2.lgroup2() + ) + + def test05_rename(self): + """Renaming a soft link.""" + + # Rename the link + lgroup1 = self.h5file.root.lgroup1 + lgroup1.rename("lgroup2") + lgroup2 = self.h5file.root.lgroup2 + if common.verbose: + print("Moved link:", lgroup2) + self.assertNotIn("/lgroup1", self.h5file) + self.assertIn("/lgroup2", self.h5file) + self._checkEqualityGroup( + self.h5file.root.group1, self.h5file.root.lgroup2() + ) + + def test06a_relative_path(self): + """Using soft links with relative paths.""" + + # Create new group + self.h5file.create_group("/group1", "group3") + # ... 
and relative link + lgroup3 = self.h5file.create_soft_link("/group1", "lgroup3", "group3") + if common.verbose: + print("Relative path link:", lgroup3) + self.assertIn("/group1/lgroup3", self.h5file) + self._checkEqualityGroup( + self.h5file.root.group1.group3, self.h5file.root.group1.lgroup3() + ) + + def test06b_relative_path(self): + """Using soft links with relative paths (./ version)""" + + # Create new group + self.h5file.create_group("/group1", "group3") + # ... and relative link + lgroup3 = self.h5file.create_soft_link( + "/group1", "lgroup3", "./group3" + ) + if common.verbose: + print("Relative path link:", lgroup3) + self.assertIn("/group1/lgroup3", self.h5file) + self._checkEqualityGroup( + self.h5file.root.group1.group3, self.h5file.root.group1.lgroup3() + ) + + def test07_walkNodes(self): + """Checking `walk_nodes` with `classname` option.""" + + links = [ + node._v_pathname + for node in self.h5file.walk_nodes("/", classname="Link") + ] + if common.verbose: + print("detected links (classname='Link'):", links) + self.assertEqual(links, ["/larr2", "/lgroup1", "/group1/larr1"]) + links = [ + node._v_pathname + for node in self.h5file.walk_nodes("/", classname="SoftLink") + ] + if common.verbose: + print("detected links (classname='SoftLink'):", links) + self.assertEqual(links, ["/larr2", "/lgroup1", "/group1/larr1"]) + + def test08__v_links(self): + """Checking `Group._v_links`.""" + + links = [node for node in self.h5file.root._v_links] + if common.verbose: + print("detected links (under root):", links) + self.assertEqual(len(links), 2) + links = [node for node in self.h5file.root.group1._v_links] + if common.verbose: + print("detected links (under /group1):", links) + self.assertEqual(links, ["larr1"]) + + def test09_link_to_link(self): + """Checking linked links.""" + + # Create a link to another existing link + lgroup2 = self.h5file.create_soft_link("/", "lgroup2", "/lgroup1") + # Dereference it once: + self.assertIs(lgroup2(), 
self.h5file.get_node("/lgroup1")) + if common.verbose: + print("First dereference is correct:", lgroup2()) + # Dereference it twice: + self.assertIs(lgroup2()(), self.h5file.get_node("/group1")) + if common.verbose: + print("Second dereference is correct:", lgroup2()()) + + def test10_copy_link_to_file(self): + """Checking copying a link to another file.""" + + fname = tempfile.mktemp(".h5") + h5f = tb.open_file(fname, "a") + h5f.create_array("/", "arr1", [1, 2]) + h5f.create_group("/", "group1") + lgroup1 = self.h5file.root.lgroup1 + lgroup1_ = lgroup1.copy(h5f.root, "lgroup1") + self.assertIn("/lgroup1", self.h5file) + self.assertIn("/lgroup1", h5f) + self.assertIn(lgroup1_, h5f) + if common.verbose: + print("Copied link:", lgroup1_, "in:", lgroup1_._v_file.filename) + h5f.close() + Path(fname).unlink() + + def test11_direct_attribute_access(self): + """Check direct get/set attributes via link-->target.attribute""" + + larr1 = self.h5file.get_node("/lgroup1/larr1") + arr1 = self.h5file.get_node("/arr1") + # get + self.assertEqual(larr1.shape, (2,)) + self.assertEqual(larr1[:], [1, 2]) + # set + larr1[0] = -1 + self.assertEqual(arr1[:], [-1, 2]) + + def test12_access_child_node_attributes(self): + """Check get/set attributes via link-->target.child.attribute""" + + lgroup1 = self.h5file.get_node("/lgroup1") + arr2 = self.h5file.get_node("/group1/arr2") + # get child attribute + self.assertEqual(lgroup1.arr2[:], [1, 2, 3]) + # set child attribute + lgroup1.arr2[0] = -1 + self.assertEqual(arr2[:], [-1, 2, 3]) + + def test13_direct_attribute_access_via_chained_softlinks(self): + """Check get/set access via link2-->link1-->target.child.attribute""" + + self.h5file.get_node("/lgroup1") + arr2 = self.h5file.get_node("/group1/arr2") + # multiple chained links + l_lgroup1 = self.h5file.create_soft_link("/", "l_lgroup1", "/lgroup1") + # get child attribute + self.assertEqual(l_lgroup1.arr2[:], [1, 2, 3]) + # set child attribute + l_lgroup1.arr2[0] = -1 + 
self.assertEqual(arr2[:], [-1, 2, 3]) + + def test14_child_of_softlink_to_group(self): + """Create an array whose parent is a softlink to another group""" + + self.h5file.get_node("/group1") + lgroup1 = self.h5file.get_node("/lgroup1") + self.h5file.create_array(lgroup1, "new_arr", obj=[1, 2, 3]) + new_arr2 = self.h5file.get_node("/group1/new_arr") + self.assertEqual(new_arr2[:], [1, 2, 3]) + + def test_str(self): + s = str(self.h5file) + self.assertEqual(len(re.findall(r"\(SoftLink\)", s)), 3) + self.assertEqual(len(re.findall(r"\(dangling\)", s)), 0) + + def test_str_with_dangling_link(self): + self.h5file.root.group1.arr2.remove() + s = str(self.h5file) + self.assertEqual(len(re.findall(r"\(SoftLink\)", s)), 3) + self.assertEqual(len(re.findall(r"\(dangling\)", s)), 1) + + +# Test for external links +@common.unittest.skipIf( + tb.file._FILE_OPEN_POLICY == "strict", 'FILE_OPEN_POLICY = "strict"' +) +class ExternalLinkTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + self.extfname = tempfile.mktemp(".h5") + self.exth5file = tb.open_file(self.extfname, "w") + self._createFile() + + def tearDown(self): + """Remove ``extfname``.""" + + extfname = self.extfname + self.exth5file.close() + super().tearDown() + + # open_files = tables.file._open_files + # if self.extfname in open_files: + # #assert False + # for handler in open_files.get_handlers_by_name(self.extfname): + # handler.close() + + Path(extfname).unlink() # comment this for debugging purposes only + + def _createFile(self): + self.h5file.create_array("/", "arr1", [1, 2]) + group1 = self.h5file.create_group("/", "group1") + self.h5file.create_array(group1, "arr2", [1, 2, 3]) + + # The external file + extarr1 = self.exth5file.create_array("/", "arr1", [1, 2]) + self.assertIsNotNone(extarr1) + extgroup1 = self.exth5file.create_group("/", "group1") + extarr2 = self.exth5file.create_array(extgroup1, "arr2", [1, 2, 3]) + + # Create external links + lgroup1 = 
self.h5file.create_external_link( + "/", "lgroup1", "%s:/group1" % self.extfname + ) + self.assertIsNotNone(lgroup1) + larr1 = self.h5file.create_external_link( + group1, "larr1", "%s:/arr1" % self.extfname + ) + self.assertIsNotNone(larr1) + larr2 = self.h5file.create_external_link("/", "larr2", extarr2) + self.assertIsNotNone(larr2) + + # Re-open the external file in 'r'ead-only mode + self.exth5file.close() + self.exth5file = tb.open_file(self.extfname, "r") + + def test00_create(self): + """Creating soft links.""" + + self._checkEqualityGroup( + self.exth5file.root.group1, self.h5file.root.lgroup1() + ) + self._checkEqualityLeaf( + self.exth5file.root.arr1, self.h5file.root.group1.larr1() + ) + self._checkEqualityLeaf( + self.h5file.root.lgroup1().arr2, self.h5file.root.larr2() + ) + + def test01_open(self): + """Opening a file with soft links.""" + + self._reopen() + self._checkEqualityGroup( + self.exth5file.root.group1, self.h5file.root.lgroup1() + ) + self._checkEqualityLeaf( + self.exth5file.root.arr1, self.h5file.root.group1.larr1() + ) + self._checkEqualityLeaf( + self.h5file.root.lgroup1().arr2, self.h5file.root.larr2() + ) + + def test02_remove(self): + """Removing an external link.""" + + # Re-open the external file in 'a'ppend mode + self.exth5file.close() + self.exth5file = tb.open_file(self.extfname, "a") + + # First delete the referred link + self.exth5file.root.arr1.remove() + self.assertNotIn("/arr1", self.exth5file) + + # The external link should still be there (but dangling) + if common.verbose: + print("Dangling link:", self.h5file.root.group1.larr1) + self.assertIn("/group1/larr1", self.h5file) + + # Remove the external link itself + self.h5file.root.group1.larr1.remove() + self.assertNotIn("/group1/larr1", self.h5file) + + def test03_copy(self): + """Copying an external link.""" + + # Copy the link into another location + root = self.h5file.root + lgroup1 = root.lgroup1 + lgroup2 = lgroup1.copy("/", "lgroup2") + self.assertIn("/lgroup1", 
self.h5file) + self.assertIn("/lgroup2", self.h5file) + self.assertIn("lgroup2", root._v_children) + self.assertIn("lgroup2", root._v_links) + if common.verbose: + print("Copied link:", lgroup2) + + # Remove the first link + lgroup1.remove() + self._checkEqualityGroup( + self.exth5file.root.group1, self.h5file.root.lgroup2() + ) + + def test03_overwrite(self): + """Overwrite an external link.""" + + # Copy the link into another location + root = self.h5file.root + lgroup1 = root.lgroup1 + lgroup2 = lgroup1.copy("/", "lgroup2") + lgroup2 = lgroup1.copy("/", "lgroup2", overwrite=True) + self.assertIn("/lgroup1", self.h5file) + self.assertIn("/lgroup2", self.h5file) + self.assertIn("lgroup2", root._v_children) + self.assertIn("lgroup2", root._v_links) + if common.verbose: + print("Copied link:", lgroup2) + + # Remove the first link + lgroup1.remove() + self._checkEqualityGroup( + self.exth5file.root.group1, self.h5file.root.lgroup2() + ) + + def test04_move(self): + """Moving an external link.""" + + # Move the link into another location + lgroup1 = self.h5file.root.lgroup1 + group2 = self.h5file.create_group("/", "group2") + lgroup1.move(group2, "lgroup2") + lgroup2 = self.h5file.root.group2.lgroup2 + if common.verbose: + print("Moved link:", lgroup2) + self.assertNotIn("/lgroup1", self.h5file) + self.assertIn("/group2/lgroup2", self.h5file) + self._checkEqualityGroup( + self.exth5file.root.group1, self.h5file.root.group2.lgroup2() + ) + + def test05_rename(self): + """Renaming an external link.""" + + # Rename the link + lgroup1 = self.h5file.root.lgroup1 + lgroup1.rename("lgroup2") + lgroup2 = self.h5file.root.lgroup2 + if common.verbose: + print("Moved link:", lgroup2) + self.assertNotIn("/lgroup1", self.h5file) + self.assertIn("/lgroup2", self.h5file) + self._checkEqualityGroup( + self.exth5file.root.group1, self.h5file.root.lgroup2() + ) + + def test07_walkNodes(self): + """Checking `walk_nodes` with `classname` option.""" + + # Create a new soft link + 
self.h5file.create_soft_link("/group1", "lgroup3", "./group3") + links = [ + node._v_pathname + for node in self.h5file.walk_nodes("/", classname="Link") + ] + if common.verbose: + print("detected links (classname='Link'):", links) + self.assertEqual( + links, ["/larr2", "/lgroup1", "/group1/larr1", "/group1/lgroup3"] + ) + links = [ + node._v_pathname + for node in self.h5file.walk_nodes("/", classname="ExternalLink") + ] + if common.verbose: + print("detected links (classname='ExternalLink'):", links) + self.assertEqual(links, ["/larr2", "/lgroup1", "/group1/larr1"]) + + def test08__v_links(self): + """Checking `Group._v_links`.""" + + links = [node for node in self.h5file.root._v_links] + if common.verbose: + print("detected links (under root):", links) + self.assertEqual(len(links), 2) + links = [node for node in self.h5file.root.group1._v_links] + if common.verbose: + print("detected links (under /group1):", links) + self.assertEqual(links, ["larr1"]) + + def test09_umount(self): + """Checking `umount()` method.""" + + link = self.h5file.root.lgroup1 + self.assertIsNone(link.extfile) + + # Dereference an external node (and hence, 'mount' a file) + enode = link() + self.assertIsNotNone(enode) + self.assertIsNotNone(link.extfile) + + # Umount the link + link.umount() + self.assertIsNone(link.extfile) + + def test10_copy_link_to_file(self): + """Checking copying a link to another file.""" + + h5fname2 = tempfile.mktemp(".h5") + try: + with tb.open_file(h5fname2, "a") as h5file2: + h5file2.create_array("/", "arr1", [1, 2]) + h5file2.create_group("/", "group1") + lgroup1 = self.h5file.root.lgroup1 + lgroup1_ = lgroup1.copy(h5file2.root, "lgroup1") + self.assertIn("/lgroup1", self.h5file) + self.assertIn("/lgroup1", h5file2) + self.assertIn(lgroup1_, h5file2) + if common.verbose: + print( + "Copied link:", + lgroup1_, + "in:", + lgroup1_._v_file.filename, + ) + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + def 
test11_copy_entire_file_with_hardlink_option(self): + """Checking copying the entire file (that contains external links) + in a similar way ptrepack does (with hardlink kwargs activated)""" + + h5fname2 = tempfile.mktemp(".h5") + try: + with tb.open_file(h5fname2, "a") as h5file2: + self.h5file.root._f_copy_children( + h5file2.root, recursive=True, use_hardlinks=True + ) + self.assertIn("/lgroup1", h5file2) + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + +def suite(): + """Return a test suite consisting of all the test cases in the module.""" + + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # uncomment this only for testing purposes + + for i in range(niter): + theSuite.addTest(common.make_suite(HardLinkTestCase)) + theSuite.addTest(common.make_suite(SoftLinkTestCase)) + theSuite.addTest(common.make_suite(ExternalLinkTestCase)) + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_lists.py b/venv/Lib/site-packages/tables/tests/test_lists.py new file mode 100644 index 0000000..1d6748c --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_lists.py @@ -0,0 +1,475 @@ +import tempfile +from pathlib import Path + +import tables as tb +from tables.tests import common + + +def WriteRead(filename, testTuple): + if common.verbose: + print("\n", "-=" * 30) + print("Running test for object %s" % type(testTuple)) + + # Create an instance of HDF5 Table + fileh = tb.open_file(filename, mode="w") + root = fileh.root + try: + # Create the array under root and name 'somearray' + a = testTuple + fileh.create_array(root, "somearray", a, "Some array") + finally: + # Close the file + fileh.close() + + # Re-open the file in read-only mode + fileh = tb.open_file(filename, mode="r") + root = fileh.root + + # Read the saved array + try: + b = root.somearray.read() + # 
Compare them. They should be equal. + if not a == b and common.verbose: + print("Write and read lists/tuples differ!") + print("Object written:", a) + print("Object read:", b) + + # Check strictly the array equality + assert a == b + finally: + # Close the file + fileh.close() + + +class BasicTestCase(common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5fname = tempfile.mktemp(".h5") + self.h5file = None + + def tearDown(self): + if self.h5file is not None: + self.h5file.close() + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + super().tearDown() + + def test00_char(self): + """Data integrity during recovery (character types)""" + + a = self.charList + WriteRead(self.h5fname, a) + + def test01_types(self): + """Data integrity during recovery (numerical types)""" + + a = self.numericalList + WriteRead(self.h5fname, a) + + +class Basic0DOneTestCase(BasicTestCase): + # Scalar case + title = "Rank-0 case 1" + numericalList = 3 + charList = b"3" + + +class Basic0DTwoTestCase(BasicTestCase): + # Scalar case + title = "Rank-0 case 2" + numericalList = 33.34 + charList = b"33" * 500 + + +# This does not work anymore because I've split the chunked arrays to happen +# mainly in EArray objects +# class Basic1DZeroTestCase(BasicTestCase): +# title = "Rank-1 case 0" +# numericalList = [] +# charList = [] + + +class Basic1DOneTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 1" + numericalList = [3] + charList = [b"a"] + + +class Basic1DTwoTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 2" + numericalList = [3.2, 4.2] + charList = [b"aaa"] + + +class Basic2DTestCase(BasicTestCase): + # 2D case + title = "Rank-2 case 1" + numericalList = [[1, 2]] * 5 + charList = [[b"qq", b"zz"]] * 5 + + +class Basic10DTestCase(BasicTestCase): + # 10D case + title = "Rank-10 case 1" + numericalList = [[[[[[[[[[1, 2], [3, 4]]]]]]]]]] * 5 + # Dimensions greater than 6 in strings gives some warnings + charList = [[[[[[[[[[b"a", b"b"], [b"qq", 
b"zz"]]]]]]]]]] * 5 + + +class ExceptionTestCase(common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5fname = tempfile.mktemp(".h5") + self.h5file = None + + def tearDown(self): + if self.h5file is not None: + self.h5file.close() + if Path(self.h5fname).is_file(): + Path(self.h5fname).unlink() + super().tearDown() + + def test00_char(self): + """Non supported lists objects (character objects)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running test for %s" % (self.title)) + a = self.charList + with self.assertRaises((ValueError, TypeError)): + WriteRead(self.h5fname, a) + + def test01_types(self): + """Non supported lists object (numerical types)""" + + a = self.numericalList + with self.assertRaises((ValueError, TypeError)): + WriteRead(self.h5fname, a) + + +class Basic1DFourTestCase(ExceptionTestCase): + title = "Rank-1 case 4 (non-regular list)" + numericalList = [3, [4, 5.2]] + charList = [b"aaa", [b"bbb", b"ccc"]] + + +class GetItemTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test00_single(self): + """Single element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original first element:", a[0]) + print("Read first element:", arr[0]) + self.assertEqual(a[0], arr[0]) + + def test01_single(self): + """Single element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original first element:", a[0]) + print("Read first element:", arr[0]) + self.assertEqual(a[0], arr[0]) + + def test02_range(self): + """Range element access (character types)""" + + # Create the array under root and name 'somearray' + a = 
self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4]) + print("Read elements:", arr[1:4]) + self.assertEqual(a[1:4], arr[1:4]) + + def test03_range(self): + """Range element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4]) + print("Read elements:", arr[1:4]) + self.assertEqual(a[1:4], arr[1:4]) + + def test04_range(self): + """Range element access, strided (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4:2]) + print("Read elements:", arr[1:4:2]) + self.assertEqual(a[1:4:2], arr[1:4:2]) + + def test05_range(self): + """Range element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original elements:", a[1:4:2]) + print("Read elements:", arr[1:4:2]) + self.assertEqual(a[1:4:2], arr[1:4:2]) + + def test06_negativeIndex(self): + """Negative Index element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original last element:", a[-1]) + print("Read last element:", arr[-1]) + self.assertEqual(a[-1], arr[-1]) + + def test07_negativeIndex(self): + """Negative Index 
element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original before last element:", a[-2]) + print("Read before last element:", arr[-2]) + self.assertEqual(a[-2], arr[-2]) + + def test08_negativeRange(self): + """Negative range element access (character types)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertEqual(a[-4:-1], arr[-4:-1]) + + def test09_negativeRange(self): + """Negative range element access (numerical types)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if common.verbose: + print("Original last elements:", a[-4:-1]) + print("Read last elements:", arr[-4:-1]) + self.assertEqual(a[-4:-1], arr[-4:-1]) + + +class GI1ListTestCase(GetItemTestCase): + title = "Rank-1 case 1 (lists)" + numericalList = [3] + numericalListME = [3, 2, 1, 0, 4, 5, 6] + charList = [b"3"] + charListME = [b"321", b"221", b"121", b"021", b"421", b"521", b"621"] + + +class GI2ListTestCase(GetItemTestCase): + # A more complex example + title = "Rank-1,2 case 2 (lists)" + numericalList = [3, 4] + numericalListME = [ + [3, 2, 1, 0, 4, 5, 6], + [2, 1, 0, 4, 5, 6, 7], + [4, 3, 2, 1, 0, 4, 5], + [3, 2, 1, 0, 4, 5, 6], + [3, 2, 1, 0, 4, 5, 6], + ] + + charList = [b"a", b"b"] + charListME = [ + [b"321", b"221", b"121", b"021", b"421", b"521", b"621"], + [b"21", b"21", b"11", b"02", b"42", b"21", b"61"], + [b"31", b"21", b"12", b"21", b"41", b"51", 
b"621"], + [b"321", b"221", b"121", b"021", b"421", b"521", b"621"], + [b"3241", b"2321", b"13216", b"0621", b"4421", b"5421", b"a621"], + [b"a321", b"s221", b"d121", b"g021", b"b421", b"5vvv21", b"6zxzxs21"], + ] + + +class GeneratorTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test00a_single(self): + """Testing generator access to Arrays, single elements (char)""" + + # Create the array under root and name 'somearray' + a = self.charList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + ga = [i for i in a] + garr = [i for i in arr] + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + self.assertEqual(ga, garr) + + def test00b_me(self): + """Testing generator access to Arrays, multiple elements (char)""" + + # Create the array under root and name 'somearray' + a = self.charListME + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if isinstance(a[0], tuple): + ga = [list(i) for i in a] + else: + ga = [i for i in a] + garr = [i for i in arr] + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + self.assertEqual(ga, garr) + + def test01a_single(self): + """Testing generator access to Arrays, single elements (numeric)""" + + # Create the array under root and name 'somearray' + a = self.numericalList + arr = self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + ga = [i for i in a] + garr = [i for i in arr] + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + self.assertEqual(ga, garr) + + def test01b_me(self): + """Testing generator access to Arrays, multiple elements (numeric)""" + + # Create the array under root and name 'somearray' + a = self.numericalListME + arr = 
self.h5file.create_array( + self.h5file.root, "somearray", a, "Some array" + ) + + # Get and compare an element + if isinstance(a[0], tuple): + ga = [list(i) for i in a] + else: + ga = [i for i in a] + garr = [i for i in arr] + if common.verbose: + print("Result of original iterator:", ga) + print("Result of read generator:", garr) + self.assertEqual(ga, garr) + + +class GE1ListTestCase(GeneratorTestCase): + # Scalar case + title = "Rank-1 case 1 (lists)" + numericalList = [3] + numericalListME = [3, 2, 1, 0, 4, 5, 6] + charList = [b"3"] + charListME = [b"321", b"221", b"121", b"021", b"421", b"521", b"621"] + + +class GE2ListTestCase(GeneratorTestCase): + # Scalar case + title = "Rank-1,2 case 2 (lists)" + numericalList = [3, 4] + numericalListME = [ + [3, 2, 1, 0, 4, 5, 6], + [2, 1, 0, 4, 5, 6, 7], + [4, 3, 2, 1, 0, 4, 5], + [3, 2, 1, 0, 4, 5, 6], + [3, 2, 1, 0, 4, 5, 6], + ] + + charList = [b"a", b"b"] + charListME = [ + [b"321", b"221", b"121", b"021", b"421", b"521", b"621"], + [b"21", b"21", b"11", b"02", b"42", b"21", b"61"], + [b"31", b"21", b"12", b"21", b"41", b"51", b"621"], + [b"321", b"221", b"121", b"021", b"421", b"521", b"621"], + [b"3241", b"2321", b"13216", b"0621", b"4421", b"5421", b"a621"], + [b"a321", b"s221", b"d121", b"g021", b"b421", b"5vvv21", b"6zxzxs21"], + ] + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + for i in range(niter): + theSuite.addTest(common.make_suite(Basic0DOneTestCase)) + theSuite.addTest(common.make_suite(Basic0DTwoTestCase)) + # theSuite.addTest(make_suite(Basic1DZeroTestCase)) + theSuite.addTest(common.make_suite(Basic1DOneTestCase)) + theSuite.addTest(common.make_suite(Basic1DTwoTestCase)) + theSuite.addTest(common.make_suite(Basic1DFourTestCase)) + theSuite.addTest(common.make_suite(Basic2DTestCase)) + theSuite.addTest(common.make_suite(Basic10DTestCase)) + theSuite.addTest(common.make_suite(GI1ListTestCase)) + theSuite.addTest(common.make_suite(GI2ListTestCase)) + 
theSuite.addTest(common.make_suite(GE1ListTestCase)) + theSuite.addTest(common.make_suite(GE2ListTestCase)) + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_nestedtypes.py b/venv/Lib/site-packages/tables/tests/test_nestedtypes.py new file mode 100644 index 0000000..97314cd --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_nestedtypes.py @@ -0,0 +1,1706 @@ +"""Test module for nested types under PyTables.""" + +import sys +import itertools + +import numpy as np + +import tables as tb +from tables.tests import common + +minRowIndex = 10 + + +# This is the structure of the table used for testing (DON'T PANIC!): +# +# +-+---------------------------------+-----+----------+-+-+ +# |x|Info |color|info |y|z| +# | +-----+--+----------------+----+--+ +----+-----+ | | +# | |value|y2|Info2 |name|z2| |Name|Value| | | +# | | | +----+-----+--+--+ | | | | | | | +# | | | |name|value|y3|z3| | | | | | | | +# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+ +# +# Please note that some fields are explicitly ordered while others are +# ordered alphabetically by name. 
+# The declaration of the nested table: +class Info(tb.IsDescription): + _v_pos = 3 + Name = tb.StringCol(itemsize=2) + Value = tb.ComplexCol(itemsize=16) + + +class TestTDescr(tb.IsDescription): + """A description that has several nested columns.""" + + x = tb.Int32Col(dflt=0, shape=2, pos=0) # 0 + y = tb.Float64Col(dflt=1, shape=(2, 2)) + z = tb.UInt8Col(dflt=1) + color = tb.StringCol(itemsize=2, dflt=b" ", pos=2) + info = Info() + + class Info(tb.IsDescription): # 1 + _v_pos = 1 + name = tb.StringCol(itemsize=2) + value = tb.ComplexCol(itemsize=16, pos=0) # 0 + y2 = tb.Float64Col(dflt=1, pos=1) # 1 + z2 = tb.UInt8Col(dflt=1) + + class Info2(tb.IsDescription): + y3 = tb.Time64Col(dflt=1, shape=2) + z3 = tb.EnumCol({"r": 4, "g": 2, "b": 1}, "r", "int32", shape=2) + name = tb.StringCol(itemsize=2) + value = tb.ComplexCol(itemsize=16, shape=2) + + +# The corresponding nested array description: +testADescr = [ + ("x", "(2,)int32"), + ( + "Info", + [ + ("value", "complex128"), + ("y2", "float64"), + ( + "Info2", + [ + ("name", "S2"), + ("value", "(2,)complex128"), + ("y3", "(2,)float64"), + ("z3", "(2,)int32"), + ], + ), + ("name", "S2"), + ("z2", "uint8"), + ], + ), + ("color", "S2"), + ("info", [("Name", "S2"), ("Value", "complex128")]), + ("y", "(2,2)float64"), + ("z", "uint8"), +] + +# The corresponding nested array description (brief version): +testADescr2 = [ + ("x", "(2,)i4"), + ( + "Info", + [ + ("value", "()c16"), + ("y2", "()f8"), + ( + "Info2", + [ + ("name", "()S2"), + ("value", "(2,)c16"), + ("y3", "(2,)f8"), + ("z3", "(2,)i4"), + ], + ), + ("name", "()S2"), + ("z2", "()u1"), + ], + ), + ("color", "()S2"), + ("info", [("Name", "()S2"), ("Value", "()c16")]), + ("y", "(2, 2)f8"), + ("z", "()u1"), +] + +# A nested array for testing: +testABuffer = [ + # x Info color info y z + # value y2 Info2 name z2 Name Value + # name value y3 z3 + ( + (3, 2), + (6j, 6.0, ("nn", (6j, 4j), (6.0, 4.0), (1, 2)), "NN", 8), + "cc", + ("NN", 6j), + ((6.0, 4.0), (6.0, 4.0)), + 
8, + ), + ( + (4, 3), + (7j, 7.0, ("oo", (7j, 5j), (7.0, 5.0), (2, 1)), "OO", 9), + "dd", + ("OO", 7j), + ((7.0, 5.0), (7.0, 5.0)), + 9, + ), +] +testAData = np.array(testABuffer, dtype=testADescr) +# The name of the column to be searched: +testCondCol = "Info/z2" +# The name of a nested column (it can not be searched): +testNestedCol = "Info" +# The condition to be applied on the column (all but the last row match it): +testCondition = "(2 < col) & (col < 9)" + + +def areDescriptionsEqual(desc1, desc2): + """Are both `desc1` and `desc2` equivalent descriptions? + + The arguments may be description objects (``IsDescription``, + ``Description``) or dictionaries. + + """ + + if isinstance(desc1, tb.Col): + # This is a rough comparison but it suffices here. + return ( + desc1.type == desc2.type + and desc2.dtype == desc2.dtype + and desc1._v_pos == desc2._v_pos + # and desc1.dflt == desc2.dflt) + and common.areArraysEqual(desc1.dflt, desc2.dflt) + ) + + if hasattr(desc1, "_v_colobjects"): # quacks like a Description + cols1 = desc1._v_colobjects + elif hasattr(desc1, "columns"): # quacks like an IsDescription + cols1 = desc1.columns + else: # hope it quacks like a dictionary + cols1 = desc1 + + if hasattr(desc2, "_v_colobjects"): # quacks like a Description + cols2 = desc2._v_colobjects + elif hasattr(desc2, "columns"): # quacks like an IsDescription + cols2 = desc2.columns + else: # hope it quacks like a dictionary + cols2 = desc2 + + if len(cols1) != len(cols2): + return False + + for colName, colobj1 in cols1.items(): + colobj2 = cols2[colName] + if colName == "_v_pos": + # The comparison may not be quite exhaustive! 
+ return colobj1 == colobj2 + if not areDescriptionsEqual(colobj1, colobj2): + return False + + return True + + +# Test creating nested column descriptions +class DescriptionTestCase(common.PyTablesTestCase): + _TestTDescr = TestTDescr + _testADescr = testADescr + _testADescr2 = testADescr2 + _testAData = testAData + + def test00_instance(self): + """Creating an instance of a nested description.""" + + self.assertTrue( + areDescriptionsEqual(self._TestTDescr, self._TestTDescr()), + "Table description does not match the given one.", + ) + + def test01_instance(self): + """Checking attrs of an instance of a nested description.""" + + descr = tb.description.Description(self._TestTDescr().columns) + if common.verbose: + print("Generated description:", descr._v_nested_descr) + print("Should look like:", self._testADescr2) + self.assertEqual( + self._testADescr2, + descr._v_nested_descr, + "Description._v_nested_descr does not match.", + ) + + +# Test creating a nested table and opening it +class CreateTestCase(common.TempFileMixin, common.PyTablesTestCase): + _TestTDescr = TestTDescr + _testABuffer = testABuffer + _testAData = testAData + + def _checkColumns(self, cols, desc): + """Check that `cols` has all the accessors for `self._TestTDescr`.""" + + # ``_desc`` is a leaf column and ``cols`` a ``Column``. + if isinstance(desc, tb.Col): + return isinstance(cols, tb.Column) + + # ``_desc`` is a description object and ``cols`` a ``Cols``. + descColumns = desc._v_colobjects + for colName in descColumns: + if colName not in cols._v_colnames: + return False + if not self._checkColumns( + cols._f_col(colName), descColumns[colName] + ): + return False + + return True + + def _checkDescription(self, table): + """Check that description of `table` matches `self._TestTDescr`.""" + + # Compare descriptions. + self.assertTrue( + areDescriptionsEqual(self._TestTDescr, table.description), + "Table description does not match the given one.", + ) + # Check access to columns. 
+ self._checkColumns(table.cols, table.description) + + def _checkColinstances(self, table): + """Check that ``colinstances`` and ``cols`` of `table` match.""" + for colpathname in table.description._v_pathnames: + self.assertTrue( + table.colinstances[colpathname] + is table.cols._f_col(colpathname) + ) + + def test00_create(self): + """Creating a nested table.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + self._checkDescription(tbl) + self._checkColinstances(tbl) + + def test01_open(self): + """Opening a nested table.""" + + self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + self._reopen() + tbl = self.h5file.root.test + self._checkDescription(tbl) + self._checkColinstances(tbl) + + def test02_NestedRecArrayCompat(self): + """Creating a compatible nested record array``.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + + nrarr = np.array(testABuffer, dtype=tbl.description._v_nested_descr) + self.assertTrue( + common.areArraysEqual(nrarr, self._testAData), + "Can not create a compatible structured array.", + ) + + def test03_NRA(self): + """Creating a table from a nested record array object.""" + + tbl = self.h5file.create_table( + "/", "test", self._testAData, title=self._getMethodName() + ) + tbl.flush() + readAData = tbl.read() + if common.verbose: + print("Read data:", readAData) + print("Should look like:", self._testAData) + self.assertTrue( + common.areArraysEqual(self._testAData, readAData), + "Written and read values differ.", + ) + + def test04_NRA2(self): + """Creating a table from a generated nested record array object.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + readAData = tbl.read() + + tbl2 = self.h5file.create_table( + "/", "test2", readAData, title=self._getMethodName() + ) + readAData2 = tbl2.read() 
+ + self.assertTrue( + common.areArraysEqual(self._testAData, readAData2), + "Written and read values differ.", + ) + + +# Test writing data in a nested table +class WriteTestCase(common.TempFileMixin, common.PyTablesTestCase): + _TestTDescr = TestTDescr + _testAData = testAData + _testCondition = testCondition + _testCondCol = testCondCol + _testNestedCol = testNestedCol + + def _testCondVars(self, table): + """Get condition variables for the given `table`.""" + return {"col": table.cols._f_col(self._testCondCol)} + + def _testNestedCondVars(self, table): + """Get condition variables for the given `table`.""" + return {"col": table.cols._f_col(self._testNestedCol)} + + def _appendRow(self, row, index): + """ + Append the `index`-th row in `self._testAData` to `row`. + + Values are set field-by-field (be it nested or not). + """ + + record = self._testAData[index] + for fieldName in self._testAData.dtype.names: + row[fieldName] = record[fieldName] + row.append() + + def test00_append(self): + """Appending a set of rows.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + readAData = tbl.read() + self.assertTrue( + common.areArraysEqual(self._testAData, readAData), + "Written and read values differ.", + ) + + def test01_row(self): + """Appending individual rows.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + + row = tbl.row + # Add the first row + self._appendRow(row, 0) + # Add the rest of the rows field by field. 
+ for i in range(1, len(self._testAData)): + self._appendRow(row, i) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + readAData = tbl.read() + self.assertTrue( + common.areArraysEqual(self._testAData, readAData), + "Written and read values differ.", + ) + + def test02_where(self): + """Searching nested data.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + searchedCoords = tbl.get_where_list( + self._testCondition, self._testCondVars(tbl) + ) + + # All but the last row match the condition. + searchedCoords.sort() + self.assertEqual( + searchedCoords.tolist(), + list(range(len(self._testAData) - 1)), + "Search returned incorrect results.", + ) + + def test02b_whereAppend(self): + """Searching nested data and appending it to another table.""" + + tbl1 = self.h5file.create_table( + "/", "test1", self._TestTDescr, title=self._getMethodName() + ) + tbl1.append(self._testAData) + tbl1.flush() + + tbl2 = self.h5file.create_table( + "/", "test2", self._TestTDescr, title=self._getMethodName() + ) + tbl1.append_where(tbl2, self._testCondition, self._testCondVars(tbl1)) + + if self.reopen: + self._reopen() + tbl1 = self.h5file.root.test1 + tbl2 = self.h5file.root.test2 + + searchedCoords = tbl2.get_where_list( + self._testCondition, self._testCondVars(tbl2) + ) + + # All but the last row match the condition. 
+ searchedCoords.sort() + self.assertEqual( + searchedCoords.tolist(), + list(range(len(self._testAData) - 1)), + "Search returned incorrect results.", + ) + + def test03_colscond(self): + """Searching on a column with nested columns.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + self.assertRaises( + TypeError, + tbl.get_where_list, + self._testCondition, + self._testNestedCondVars(tbl), + ) + + def test04_modifyColumn(self): + """Modifying one single nested column (modify_column).""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + nColumn = self._testNestedCol + # Get the nested column data and swap the first and last rows. + raTable = self._testAData.copy() + raColumn = raTable[nColumn] + # The next will not work until NestedRecords supports copies + raColumn[0], raColumn[-1] = (raColumn[-1], raColumn[0]) + + # Write the resulting column and re-read the whole table. + tbl.modify_column(colname=nColumn, column=raColumn) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + raReadTable = tbl.read() + if common.verbose: + print("Table read:", raReadTable) + print("Should look like:", raTable) + + # Compare it to the written one. + self.assertTrue( + common.areArraysEqual(raTable, raReadTable), + "Written and read values differ.", + ) + + def test05a_modifyColumns(self): + """Modifying one nested column (modify_columns).""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + nColumn = self._testNestedCol + # Get the nested column data and swap the first and last rows. 
+ raTable = self._testAData.copy() + raColumn = raTable[nColumn] + raColumn[0], raColumn[-1] = (raColumn[-1].copy(), raColumn[0].copy()) + newdtype = np.dtype([(nColumn, raTable.dtype.fields[nColumn][0])]) + self.assertIsNotNone(newdtype) + + # Write the resulting column and re-read the whole table. + tbl.modify_columns(names=[nColumn], columns=raColumn) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + raReadTable = tbl.read() + if common.verbose: + print("Table read:", raReadTable) + print("Should look like:", raTable) + + # Compare it to the written one. + self.assertTrue( + common.areArraysEqual(raTable, raReadTable), + "Written and read values differ.", + ) + + def test05b_modifyColumns(self): + """Modifying two nested columns (modify_columns).""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + # Get the nested column data and swap the first and last rows. + colnames = ["x", "color"] # Get the first two columns + raCols = np.rec.fromarrays( + [self._testAData["x"].copy(), self._testAData["color"].copy()], + dtype=[("x", "(2,)i4"), ("color", "S2")], + ) + # descr=tbl.description._v_nested_descr[0:2]) + # or... + # names=tbl.description._v_nested_names[0:2], + # formats=tbl.description._v_nested_formats[0:2]) + raCols[0], raCols[-1] = (raCols[-1].copy(), raCols[0].copy()) + + # Write the resulting columns + tbl.modify_columns(names=colnames, columns=raCols) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + # Re-read the appropriate columns + raCols2 = np.rec.fromarrays( + [tbl.cols._f_col("x"), tbl.cols._f_col("color")], + dtype=raCols.dtype, + ) + if common.verbose: + print("Table read:", raCols2) + print("Should look like:", raCols) + + # Compare it to the written one. 
+ self.assertTrue( + common.areArraysEqual(raCols, raCols2), + "Written and read values differ.", + ) + + def test06_modifyRows(self): + """Checking modifying several rows at once (using nested rec array)""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + # Get the nested record and swap the first and last rows. + raTable = self._testAData.copy() + raTable[0], raTable[-1] = (raTable[-1].copy(), raTable[0].copy()) + + # Write the resulting nested record and re-read the whole table. + tbl.modify_rows(start=0, stop=2, rows=raTable) + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + raReadTable = tbl.read() + if common.verbose: + print("Table read:", raReadTable) + print("Should look like:", raTable) + + # Compare it to the written one. + self.assertTrue( + common.areArraysEqual(raTable, raReadTable), + "Written and read values differ.", + ) + + def test07_index(self): + """Checking indexes of nested columns.""" + + tbl = self.h5file.create_table( + "/", + "test", + self._TestTDescr, + title=self._getMethodName(), + expectedrows=minRowIndex * 2, + ) + for i in range(minRowIndex): + tbl.append(self._testAData) + tbl.flush() + coltoindex = tbl.cols._f_col(self._testCondCol) + indexrows = coltoindex.create_index() + self.assertIsNotNone(indexrows) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + coltoindex = tbl.cols._f_col(self._testCondCol) + + if common.verbose: + print("Number of written rows:", tbl.nrows) + print("Number of indexed rows:", coltoindex.index.nelements) + + # Check indexing flags: + self.assertEqual(tbl.indexed, True, "Table not indexed") + self.assertNotEqual(coltoindex.index, None, "Column not indexed") + self.assertTrue( + tbl.colindexed[self._testCondCol], "Column not indexed" + ) + # Do a look-up for values + searchedCoords = tbl.get_where_list( + self._testCondition, self._testCondVars(tbl) + ) 
+ searchedCoords.sort() + + expectedCoords = np.arange(0, minRowIndex * 2, 2, tb.utils.SizeType) + if common.verbose: + print("Searched coords:", searchedCoords) + print("Expected coords:", expectedCoords) + # All even rows match the condition. + self.assertEqual( + searchedCoords.tolist(), + expectedCoords.tolist(), + "Search returned incorrect results.", + ) + + def test08_setNestedField(self): + """Checking modifying a nested field via natural naming.""" + # See ticket #93 (http://www.pytables.org/trac/ticket/93). + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + tbl.flush() + + oldvalue = tbl.cols.Info.z2[0] + tbl.cols.Info.z2[0] = oldvalue + 1 + tbl.flush() + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + newvalue = tbl.cols.Info.z2[0] + self.assertEqual(newvalue, oldvalue + 1) + + +class WriteNoReopen(WriteTestCase): + reopen = 0 + + +class WriteReopen(WriteTestCase): + reopen = 1 + + +class ReadTestCase(common.TempFileMixin, common.PyTablesTestCase): + _TestTDescr = TestTDescr + _testABuffer = testABuffer + _testAData = testAData + _testNestedCol = testNestedCol + + def test00a_repr(self): + """Checking representation of a nested Table.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title="test00" + ) + tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + if common.verbose: + print("str(tbl)-->", str(tbl)) + print("repr(tbl)-->", repr(tbl)) + + self.assertEqual( + str(tbl), f"/test (Table({np.int64(2)!r},)) {np.str_('test00')!r}" + ) + tblrepr = repr(tbl) + # Remove the platform-dependent information (i.e. 
byteorder) + tblrepr = "\n".join(tblrepr.split("\n")[:-2]) + "\n" + template = f"""/test (Table({np.int64(2)!r},)) {np.str_('test00')!r} + description := {{ + "x": Int32Col(shape=({np.int64(2)!r},), dflt={np.int32(0)!r}, pos=0), + "Info": {{ + "value": ComplexCol(itemsize=16, shape=(), dflt={np.complex128(0j)!r}, pos=0), + "y2": Float64Col(shape=(), dflt={np.float64(1.0)!r}, pos=1), + "Info2": {{ + "name": StringCol(itemsize=2, shape=(), dflt={np.bytes_(b'')!r}, pos=0), + "value": ComplexCol(itemsize=16, shape=({np.int64(2)!r},), dflt={np.complex128(0j)!r}, pos=1), + "y3": Time64Col(shape=({np.int64(2)!r},), dflt={np.float64(1.0)!r}, pos=2), + "z3": EnumCol(enum=Enum({{%(value)s}}), dflt='%(default)s', base=Int32Atom(shape=(), dflt={np.int32(0)!r}), shape=({np.int64(2)!r},), pos=3)}}, + "name": StringCol(itemsize=2, shape=(), dflt={np.bytes_(b'')!r}, pos=3), + "z2": UInt8Col(shape=(), dflt={np.uint8(1)!r}, pos=4)}}, + "color": StringCol(itemsize=2, shape=(), dflt={np.bytes_(b' ')!r}, pos=2), + "info": {{ + "Name": StringCol(itemsize=2, shape=(), dflt={np.bytes_(b'')!r}, pos=0), + "Value": ComplexCol(itemsize=16, shape=(), dflt={np.complex128(0j)!r}, pos=1)}}, + "y": Float64Col(shape=({np.int64(2)!r}, {np.int64(2)!r}), dflt={np.float64(1.0)!r}, pos=4), + "z": UInt8Col(shape=(), dflt={np.uint8(1)!r}, pos=5)}} +""" + + # The problem here is that the order in which items are stored in a + # dict can't be assumed to be stable. + # From python 3.3 on it is actually no more stable since the + # "Hash randomization" feature is enable by default. + # + # For this reason we generate a representation string for each of the + # permutations of the Enum items. + # + # Also the default value of enum types is not preserved in HDF5. + # It is assumed that the default value is the first one in the array + # of Enum names and hence it is also affected by the issue related to + # the "Hash randomization" feature. 
+ # + # Also in this case it is generated a representation string for each + # of the possible default values. + enums = [ + ", ".join(items) + for items in itertools.permutations( + ( + f"'r': {np.int32(4)!r}", + f"'b': {np.int32(1)!r}", + f"'g': {np.int32(2)!r}", + ) + if self.reopen + else ("'r': 4", "'b': 1", "'g': 2") + ) + ] + defaults = ("r", "b", "g") + values = [ + template % {"value": v, "default": d} + for v, d in itertools.product(enums, defaults) + ] + self.assertIn(tblrepr, values) + + def test00b_repr(self): + """Checking representation of a root Column.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title="test00" + ) + tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + if common.verbose: + print("str(tbl.cols.y)-->'%s'" % str(tbl.cols.y)) + print("repr(tbl.cols.y)-->'%s'" % repr(tbl.cols.y)) + + self.assertEqual( + str(tbl.cols.y), + f"/test.cols.y (Column({np.int64(2)!r}, 2, 2), float64, idx=None)", + ) + self.assertEqual( + repr(tbl.cols.y), + f"/test.cols.y (Column({np.int64(2)!r}, 2, 2), float64, idx=None)", + ) + + def test00c_repr(self): + """Checking representation of a nested Column.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title="test00" + ) + tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + if common.verbose: + print("str(tbl.cols.Info.z2)-->'%s'" % str(tbl.cols.Info.z2)) + print("repr(tbl.cols.Info.z2)-->'%s'" % repr(tbl.cols.Info.z2)) + + self.assertEqual( + str(tbl.cols.Info.z2), + f"/test.cols.Info.z2 (Column({np.int64(2)!r},), uint8, idx=None)", + ) + self.assertEqual( + repr(tbl.cols.Info.z2), + f"/test.cols.Info.z2 (Column({np.int64(2)!r},), uint8, idx=None)", + ) + + def test01_read(self): + """Checking Table.read with subgroups with a range index with step.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + 
tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + nrarr = np.rec.array( + testABuffer, dtype=tbl.description._v_nested_descr + ) + tblcols = tbl.read(start=0, step=2, field="Info") + nrarrcols = nrarr["Info"][0::2] + if common.verbose: + print("Read cols:", tblcols) + print("Should look like:", nrarrcols) + self.assertTrue( + common.areArraysEqual(nrarrcols, tblcols), + "Original array are retrieved doesn't match.", + ) + + def test01_read_out_arg(self): + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + nrarr = np.rec.array( + testABuffer, dtype=tbl.description._v_nested_descr + ) + # When reading an entire nested column, the output array must contain + # all fields in the table. The output buffer will contain the contents + # of all fields. The selected column alone will be returned from the + # method call. 
+ all_cols = np.empty(1, tbl.dtype) + tblcols = tbl.read(start=0, step=2, field="Info", out=all_cols) + nrarrcols = nrarr["Info"][0::2] + if common.verbose: + print("Read cols:", tblcols) + print("Should look like:", nrarrcols) + self.assertTrue( + common.areArraysEqual(nrarrcols, tblcols), + "Original array are retrieved doesn't match.", + ) + self.assertTrue( + common.areArraysEqual(nrarr[0::2], all_cols), + "Output buffer does not match full table.", + ) + + def test02_read(self): + """Checking Table.read with a nested Column.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + tblcols = tbl.read(start=0, step=2, field="Info/value") + nrarr = np.rec.array( + testABuffer, dtype=tbl.description._v_nested_descr + ) + nrarrcols = nrarr["Info"]["value"][0::2] + self.assertTrue( + common.areArraysEqual(nrarrcols, tblcols), + "Original array are retrieved doesn't match.", + ) + + def test02_read_out_arg(self): + """Checking Table.read with a nested Column.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + tbl.append(self._testAData) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + tblcols = np.empty(1, dtype="c16") + tbl.read(start=0, step=2, field="Info/value", out=tblcols) + nrarr = np.rec.array( + testABuffer, dtype=tbl.description._v_nested_descr + ) + nrarrcols = nrarr["Info"]["value"][0::2] + self.assertTrue( + common.areArraysEqual(nrarrcols, tblcols), + "Original array are retrieved doesn't match.", + ) + + +class ReadNoReopen(ReadTestCase): + reopen = 0 + + +class ReadReopen(ReadTestCase): + reopen = 1 + + +# Checking the Table.Cols accessor +class ColsTestCase(common.TempFileMixin, common.PyTablesTestCase): + _TestTDescr = TestTDescr + _testABuffer = testABuffer + _testAData = testAData + _testNestedCol = testNestedCol + + def 
test00a_repr(self): + """Checking string representation of Cols.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title="test00" + ) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + if common.verbose: + print("str(tbl.cols)-->", str(tbl.cols)) + print("repr(tbl.cols)-->", repr(tbl.cols)) + + self.assertEqual(str(tbl.cols), "/test.cols (Cols), 6 columns") + try: + self.assertEqual( + repr(tbl.cols), + """/test.cols (Cols), 6 columns + x (Column(0, 2), ('int32',(2,))) + Info (Cols(), Description) + color (Column(0,), |S2) + info (Cols(), Description) + y (Column(0, 2, 2), ('float64',(2, 2))) + z (Column(0,), uint8) +""", + ) + except AssertionError: + self.assertEqual( + repr(tbl.cols), + f"""/test.cols (Cols), 6 columns + x (Column({np.int64(0)!r}, 2), ('{np.int32(0).dtype.str}', (2,))) + Info (Cols(), Description) + color (Column({np.int64(0)!r},), |S2) + info (Cols(), Description) + y (Column({np.int64(0)!r}, 2, 2), ('{np.float64(0).dtype.str}', (2, 2))) + z (Column({np.int64(0)!r},), uint8) +""", + ) + + def test00b_repr(self): + """Checking string representation of nested Cols.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + + if self.reopen: + self._reopen() + tbl = self.h5file.root.test + + if common.verbose: + print("str(tbl.cols.Info)-->", str(tbl.cols.Info)) + print("repr(tbl.cols.Info)-->", repr(tbl.cols.Info)) + + self.assertEqual( + str(tbl.cols.Info), "/test.cols.Info (Cols), 5 columns" + ) + self.assertEqual( + repr(tbl.cols.Info), + f"""/test.cols.Info (Cols), 5 columns + value (Column({np.int64(0)!r},), complex128) + y2 (Column({np.int64(0)!r},), float64) + Info2 (Cols(), Description) + name (Column({np.int64(0)!r},), |S2) + z2 (Column({np.int64(0)!r},), uint8) +""", + ) + + def test01a_f_col(self): + """Checking cols._f_col() with a subgroup.""" + + tbl = self.h5file.create_table( + "/", "test", self._TestTDescr, title=self._getMethodName() + ) + + 
def test01c_f_col(self):
    """Checking cols._f_col() with a nested subgroup."""

    tbl = self.h5file.create_table(
        "/", "test", self._TestTDescr, title=self._getMethodName()
    )

    # Honour the ``reopen`` flag like test01a_f_col/test01b_f_col do;
    # without this step the reopen variant of the test case would repeat
    # the no-reopen run and never look the column up in a reopened file.
    if self.reopen:
        self._reopen()
        tbl = self.h5file.root.test

    nested_path = self._testNestedCol + "/Info2"
    tblcol = tbl.cols._f_col(nested_path)
    if common.verbose:
        print("Column group name:", tblcol._v_desc._v_pathname)
    self.assertEqual(
        tblcol._v_desc._v_pathname,
        nested_path,
        "Column group name doesn't match.",
    )
def test02b__len__(self):
    """Check ``len()`` of the nested ``Info`` column group."""

    table = self.h5file.create_table(
        "/", "test", self._TestTDescr, title=self._getMethodName()
    )

    if self.reopen:
        self._reopen()
        table = self.h5file.root.test

    n_columns = len(table.cols.Info)
    if common.verbose:
        print("Column group length:", n_columns)

    # The group must report exactly as many entries as it has column names.
    n_expected = len(table.cols.Info._v_colnames)
    self.assertEqual(
        n_columns, n_expected, "Column group length doesn't match."
    )
def test04a__getitem__(self):
    """Index the ``Info`` column group with a single integer."""

    table = self.h5file.create_table(
        "/", "test", self._TestTDescr, title=self._getMethodName()
    )
    table.append(self._testAData)

    if self.reopen:
        self._reopen()
        table = self.h5file.root.test

    reference = np.array(
        testABuffer, dtype=table.description._v_nested_descr
    )
    info_group = table.cols._f_col("Info")
    actual = info_group[1]
    expected = reference["Info"][1]
    if common.verbose:
        print("Read cols:", actual)
        print("Should look like:", expected)

    same = common.areArraysEqual(expected, actual)
    self.assertTrue(same, "Original array are retrieved doesn't match.")
def test05a__getitem__(self):
    """Index the nested leaf column ``Info/value`` with a single integer."""

    table = self.h5file.create_table(
        "/", "test", self._TestTDescr, title=self._getMethodName()
    )
    table.append(self._testAData)

    if self.reopen:
        self._reopen()
        table = self.h5file.root.test

    reference = np.array(
        testABuffer, dtype=table.description._v_nested_descr
    )
    value_column = table.cols._f_col("Info/value")
    actual = value_column[1]
    expected = reference["Info"]["value"][1]
    if common.verbose:
        print("Read cols:", actual)
        print("Should look like:", expected)

    # A single element is compared directly rather than as an array.
    self.assertEqual(
        expected, actual, "Original array are retrieved doesn't match."
    )
def test_01a__iter__(self):
    """Iterate over the ``Info/value`` column and compare item by item."""

    table = self.h5file.create_table(
        "/", "test", self._TestTDescr, title=self._getMethodName()
    )
    table.append(self._testAData)

    if self.reopen:
        self._reopen()
        table = self.h5file.root.test

    reference = np.array(
        testABuffer, dtype=table.description._v_nested_descr
    )
    expected_values = reference["Info"]["value"]

    # ``seen`` stays 0 when the column yields nothing, so the final length
    # check still fails loudly on an empty iteration.
    seen = 0
    for seen, item in enumerate(table.cols.Info.value, start=1):
        self.assertEqual(item, expected_values[seen - 1])
    self.assertEqual(seen, len(reference))
def test01b(self):
    """Check column pathnames for nested columns declared as classes."""

    table = self.h5file.create_table(
        "/", "test", B_Candidate, title=self._getMethodName()
    )

    if self.reopen:
        self._reopen()
        table = self.h5file.root.test

    # Collect the pathname of every node of the description, in walk order.
    pathnames = []
    for node in table.description._f_walk(type="All"):
        pathnames.append(node._v_pathname)

    if common.verbose:
        print("Pathnames of columns:", pathnames)
        print("Should look like:", self.correct_names)
    self.assertEqual(
        pathnames, self.correct_names, "Column nested names doesn't match."
    )
def test01e(self):
    """Check column pathnames for a mixed dictionary/class declaration."""

    created = self.h5file.create_table(
        "/", "test", E_Candidate, title=self._getMethodName()
    )
    # When reopening, the freshly created handle is replaced by the node
    # fetched from the reopened file.
    if self.reopen:
        self._reopen()
        created = self.h5file.root.test

    walker = created.description._f_walk(type="All")
    found = [entry._v_pathname for entry in walker]

    if common.verbose:
        print("Pathnames of columns:", found)
        print("Should look like:", self.correct_names)

    wanted = self.correct_names
    self.assertEqual(found, wanted, "Column nested names doesn't match.")
def test02a(self):
    """Index two sibling columns of one nested column and query both."""

    def leaf_columns(table):
        # Map the condition variable names onto the nested leaf columns.
        return {
            "i1": table.cols.nested.i1,
            "i2": table.cols.nested.i2,
        }

    desc = {"nested": {"i1": tb.Int32Col(), "i2": tb.Int32Col()}}
    i1 = "nested/i1"
    i2 = "nested/i2"

    tbl = self.h5file.create_table(
        "/", "test", desc, title=self._getMethodName()
    )

    # Fill 1000 rows: i1 holds the row number, i2 twice the row number.
    row = tbl.row
    for value in range(1000):
        row[i1] = value
        row[i2] = value * 2
        row.append()
    tbl.flush()

    cols = leaf_columns(tbl)
    cols["i1"].create_index()
    cols["i2"].create_index()

    if self.reopen:
        self._reopen()
        tbl = self.h5file.root.test
        # The column objects must be looked up again on the reopened table.
        cols = leaf_columns(tbl)

    i1res = [r[i1] for r in tbl.where("i1 < 10", cols)]
    i2res = [r[i2] for r in tbl.where("i2 < 10", cols)]

    expected_i1 = list(range(10))
    expected_i2 = list(range(0, 10, 2))
    if common.verbose:
        print("Retrieved values (i1):", i1res)
        print("Should look like:", list(range(10)))
        print("Retrieved values (i2):", i2res)
        print("Should look like:", list(range(0, 10, 2)))

    self.assertEqual(
        i1res, expected_i1, "Select for nested column (i1) doesn't match."
    )
    self.assertEqual(
        i2res, expected_i2, "Select for nested column (i2) doesn't match."
    )
tbl.cols.nested1.nested2.nested3.i1, + "i2": tbl.cols.nested1.nested2.nested3.i2, + } + + i1res = [r[i1] for r in tbl.where("i1 < 10", cols)] + i2res = [r[i2] for r in tbl.where("i2 < 10", cols)] + + if common.verbose: + print("Retrieved values (i1):", i1res) + print("Should look like:", list(range(10))) + print("Retrieved values (i2):", i2res) + print("Should look like:", list(range(0, 10, 2))) + + self.assertEqual( + i1res, + list(range(10)), + "Select for nested column (i1) doesn't match.", + ) + self.assertEqual( + i2res, + list(range(0, 10, 2)), + "Select for nested column (i2) doesn't match.", + ) + + +class SameNestedNoReopen(SameNestedTestCase): + reopen = 0 + + +class SameNestedReopen(SameNestedTestCase): + reopen = 1 + + +class NestedTypesWithGaps(common.TestFileMixin, common.PyTablesTestCase): + h5fname = common.test_filename("nested-type-with-gaps.h5") + + correct_descr = f"""{{ + "float": Float32Col(shape=(), dflt={np.float32(0.0)!r}, pos=0), + "compound": {{ + "char": Int8Col(shape=(), dflt={np.int8(0)!r}, pos=0), + "double": Float64Col(shape=(), dflt={np.float64(0.0)!r}, pos=1)}}}}""" + + def test01(self): + """Opening a table with nested types with gaps.""" + + tbl = self.h5file.get_node("/nestedtype") + type_descr = repr(tbl.description) + if common.verbose: + print("Type size with gaps:", tbl.description._v_itemsize) + print("And should be: 16") + print("Representation of the nested type:\n", type_descr) + print("And should be:\n", self.correct_descr) + print("Here are the offsets: ", tbl.description._v_offsets) + + self.assertEqual(tbl.description._v_itemsize, 16) + self.assertEqual(type_descr, self.correct_descr) + + if common.verbose: + print("Great! 
# NumPy typecodes exercised by the tests below.
typecodes = ["b", "h", "i", "l", "q", "f", "d"]
# UInt64 checking disabled on win platforms
# because this type is not supported
if sys.platform != "win32":
    typecodes += ["B", "H", "I", "L", "Q", "F", "D"]
else:
    typecodes += ["B", "H", "I", "L", "F", "D"]
typecodes += ["b1"]  # boolean

# Optional types are only tested when this PyTables build exposes the
# matching atom class.
if hasattr(tb, "Float16Atom"):
    typecodes.append("e")
if hasattr(tb, "Float96Atom") or hasattr(tb, "Float128Atom"):
    typecodes.append("g")
# The second name used to be misspelled "Conplex256Atom", so the check could
# never succeed through it and "G" was skipped on builds that provide only
# the 256-bit complex atom.
if hasattr(tb, "Complex192Atom") or hasattr(tb, "Complex256Atom"):
    typecodes.append("G")
def WriteRead(self, testArray):
    """Round-trip *testArray* through a temporary HDF5 file and verify it.

    The array is stored as ``/somearray`` in a scratch file, the file is
    reopened read-only, and the array read back is checked against the
    original for Python type, shape, dtype, atom type and values.  The
    scratch file is removed afterwards, whether or not the checks pass.
    """
    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running test for array with typecode '%s'"
            % testArray.dtype.char,
            end=" ",
        )
        print("for class check:", self.title)

    # Reserve the scratch file name by actually creating the file:
    # tempfile.mktemp() is deprecated because the name it returns can be
    # taken by someone else before the file is created.
    with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as tmp:
        self.h5fname = tmp.name
    try:
        with tb.open_file(self.h5fname, mode="w") as self.h5file:
            self.root = self.h5file.root

            # Create the array under root and name 'somearray'
            a = testArray
            self.h5file.create_array(
                self.root, "somearray", a, "Some array"
            )

        # Re-open the file in read-only mode
        with tb.open_file(self.h5fname, mode="r") as self.h5file:
            self.root = self.h5file.root

            # Read the saved array
            b = self.root.somearray.read()

            # For cases that read returns a python type instead of a
            # numpy type.  (This used to call ``np.np.array``, which
            # raises AttributeError as soon as the branch is taken.)
            if not hasattr(b, "shape"):
                b = np.array(b, dtype=a.dtype.str)

            if common.verbose:
                print("Array written:", a)
                print("Array written shape:", a.shape)
                print("Array written itemsize:", a.itemsize)
                print("Array written type:", a.dtype.char)
                print("Array read:", b)
                print("Array read shape:", b.shape)
                print("Array read itemsize:", b.itemsize)
                print("Array read type:", b.dtype.char)

            # The node is only reachable while the file is open, so all
            # checks that touch it stay inside this ``with`` block.
            type_ = self.root.somearray.atom.type

            # Check strictly the array equality
            self.assertEqual(type(a), type(b))
            self.assertEqual(a.shape, b.shape)
            self.assertEqual(a.shape, self.root.somearray.shape)
            self.assertEqual(a.dtype, b.dtype)
            if a.dtype.char[0] == "S":
                self.assertEqual(type_, "string")
            else:
                self.assertEqual(a.dtype.base.name, type_)

            self.assertTrue(common.allequal(a, b, "numpy"))
    finally:
        # Then, delete the file
        if Path(self.h5fname).is_file():
            Path(self.h5fname).unlink()
a = np.array(self.tupleInt, dtype=typecode) + + # This should not be tested for the rank-0 case + if len(a.shape) == 0: + raise common.unittest.SkipTest + b = a[::2] + + # Ensure that this array is non-contiguous (for non-trivial case) + if a.shape[0] > 2: + self.assertEqual(b.flags["CONTIGUOUS"], False) + self.WriteRead(b) + + +class Basic0DOneTestCase(BasicTestCase): + # Rank-0 case + title = "Rank-0 case 1" + tupleInt = np.array(3) + tupleChar = "4" + + +class Basic0DTwoTestCase(BasicTestCase): + # Rank-0 case + title = "Rank-0 case 2" + tupleInt = np.array(33) + tupleChar = "44" + + +class Basic1DOneTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 1" + tupleInt = np.array((3,)) + tupleChar = ("a",) + + +class Basic1DTwoTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 2" + tupleInt = np.array((0, 4)) + tupleChar = ("aaa",) + + +class Basic1DThreeTestCase(BasicTestCase): + # 1D case + title = "Rank-1 case 3" + tupleInt = np.array((3, 4, 5)) + tupleChar = ( + "aaaa", + "bbb", + ) + + +class Basic2DTestCase(BasicTestCase): + # 2D case + title = "Rank-2 case 1" + # tupleInt = reshape(np.array(np.arange((4)**2)), (4,)*2) + tupleInt = np.ones((4,) * 2) + tupleChar = [["aaa", "ddddd"], ["d", "ss"], ["s", "tt"]] + + +class Basic10DTestCase(BasicTestCase): + # 10D case + title = "Rank-10 case 1" + # tupleInt = reshape(np.array(np.arange((2)**10)), (2,)*10) + tupleInt = np.ones((2,) * 10) + # tupleChar = reshape(np.array([1],dtype="S1"),(1,)*10) + # The next tuple consumes far more time, so this + # test should be run in common.heavy mode. + tupleChar = np.array(tupleInt, dtype="S1") + + +# class Basic32DTestCase(BasicTestCase): +# # 32D case (maximum) +# tupleInt = reshape(np.array((22,)), (1,)*32) +# # Strings seems to be very slow with somewhat large dimensions +# # This should not be run unless the numarray people address this problem +# # F. 
def test00_iterativeGroups(self):
    """Checking combinations of arrays with groups.

    One array is written per entry of ``typecodes``.  Each array has one
    more dimension than the previous one and lives one group level deeper.
    After reopening the file, every array is read back and compared with a
    freshly built copy.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test00_iterativeGroups..."
            % self.__class__.__name__
        )

    # Write phase: start at the root group and descend one level per array.
    group = self.h5file.root
    for rank, typecode in enumerate(typecodes, start=1):
        # Create an array of typecode, with incrementally bigger ranges
        a = np.ones((2,) * rank, typecode)
        # Save it on the HDF5 file
        dsetname = "array_" + typecode
        if common.verbose:
            print("Creating dataset:", group._g_join(dsetname))
        self.h5file.create_array(group, dsetname, a, "Large array")
        # Create a new group for the next array
        group = self.h5file.create_group(group, "group" + str(rank))

    self._reopen()

    # Typecodes that cannot be told apart after a round trip, depending on
    # the size of the C long on this platform.  For any other size no
    # type/value comparison is done, as before.
    long_size = np.dtype("l").itemsize
    if long_size == 4:
        alias_groups = (("i", "l"), ("I", "L"))
    elif long_size == 8:
        alias_groups = (("q", "l"), ("Q", "L"))
    else:
        alias_groups = None

    # Read phase.  The loop now covers *every* typecode: it used to run
    # over range(1, len(typecodes)), which left the last array written
    # above unverified.
    group = self.h5file.root
    for rank, typecode in enumerate(typecodes, start=1):
        # Create an array for later comparison
        a = np.ones((2,) * rank, typecode)
        # Get the dset object hanging from group
        dset = getattr(group, "array_" + typecode)
        # Get the actual array
        b = dset.read()
        if not common.allequal(a, b, "numpy") and common.verbose:
            print("Array a original. Shape: ==>", a.shape)
            print("Array a original. Data: ==>", a)
            print("Info from dataset:", dset._v_pathname)
            print(" shape ==>", dset.shape, end=" ")
            print(" dtype ==> %s" % dset.dtype)
            print("Array b read from file. Shape: ==>", b.shape, end=" ")
            print(". Type ==> %s" % b.dtype.char)

        self.assertEqual(a.shape, b.shape)
        if alias_groups is not None:
            for alias_group in alias_groups:
                if a.dtype.char in alias_group:
                    # Aliased typecodes: accept either spelling.
                    self.assertIn(b.dtype.char, alias_group)
                    break
            else:
                self.assertTrue(common.allequal(a, b, "numpy"))

        # Iterate over the next group
        group = getattr(group, "group" + str(rank))
+ % self.__class__.__name__ + ) + print("Maximum rank for tested arrays:", maxrank) + + group = self.h5file.root + if common.verbose: + print("Rank array writing progress: ", end=" ") + for rank in range(minrank, maxrank + 1): + # Create an array of integers, with incrementally bigger ranges + a = np.ones((1,) * rank, "i") + if common.verbose: + print("%3d," % (rank), end=" ") + self.h5file.create_array(group, "array", a, "Rank: %s" % rank) + group = self.h5file.create_group(group, "group" + str(rank)) + + # Flush the buffers + self.h5file.flush() + + self._reopen() + + group = self.h5file.root + if common.verbose: + print() + print("Rank array reading progress: ") + # Get the metadata on the previously saved arrays + for rank in range(minrank, maxrank + 1): + # Create an array for later comparison + a = np.ones((1,) * rank, "i") + # Get the actual array + b = group.array.read() + if common.verbose: + print("%3d," % (rank), end=" ") + if not a.tolist() == b.tolist() and common.verbose: + dset = group.array + print("Info from dataset:", dset._v_pathname) + print(" Shape: ==>", dset.shape, end=" ") + print(" typecode ==> %c" % dset.typecode) + print("Array b read from file. Shape: ==>", b.shape, end=" ") + print(". Type ==> %c" % b.dtype.char) + self.assertEqual(a.shape, b.shape) + if a.dtype.char == "i": + # Special expection. 
# Test Record class
class Record(tb.IsDescription):
    # One column per supported column type.  Every column has a default
    # value, so rows appended without assignments carry known contents.
    var1 = tb.StringCol(itemsize=4, dflt=b"abcd", pos=0)
    var2 = tb.StringCol(itemsize=1, dflt=b"a", pos=1)
    var3 = tb.BoolCol(dflt=1)
    var4 = tb.Int8Col(dflt=1)
    var5 = tb.UInt8Col(dflt=1)
    var6 = tb.Int16Col(dflt=1)
    var7 = tb.UInt16Col(dflt=1)
    var8 = tb.Int32Col(dflt=1)
    var9 = tb.UInt32Col(dflt=1)
    var10 = tb.Int64Col(dflt=1)
    var11 = tb.Float32Col(dflt=1.0)
    var12 = tb.Float64Col(dflt=1.0)
    var13 = tb.ComplexCol(itemsize=8, dflt=(1.0 + 0.0j))
    var14 = tb.ComplexCol(itemsize=16, dflt=(1.0 + 0.0j))
    # Optional columns: only declared when this PyTables build provides
    # the corresponding column class.
    if hasattr(tb, "Float16Col"):
        var15 = tb.Float16Col(dflt=1.0)
    if hasattr(tb, "Float96Col"):
        var16 = tb.Float96Col(dflt=1.0)
    if hasattr(tb, "Float128Col"):
        var17 = tb.Float128Col(dflt=1.0)
    # 24-byte complex is "Complex192" (24 * 8 bits); the check used to look
    # for a non-existent "Complex196Col", so var18 was never declared.
    if hasattr(tb, "Complex192Col"):
        var18 = tb.ComplexCol(itemsize=24, dflt=(1.0 + 0.0j))
    if hasattr(tb, "Complex256Col"):
        var19 = tb.ComplexCol(itemsize=32, dflt=(1.0 + 0.0j))
+ + Char flavor + + """ + + table = self.h5file.root.table + table.flavor = "numpy" + for colname in table.colnames: + numcol = table.read(field=colname) + typecol = table.coltypes[colname] + itemsizecol = table.description._v_dtypes[colname].base.itemsize + nctypecode = numcol.dtype.char + if typecol == "string": + if itemsizecol > 1: + orignumcol = np.array(["abcd"] * self.nrows, dtype="S4") + else: + orignumcol = np.array(["a"] * self.nrows, dtype="S1") + if common.verbose: + print("Typecode of NumPy column read:", nctypecode) + print("Should look like:", "c") + print("Itemsize of column:", itemsizecol) + print("Shape of NumPy column read:", numcol.shape) + print("Should look like:", orignumcol.shape) + print("First 3 elements of read col:", numcol[:3]) + # Check that both NumPy objects are equal + self.assertTrue(common.allequal(numcol, orignumcol, "numpy")) + + def test01_readTableNum(self): + """Checking column conversion into NumPy in read(). + + NumPy flavor + + """ + + table = self.h5file.root.table + table.flavor = "numpy" + for colname in table.colnames: + numcol = table.read(field=colname) + typecol = table.coltypes[colname] + nctypecode = np.dtype(numcol.dtype.char[0]).type + if typecol != "string": + if common.verbose: + print("Typecode of NumPy column read:", nctypecode) + print("Should look like:", typecol) + orignumcol = np.ones(shape=self.nrows, dtype=numcol.dtype.char) + # Check that both NumPy objects are equal + self.assertTrue(common.allequal(numcol, orignumcol, "numpy")) + + def test02_readCoordsChar(self): + """Column conversion into NumPy in readCoords(). 
+ + Chars + + """ + + table = self.h5file.root.table + table.flavor = "numpy" + coords = [1, 2, 3] + self.nrows = len(coords) + for colname in table.colnames: + numcol = table.read_coordinates(coords, field=colname) + typecol = table.coltypes[colname] + itemsizecol = table.description._v_dtypes[colname].base.itemsize + nctypecode = numcol.dtype.char + if typecol == "string": + if itemsizecol > 1: + orignumcol = np.array(["abcd"] * self.nrows, dtype="S4") + else: + orignumcol = np.array(["a"] * self.nrows, dtype="S1") + if common.verbose: + print("Typecode of NumPy column read:", nctypecode) + print("Should look like:", "c") + print("Itemsize of column:", itemsizecol) + print("Shape of NumPy column read:", numcol.shape) + print("Should look like:", orignumcol.shape) + print("First 3 elements of read col:", numcol[:3]) + # Check that both NumPy objects are equal + self.assertTrue(common.allequal(numcol, orignumcol, "numpy")) + + def test02_readCoordsNum(self): + """Column conversion into NumPy in read_coordinates(). + + NumPy. 
+ + """ + + table = self.h5file.root.table + table.flavor = "numpy" + coords = [1, 2, 3] + self.nrows = len(coords) + for colname in table.colnames: + numcol = table.read_coordinates(coords, field=colname) + typecol = table.coltypes[colname] + type_ = numcol.dtype.type + if typecol != "string": + if typecol == "int64": + return + if common.verbose: + print("Type of read NumPy column:", type_) + print("Should look like:", typecol) + orignumcol = np.ones(shape=self.nrows, dtype=numcol.dtype.char) + # Check that both NumPy objects are equal + self.assertTrue(common.allequal(numcol, orignumcol, "numpy")) + + def test03_getIndexNumPy(self): + """Getting table rows specified as NumPy scalar integers.""" + + table = self.h5file.root.table + coords = np.array([1, 2, 3], dtype="int8") + for colname in table.colnames: + numcol = [table[coord][colname] for coord in coords] + typecol = table.coltypes[colname] + if typecol != "string": + if typecol == "int64": + return + numcol = np.array(numcol, typecol) + if common.verbose: + type_ = numcol.dtype.type + print("Type of read NumPy column:", type_) + print("Should look like:", typecol) + orignumcol = np.ones( + shape=len(numcol), dtype=numcol.dtype.char + ) + # Check that both NumPy objects are equal + self.assertTrue(common.allequal(numcol, orignumcol, "numpy")) + + def test04_setIndexNumPy(self): + """Setting table rows specified as NumPy integers.""" + + self._reopen(mode="a") + table = self.h5file.root.table + table.flavor = "numpy" + coords = np.array([1, 2, 3], dtype="int8") + # Modify row 1 + # From PyTables 2.0 on, assignments to records can be done + # only as tuples (see http://projects.scipy.org/scipy/numpy/ticket/315) + # table[coords[0]] = ["aasa","x"]+[123]*12 + + n = len(Record.columns) - 2 + + table[coords[0]] = tuple(["aasa", "x"] + [123] * n) # XXX + # record = list(table[coords[0]]) + record = table.read(coords[0], coords[0] + 1) + if common.verbose: + print( + "Original row:\n" + "['aasa', 'x', True, 123, 
123, 123, 123, 123, 123L, " + "123, 123.0, 123.0, (123 + 0j), (123+0j), 123.0, " + "(123+0j)]\n" + ) + print("Read row:\n", record) + self.assertEqual(record["var1"], b"aasa") + self.assertEqual(record["var2"], b"x") + self.assertEqual(record["var3"], True) + self.assertEqual(record["var4"], 123) + self.assertEqual(record["var7"], 123) + + +# The declaration of the nested table: +class Info(tb.IsDescription): + _v_pos = 3 + Name = tb.StringCol(itemsize=2) + Value = tb.ComplexCol(itemsize=16) + + +class TestTDescr(tb.IsDescription): + """A description that has several nested columns.""" + + x = tb.Int32Col(dflt=0, shape=2, pos=0) # 0 + y = tb.FloatCol(dflt=1, shape=(2, 2)) + z = tb.UInt8Col(dflt=1) + z3 = tb.EnumCol({"r": 4, "g": 2, "b": 1}, "r", "int32", shape=2) + color = tb.StringCol(itemsize=4, dflt=b"ab", pos=2) + info = Info() + + class Info(tb.IsDescription): # 1 + _v_pos = 1 + name = tb.StringCol(itemsize=2) + value = tb.ComplexCol(itemsize=16, pos=0) # 0 + y2 = tb.FloatCol(pos=1) # 1 + z2 = tb.UInt8Col() + + class Info2(tb.IsDescription): + y3 = tb.Time64Col(shape=2) + name = tb.StringCol(itemsize=2) + value = tb.ComplexCol(itemsize=16, shape=2) + + +class TableNativeFlavorTestCase(common.TempFileMixin, common.PyTablesTestCase): + nrows = 100 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + table = self.h5file.create_table( + self.h5file.root, "table", TestTDescr, expectedrows=self.nrows + ) + table.flavor = "numpy" + for i in range(self.nrows): + table.row.append() # Fill 100 rows with default values + table.flush() + + def test01a_basicTableRead(self): + """Checking the return of a NumPy in read().""" + + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table[:] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + + # Check that both NumPy objects are equal + 
self.assertIsInstance(data, np.ndarray) + + # Check the value of some columns + # A flat column + col = table.cols.x[:3] + self.assertIsInstance(col, np.ndarray) + npcol = np.zeros((3, 2), dtype="int32") + self.assertTrue(common.allequal(col, npcol, "numpy")) + + # A nested column + col = table.cols.Info[:3] + self.assertIsInstance(col, np.ndarray) + dtype = [ + ("value", "c16"), + ("y2", "f8"), + ( + "Info2", + [("name", "S2"), ("value", "c16", (2,)), ("y3", "f8", (2,))], + ), + ("name", "S2"), + ("z2", "u1"), + ] + npcol = np.zeros((3,), dtype=dtype) + self.assertEqual(col.dtype.descr, npcol.dtype.descr) + if common.verbose: + print("col-->", col) + print("npcol-->", npcol) + + # A copy() is needed in case the buffer can be in different segments + self.assertEqual(bytes(col.copy().data), bytes(npcol.data)) + + def test01b_basicTableRead(self): + """Checking the return of a NumPy in read() (strided version).""" + + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table[::3] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the value of some columns + # A flat column + col = table.cols.x[:9:3] + self.assertIsInstance(col, np.ndarray) + npcol = np.zeros((3, 2), dtype="int32") + self.assertTrue(common.allequal(col, npcol, "numpy")) + + # A nested column + col = table.cols.Info[:9:3] + self.assertIsInstance(col, np.ndarray) + dtype = [ + ("value", "%sc16" % byteorder), + ("y2", "%sf8" % byteorder), + ( + "Info2", + [ + ("name", "|S2"), + ("value", "%sc16" % byteorder, (2,)), + ("y3", "%sf8" % byteorder, (2,)), + ], + ), + ("name", "|S2"), + ("z2", "|u1"), + ] + npcol = np.zeros((3,), dtype=dtype) + self.assertEqual(col.dtype.descr, npcol.dtype.descr) + if common.verbose: + print("col-->", col) + print("npcol-->", npcol) + + # A 
copy() is needed in case the buffer can be in different segments + self.assertEqual(bytes(col.copy().data), bytes(npcol.data)) + + def test02_getWhereList(self): + """Checking the return of NumPy in get_where_list method.""" + + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.get_where_list("z == 1") + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), 100) + + # Finally, check that the contents are ok + self.assertTrue( + common.allequal(data, np.arange(100, dtype="i8"), "numpy") + ) + + def test03a_readWhere(self): + """Checking the return of NumPy in read_where method (strings).""" + + table = self.h5file.root.table + table.cols.color.create_index() + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.read_where('color == b"ab"') + if common.verbose: + print("Type of read:", type(data)) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), self.nrows) + + def test03b_readWhere(self): + """Checking the return of NumPy in read_where method (numeric).""" + + table = self.h5file.root.table + table.cols.z.create_index() + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.read_where("z == 0") + if common.verbose: + print("Type of read:", type(data)) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), 0) + + def test04a_createTable(self): + """Checking the Table creation 
from a numpy recarray.""" + + dtype = [ + ("value", "%sc16" % byteorder), + ("y2", "%sf8" % byteorder), + ( + "Info2", + [ + ("name", "|S2"), + ("value", "%sc16" % byteorder, (2,)), + ("y3", "%sf8" % byteorder, (2,)), + ], + ), + ("name", "|S2"), + ("z2", "|u1"), + ] + npdata = np.zeros((3,), dtype=dtype) + table = self.h5file.create_table(self.h5file.root, "table2", npdata) + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table2 + data = table[:] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, npdata.dtype.descr) + if common.verbose: + print("npdata-->", npdata) + print("data-->", data) + + # A copy() is needed in case the buffer would be in different segments + self.assertEqual(bytes(data.copy().data), bytes(npdata.data)) + + def test04b_appendTable(self): + """Checking appending a numpy recarray.""" + + table = self.h5file.root.table + npdata = table[3:6] + table.append(npdata) + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table[-3:] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("Last 3 elements of read:", data[-3:]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, npdata.dtype.descr) + if common.verbose: + print("npdata-->", npdata) + print("data-->", data) + + # A copy() is needed in case the buffer would be in different segments + self.assertEqual(bytes(data.copy().data), bytes(npdata.data)) + + def test05a_assignColumn(self): + """Checking assigning to a column.""" + + table = 
self.h5file.root.table + table.cols.z[:] = np.zeros((100,), dtype="u1") + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.cols.z[:] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), 100) + + # Finally, check that the contents are ok + self.assertTrue( + common.allequal(data, np.zeros((100,), dtype="u1"), "numpy") + ) + + def test05b_modifyingColumns(self): + """Checking modifying several columns at once.""" + + table = self.h5file.root.table + xcol = np.ones((3, 2), "int32") + ycol = np.zeros((3, 2, 2), "float64") + zcol = np.zeros((3,), "uint8") + table.modify_columns(3, 6, 1, [xcol, ycol, zcol], ["x", "y", "z"]) + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.cols.y[3:6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, ycol.dtype.descr) + if common.verbose: + print("ycol-->", ycol) + print("data-->", data) + + # A copy() is needed in case the buffer would be in different segments + self.assertEqual(data.copy().data, ycol.data) + + def test05c_modifyingColumns(self): + """Checking modifying several columns using a single numpy buffer.""" + + table = self.h5file.root.table + dtype = [("x", "i4", (2,)), ("y", "f8", (2, 2)), ("z", "u1")] + nparray = np.zeros((3,), dtype=dtype) + table.modify_columns(3, 6, 1, nparray, ["x", "y", "z"]) + if self.close: + 
self._reopen(mode="a") + table = self.h5file.root.table + ycol = np.zeros((3, 2, 2), "float64") + data = table.cols.y[3:6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, ycol.dtype.descr) + if common.verbose: + print("ycol-->", ycol) + print("data-->", data) + + # A copy() is needed in case the buffer would be in different segments + self.assertEqual(data.copy().data, ycol.data) + + def test06a_assignNestedColumn(self): + """Checking assigning a nested column (using modify_column).""" + + table = self.h5file.root.table + dtype = [ + ("value", "%sc16" % byteorder), + ("y2", "%sf8" % byteorder), + ( + "Info2", + [ + ("name", "|S2"), + ("value", "%sc16" % byteorder, (2,)), + ("y3", "%sf8" % byteorder, (2,)), + ], + ), + ("name", "|S2"), + ("z2", "|u1"), + ] + npdata = np.zeros((3,), dtype=dtype) + data = table.cols.Info[3:6] + table.modify_column(3, 6, 1, column=npdata, colname="Info") + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.cols.Info[3:6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, npdata.dtype.descr) + if common.verbose: + print("npdata-->", npdata) + print("data-->", data) + + # A copy() is needed in case the buffer would be in different segments + self.assertEqual(bytes(data.copy().data), bytes(npdata.data)) + + def test06b_assignNestedColumn(self): + """Checking assigning a nested column (using the .cols 
accessor).""" + + table = self.h5file.root.table + dtype = [ + ("value", "%sc16" % byteorder), + ("y2", "%sf8" % byteorder), + ( + "Info2", + [ + ("name", "|S2"), + ("value", "%sc16" % byteorder, (2,)), + ("y3", "%sf8" % byteorder, (2,)), + ], + ), + ("name", "|S2"), + ("z2", "|u1"), + ] + npdata = np.zeros((3,), dtype=dtype) + # self.assertRaises(NotImplementedError, + # table.cols.Info.__setitem__, slice(3,6,1), npdata) + table.cols.Info[3:6] = npdata + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + data = table.cols.Info[3:6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, npdata.dtype.descr) + if common.verbose: + print("npdata-->", npdata) + print("data-->", data) + + # A copy() is needed in case the buffer would be in different segments + self.assertEqual(bytes(data.copy().data), bytes(npdata.data)) + + def test07a_modifyingRows(self): + """Checking modifying several rows at once (using modify_rows).""" + + table = self.h5file.root.table + + # Read a chunk of the table + chunk = table[0:3] + + # Modify it somewhat + chunk["y"][:] = -1 + table.modify_rows(3, 6, 1, rows=chunk) + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + ycol = np.zeros((3, 2, 2), "float64") - 1 + data = table.cols.y[3:6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, ycol.dtype.descr) + if common.verbose: + print("ycol-->", ycol) + 
print("data-->", data) + self.assertTrue(common.allequal(ycol, data, "numpy")) + + def test07b_modifyingRows(self): + """Checking modifying several rows at once (using cols accessor).""" + + table = self.h5file.root.table + + # Read a chunk of the table + chunk = table[0:3] + + # Modify it somewhat + chunk["y"][:] = -1 + table.cols[3:6] = chunk + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check that some column has been actually modified + ycol = np.zeros((3, 2, 2), "float64") - 1 + data = table.cols.y[3:6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, ycol.dtype.descr) + if common.verbose: + print("ycol-->", ycol) + print("data-->", data) + self.assertTrue(common.allequal(ycol, data, "numpy")) + + def test08a_modifyingRows(self): + """Checking modifying just one row at once (using modify_rows).""" + + table = self.h5file.root.table + + # Read a chunk of the table + chunk = table[3:4] + + # Modify it somewhat + chunk["y"][:] = -1 + table.modify_rows(6, 7, 1, chunk) + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check that some column has been actually modified + ycol = np.zeros((2, 2), "float64") - 1 + data = table.cols.y[6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, ycol.dtype.descr) + if common.verbose: + print("ycol-->", ycol) + print("data-->", data) + 
self.assertTrue(common.allequal(ycol, data, "numpy")) + + def test08b_modifyingRows(self): + """Checking modifying just one row at once (using cols accessor).""" + + table = self.h5file.root.table + + # Read a chunk of the table + chunk = table[3:4] + + # Modify it somewhat + chunk["y"][:] = -1 + table.cols[6] = chunk + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + + # Check that some column has been actually modified + ycol = np.zeros((2, 2), "float64") - 1 + data = table.cols.y[6] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + print("Length of the data read:", len(data)) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, ycol.dtype.descr) + if common.verbose: + print("ycol-->", ycol) + print("data-->", data) + self.assertTrue(common.allequal(ycol, data, "numpy")) + + def test09a_getStrings(self): + """Checking the return of string columns with spaces.""" + + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + rdata = table.get_where_list('color == b"ab"') + data = table.read_coordinates(rdata) + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), 100) + + # Finally, check that the contents are ok + for idata in data["color"]: + self.assertEqual(idata, np.array("ab", dtype="|S4")) + + def test09b_getStrings(self): + """Checking the return of string columns with spaces. 
+ + (modify) + + """ + + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + for i in range(50): + table.cols.color[i] = "a " + table.flush() + data = table[:] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), 100) + + # Finally, check that the contents are ok + for i in range(100): + idata = data["color"][i] + if i >= 50: + self.assertEqual(idata, np.array("ab", dtype="|S4")) + else: + self.assertEqual(idata, np.array("a ", dtype="|S4")) + + def test09c_getStrings(self): + """Checking the return of string columns with spaces. + + (append) + + """ + + if self.close: + self._reopen(mode="a") + table = self.h5file.root.table + row = table.row + for i in range(50): + row["color"] = "a " # note the trailing spaces + row.append() + table.flush() + if self.close: + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, "a") + data = self.h5file.root.table[:] + if common.verbose: + print("Type of read:", type(data)) + print("Description of the record:", data.dtype.descr) + print("First 3 elements of read:", data[:3]) + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check that all columns have been selected + self.assertEqual(len(data), 150) + + # Finally, check that the contents are ok + for i in range(150): + idata = data["color"][i] + if i < 100: + self.assertEqual(idata, np.array("ab", dtype="|S4")) + else: + self.assertEqual(idata, np.array("a ", dtype="|S4")) + + +class TableNativeFlavorOpenTestCase(TableNativeFlavorTestCase): + close = False + + +class TableNativeFlavorCloseTestCase(TableNativeFlavorTestCase): + close = True + + +class AttributesTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): 
+ super().setUp() + + # Create an instance of an HDF5 Table + self.h5file.create_group(self.h5file.root, "group") + + def test01_writeAttribute(self): + """Checking the creation of a numpy attribute.""" + + group = self.h5file.root.group + g_attrs = group._v_attrs + g_attrs.numpy1 = np.zeros((1, 1), dtype="int16") + if self.close: + self._reopen(mode="a") + group = self.h5file.root.group + g_attrs = group._v_attrs + + # Check that we can retrieve a numpy object + data = g_attrs.numpy1 + npcomp = np.zeros((1, 1), dtype="int16") + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, npcomp.dtype.descr) + if common.verbose: + print("npcomp-->", npcomp) + print("data-->", data) + self.assertTrue(common.allequal(npcomp, data, "numpy")) + + def test02_updateAttribute(self): + """Checking the modification of a numpy attribute.""" + + group = self.h5file.root.group + g_attrs = group._v_attrs + g_attrs.numpy1 = np.zeros((1, 2), dtype="int16") + if self.close: + self._reopen(mode="a") + group = self.h5file.root.group + g_attrs = group._v_attrs + + # Update this attribute + g_attrs.numpy1 = np.ones((1, 2), dtype="int16") + + # Check that we can retrieve a numpy object + data = g_attrs.numpy1 + npcomp = np.ones((1, 2), dtype="int16") + + # Check that both NumPy objects are equal + self.assertIsInstance(data, np.ndarray) + + # Check the type + self.assertEqual(data.dtype.descr, npcomp.dtype.descr) + if common.verbose: + print("npcomp-->", npcomp) + print("data-->", data) + self.assertTrue(common.allequal(npcomp, data, "numpy")) + + +class AttributesOpenTestCase(AttributesTestCase): + close = 0 + + +class AttributesCloseTestCase(AttributesTestCase): + close = 1 + + +class StrlenTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + group = self.h5file.create_group(self.h5file.root, "group") + tablelayout = { 
+ "Text": tb.StringCol(itemsize=1000), + } + self.table = self.h5file.create_table(group, "table", tablelayout) + self.table.flavor = "numpy" + row = self.table.row + row["Text"] = "Hello Francesc!" # XXX: check unicode --> bytes + row.append() + row["Text"] = "Hola Francesc!" # XXX: check unicode --> bytes + row.append() + self.table.flush() + + def test01(self): + """Checking the lengths of strings (read field).""" + + if self.close: + self._reopen(mode="a") + self.table = self.h5file.root.group.table + + # Get both strings + str1 = self.table.col("Text")[0] + str2 = self.table.col("Text")[1] + if common.verbose: + print("string1-->", str1) + print("string2-->", str2) + + # Check that both NumPy objects are equal + self.assertEqual(len(str1), len(b"Hello Francesc!")) + self.assertEqual(len(str2), len(b"Hola Francesc!")) + self.assertEqual(str1, b"Hello Francesc!") + self.assertEqual(str2, b"Hola Francesc!") + + def test02(self): + """Checking the lengths of strings (read recarray).""" + + if self.close: + self._reopen(mode="a") + self.table = self.h5file.root.group.table + + # Get both strings + str1 = self.table[:]["Text"][0] + str2 = self.table[:]["Text"][1] + + # Check that both NumPy objects are equal + self.assertEqual(len(str1), len(b"Hello Francesc!")) + self.assertEqual(len(str2), len(b"Hola Francesc!")) + self.assertEqual(str1, b"Hello Francesc!") + self.assertEqual(str2, b"Hola Francesc!") + + def test03(self): + """Checking the lengths of strings (read recarray, row by row).""" + + if self.close: + self._reopen(mode="a") + self.table = self.h5file.root.group.table + + # Get both strings + str1 = self.table[0]["Text"] + str2 = self.table[1]["Text"] + + # Check that both NumPy objects are equal + self.assertEqual(len(str1), len(b"Hello Francesc!")) + self.assertEqual(len(str2), len(b"Hola Francesc!")) + self.assertEqual(str1, b"Hello Francesc!") + self.assertEqual(str2, b"Hola Francesc!") + + +class StrlenOpenTestCase(StrlenTestCase): + close = 0 + + 
+class StrlenCloseTestCase(StrlenTestCase): + close = 1 + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + # theSuite.addTest(make_suite(StrlenOpenTestCase)) + # theSuite.addTest(make_suite(Basic0DOneTestCase)) + # theSuite.addTest(make_suite(GroupsArrayTestCase)) + for i in range(niter): + theSuite.addTest(common.make_suite(Basic0DOneTestCase)) + theSuite.addTest(common.make_suite(Basic0DTwoTestCase)) + theSuite.addTest(common.make_suite(Basic1DOneTestCase)) + theSuite.addTest(common.make_suite(Basic1DTwoTestCase)) + theSuite.addTest(common.make_suite(Basic1DThreeTestCase)) + theSuite.addTest(common.make_suite(Basic2DTestCase)) + theSuite.addTest(common.make_suite(GroupsArrayTestCase)) + theSuite.addTest(common.make_suite(TableReadTestCase)) + theSuite.addTest(common.make_suite(TableNativeFlavorOpenTestCase)) + theSuite.addTest(common.make_suite(TableNativeFlavorCloseTestCase)) + theSuite.addTest(common.make_suite(AttributesOpenTestCase)) + theSuite.addTest(common.make_suite(AttributesCloseTestCase)) + theSuite.addTest(common.make_suite(StrlenOpenTestCase)) + theSuite.addTest(common.make_suite(StrlenCloseTestCase)) + if common.heavy: + theSuite.addTest(common.make_suite(Basic10DTestCase)) + # The 32 dimensions case takes forever to run!! 
+ # theSuite.addTest(make_suite(Basic32DTestCase)) + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_queries.py b/venv/Lib/site-packages/tables/tests/test_queries.py new file mode 100644 index 0000000..d0ae85e --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_queries.py @@ -0,0 +1,1377 @@ +"""Test module for queries on datasets.""" + +import re +import sys +import warnings +import functools + +import numpy as np + +import tables as tb +from tables.tests import common + +# Data parameters +# --------------- +row_period = 50 +"""Maximum number of unique rows before they start cycling.""" +md_shape = (2, 2) +"""Shape of multidimensional fields.""" + +_maxnvalue = row_period + np.prod(md_shape, dtype=tb.utils.SizeType) - 1 +_strlen = int(np.log10(_maxnvalue - 1)) + 1 + +str_format = "%%0%dd" % _strlen +"""Format of string values.""" + +small_blocksizes = (300, 60, 20, 5) +# small_blocksizes = (512, 128, 32, 4) # for manual testing only +"""Sensible parameters for indexing with small blocksizes.""" + + +# Type information +# ---------------- +type_info = { + "bool": (np.bool_, bool), + "int8": (np.int8, int), + "uint8": (np.uint8, int), + "int16": (np.int16, int), + "uint16": (np.uint16, int), + "int32": (np.int32, int), + "uint32": (np.uint32, int), + "int64": (np.int64, int), + "uint64": (np.uint64, int), + "float32": (np.float32, float), + "float64": (np.float64, float), + "complex64": (np.complex64, complex), + "complex128": (np.complex128, complex), + "time32": (np.int32, int), + "time64": (np.float64, float), + "enum": (np.uint8, int), # just for these tests + "string": ("S%s" % _strlen, np.bytes_), # just for these tests +} +"""NumPy and Numexpr type for each PyTables type that will be tested.""" + +# globals dict for eval() +func_info = { + "log10": np.log10, + "log": np.log, + "exp": np.exp, + 
"abs": np.abs, + "sqrt": np.sqrt, + "sin": np.sin, + "cos": np.cos, + "tan": np.tan, + "arcsin": np.arcsin, + "arccos": np.arccos, + "arctan": np.arctan, +} +"""functions and NumPy.ufunc() for each function that will be tested.""" + + +if hasattr(np, "float16"): + type_info["float16"] = (np.float16, float) +# if hasattr(numpy, 'float96'): +# type_info['float96'] = (np.float96, float) +# if hasattr(numpy, 'float128'): +# type_info['float128'] = (np.float128, float) +# if hasattr(numpy, 'complex192'): +# type_info['complex192'] = (np.complex192, complex) +# if hasattr(numpy, 'complex256'): +# type_info['complex256'] = (np.complex256, complex) + +sctype_from_type = {type_: info[0] for (type_, info) in type_info.items()} +"""Maps PyTables types to NumPy scalar types.""" +nxtype_from_type = {type_: info[1] for (type_, info) in type_info.items()} +"""Maps PyTables types to Numexpr types.""" + +heavy_types = frozenset(["uint8", "int16", "uint16", "float32", "complex64"]) +"""PyTables types to be tested only in heavy mode.""" + +enum = tb.Enum({"n%d" % i: i for i in range(_maxnvalue)}) +"""Enumerated type to be used in tests.""" + + +# Table description +# ----------------- +def append_columns(classdict, shape=()): + """Append a ``Col`` of each PyTables data type to the `classdict`. + + A column of a certain TYPE gets called ``c_TYPE``. The number of + added columns is returned. 
+ + """ + heavy = common.heavy + for itype, type_ in enumerate(sorted(type_info)): + if not heavy and type_ in heavy_types: + continue # skip heavy type in non-heavy mode + colpos = itype + 1 + colname = "c_%s" % type_ + if type_ == "enum": + base = tb.Atom.from_sctype(sctype_from_type[type_]) + col = tb.EnumCol(enum, enum(0), base, shape=shape, pos=colpos) + else: + sctype = sctype_from_type[type_] + dtype = np.dtype((sctype, shape)) + col = tb.Col.from_dtype(dtype, pos=colpos) + classdict[colname] = col + ncols = colpos + return ncols + + +def nested_description(classname, pos, shape=()): + """Return a nested column description with all PyTables data types. + + A column of a certain TYPE gets called ``c_TYPE``. The nested + column will be placed in the position indicated by `pos`. + + """ + classdict = {} + append_columns(classdict, shape=shape) + classdict["_v_pos"] = pos + return type(classname, (tb.IsDescription,), classdict) + + +def table_description(classname, nclassname, shape=()): + """Return a table description for testing queries. + + The description consists of all PyTables data types, both in the + top level and in the ``c_nested`` nested column. A column of a + certain TYPE gets called ``c_TYPE``. An extra integer column + ``c_extra`` is also provided. If a `shape` is given, it will be + used for all columns. Finally, an extra indexed column + ``c_idxextra`` is added as well in order to provide some basic + tests for multi-index queries. 
+ + """ + classdict = {} + colpos = append_columns(classdict, shape) + + ndescr = nested_description(nclassname, colpos, shape=shape) + classdict["c_nested"] = ndescr + colpos += 1 + + extracol = tb.IntCol(shape=shape, pos=colpos) + classdict["c_extra"] = extracol + colpos += 1 + + idxextracol = tb.IntCol(shape=shape, pos=colpos) + classdict["c_idxextra"] = idxextracol + colpos += 1 + + return type(classname, (tb.IsDescription,), classdict) + + +TableDescription = table_description("TableDescription", "NestedDescription") +"""Unidimensional table description for testing queries.""" + +MDTableDescription = table_description( + "MDTableDescription", "MDNestedDescription", shape=md_shape +) +"""Multidimensional table description for testing queries.""" + + +# Table data +# ---------- +table_data = {} +"""Cached table data for a given shape and number of rows.""" +# Data is cached because computing it row by row is quite slow. Hop! + + +def fill_table(table, shape, nrows): + """Fill the given `table` with `nrows` rows of data. + + Values in the i-th row (where 0 <= i < `row_period`) for a + multidimensional field with M elements span from i to i + M-1. For + subsequent rows, values repeat cyclically. + + The same goes for the ``c_extra`` column, but values range from + -`row_period`/2 to +`row_period`/2. + + """ + # Reuse already computed data if possible. 
+ tdata = table_data.get((shape, nrows)) + if tdata is not None: + table.append(tdata) + table.flush() + return + + heavy = common.heavy + size = int(np.prod(shape, dtype=tb.utils.SizeType)) + + row, value = table.row, 0 + for nrow in range(nrows): + data = np.arange(value, value + size).reshape(shape) + for type_, sctype in sctype_from_type.items(): + if not heavy and type_ in heavy_types: + continue # skip heavy type in non-heavy mode + colname = "c_%s" % type_ + ncolname = "c_nested/%s" % colname + if type_ == "bool": + coldata = data > (row_period // 2) + elif type_ == "string": + sdata = [str_format % x for x in range(value, value + size)] + coldata = np.array(sdata, dtype=sctype).reshape(shape) + else: + coldata = np.asarray(data, dtype=sctype) + row[ncolname] = row[colname] = coldata + row["c_extra"] = data - (row_period // 2) + row["c_idxextra"] = data - (row_period // 2) + row.append() + value += 1 + if value == row_period: + value = 0 + table.flush() + + # Make computed data reusable. + tdata = table.read() + table_data[(shape, nrows)] = tdata + + +class SilentlySkipTest(common.unittest.SkipTest): + pass + + +# Base test cases +# --------------- +class BaseTableQueryTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Base test case for querying tables. + + Sub-classes must define the following attributes: + + ``tableDescription`` + The description of the table to be created. + ``shape`` + The shape of data fields in the table. + ``nrows`` + The number of data rows to be generated for the table. + + Sub-classes may redefine the following attributes: + + ``indexed`` + Whether columns shall be indexed, if possible. Default is not + to index them. + ``optlevel`` + The level of optimisation of column indexes. Default is 0. 
+ + """ + + indexed = False + optlevel = 0 + + colNotIndexable_re = re.compile(r"\bcan not be indexed\b") + condNotBoolean_re = re.compile(r"\bdoes not have a boolean type\b") + + def create_indexes(self, colname, ncolname, extracolname): + if not self.indexed: + return + try: + kind = self.kind + common.verbosePrint( + f"* Indexing ``{colname}`` columns. Type: {kind}." + ) + for acolname in [colname, ncolname, extracolname]: + acolumn = self.table.colinstances[acolname] + acolumn.create_index( + kind=self.kind, + optlevel=self.optlevel, + _blocksizes=small_blocksizes, + _testmode=True, + ) + + except TypeError as te: + if self.colNotIndexable_re.search(str(te)): + raise SilentlySkipTest( + "Columns of this type can not be indexed." + ) + raise + except NotImplementedError: + raise SilentlySkipTest( + "Indexing columns of this type is not supported yet." + ) + + def setUp(self): + super().setUp() + self.table = self.h5file.create_table( + "/", "test", self.tableDescription, expectedrows=self.nrows + ) + fill_table(self.table, self.shape, self.nrows) + + +class ScalarTableMixin: + tableDescription = TableDescription + shape = () + + +class MDTableMixin: + tableDescription = MDTableDescription + shape = md_shape + + +# Test cases on query data +# ------------------------ +operators = [ + None, + "~", + "<", + "<=", + "==", + "!=", + ">=", + ">", + ("<", "<="), + (">", ">="), +] +"""Comparison operators to check with different types.""" +heavy_operators = frozenset(["~", "<=", ">=", ">", (">", ">=")]) +"""Comparison operators to be tested only in heavy mode.""" +left_bound = row_period // 4 +"""Operand of left side operator in comparisons with operator pairs.""" +right_bound = row_period * 3 // 4 +"""Operand of right side operator in comparisons with operator pairs.""" +func_bound = 0.8 # must be <1 for trig functions to be able to fail +"""Operand of right side operator in comparisons with functions. 
""" +extra_conditions = [ + "", # uses one index + "& ((c_extra + 1) < 0)", # uses one index + "| (c_idxextra > 0)", # uses two indexes + "| ((c_idxextra > 0) | ((c_extra + 1) > 0))", # can't use indexes +] +"""Extra conditions to append to comparison conditions.""" + + +class TableDataTestCase(BaseTableQueryTestCase): + """Base test case for querying table data. + + Automatically created test method names have the format + ``test_XNNNN``, where ``NNNN`` is the zero-padded test number and + ``X`` indicates whether the test belongs to the light (``l``) or + heavy (``h``) set. + + """ + + _testfmt_light = "test_l%04d" + _testfmt_heavy = "test_h%04d" + + +def _old_repr(o): + if isinstance(o, np.bytes_): + return repr(bytes(o)) + return repr(o) + + +def create_test_method(type_, op, extracond, func=None): + sctype = sctype_from_type[type_] + + # Compute the value of bounds. + condvars = { + "bound": right_bound, + "lbound": left_bound, + "rbound": right_bound, + "func_bound": func_bound, + } + for bname, bvalue in condvars.items(): + if type_ == "string": + bvalue = str_format % bvalue + bvalue = nxtype_from_type[type_](bvalue) + condvars[bname] = bvalue + + # Compute the name of columns. + colname = "c_%s" % type_ + ncolname = "c_nested/%s" % colname + + # Compute the query condition. 
+ if not op: # as is + cond = colname + elif op == "~": # unary + cond = "~(%s)" % colname + elif op == "<" and func is None: # binary variable-constant + cond = f'{colname} {op} {_old_repr(condvars["bound"])}' + elif isinstance(op, tuple): # double binary variable-constant + cond = f"(lbound {op[0]} {colname}) & ({colname} {op[1]} rbound)" + elif func is not None: + cond = f"{func}({colname}) {op} func_bound" + else: # function or binary variable-variable + cond = f"{colname} {op} bound" + if extracond: + cond = f"({cond}) {extracond}" + + def ignore_skipped(oldmethod): + @functools.wraps(oldmethod) + def newmethod(self, *args, **kwargs): + self._verboseHeader() + try: + return oldmethod(self, *args, **kwargs) + except SilentlySkipTest as se: + if se.args: + msg = se.args[0] + else: + msg = "" + common.verbosePrint("\nSkipped test: %s" % msg) + finally: + common.verbosePrint("") # separator line between tests + + return newmethod + + @ignore_skipped + def test_method(self): + common.verbosePrint("* Condition is ``%s``." % cond) + # Replace bitwise operators with their logical counterparts. + pycond = cond + for ptop, pyop in [("&", "and"), ("|", "or"), ("~", "not")]: + pycond = pycond.replace(ptop, pyop) + pycond = compile(pycond, "", "eval") + + table = self.table + self.create_indexes(colname, ncolname, "c_idxextra") + + table_slice = dict(start=1, stop=table.nrows - 5, step=3) + rownos, fvalues = None, None + # Test that both simple and nested columns work as expected. + # Knowing how the table is filled, results must be the same. + for acolname in [colname, ncolname]: + # First the reference Python version. 
+ pyrownos, pyfvalues, pyvars = [], [], condvars.copy() + for row in table.iterrows(**table_slice): + pyvars[colname] = row[acolname] + pyvars["c_extra"] = row["c_extra"] + pyvars["c_idxextra"] = row["c_idxextra"] + try: + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "invalid value encountered in arc(cos|sin)", + RuntimeWarning, + ) + isvalidrow = eval(pycond, func_info, pyvars) + except TypeError: + raise SilentlySkipTest( + "The Python type does not support the operation." + ) + if isvalidrow: + pyrownos.append(row.nrow) + pyfvalues.append(row[acolname]) + pyrownos = np.array(pyrownos) # row numbers already sorted + pyfvalues = np.array(pyfvalues, dtype=sctype) + pyfvalues.sort() + common.verbosePrint( + f"* {len(pyrownos)} rows selected by Python " + f"from ``{acolname}``." + ) + if rownos is None: + rownos = pyrownos # initialise reference results + fvalues = pyfvalues + else: + self.assertTrue(np.all(pyrownos == rownos)) # check + self.assertTrue(np.all(pyfvalues == fvalues)) + + # Then the in-kernel or indexed version. + ptvars = condvars.copy() + ptvars[colname] = table.colinstances[acolname] + ptvars["c_extra"] = table.colinstances["c_extra"] + ptvars["c_idxextra"] = table.colinstances["c_idxextra"] + try: + isidxq = table.will_query_use_indexing(cond, ptvars) + # Query twice to trigger possible query result caching. + ptrownos = [ + table.get_where_list( + cond, condvars, sort=True, **table_slice + ) + for _ in range(2) + ] + ptfvalues = [ + table.read_where( + cond, condvars, field=acolname, **table_slice + ) + for _ in range(2) + ] + except TypeError as te: + raise SilentlySkipTest("The condition is not boolean.") + except NotImplementedError: + raise SilentlySkipTest( + "The PyTables type does not support the operation." 
+ ) + for ptfvals in ptfvalues: # row numbers already sorted + ptfvals.sort() + common.verbosePrint( + f"* {len(ptrownos[0])} rows selected by " + f"PyTables from ``{acolname}``", + nonl=True, + ) + common.verbosePrint(f"(indexing: {'yes' if isidxq else 'no'}).") + self.assertTrue(np.all(ptrownos[0] == rownos)) + self.assertTrue(np.all(ptfvalues[0] == fvalues)) + # The following test possible caching of query results. + self.assertTrue(np.all(ptrownos[0] == ptrownos[1])) + self.assertTrue(np.all(ptfvalues[0] == ptfvalues[1])) + + test_method.__doc__ = "Testing ``%s``." % cond + return test_method + + +def add_test_method(type_, op, extracond="", func=None): + global testn + # Decide to which set the test belongs. + heavy = type_ in heavy_types or op in heavy_operators + if heavy: + testfmt = TableDataTestCase._testfmt_heavy + else: + testfmt = TableDataTestCase._testfmt_light + tmethod = create_test_method(type_, op, extracond, func) + # The test number is appended to the docstring to help + # identify failing methods in non-verbose mode. + tmethod.__name__ = testfmt % testn + tmethod.__doc__ += testfmt % testn + setattr(TableDataTestCase, tmethod.__name__, tmethod) + testn += 1 + + +# Create individual tests. You may restrict which tests are generated +# by replacing the sequences in the ``for`` statements. For instance: +testn = 0 +for type_ in type_info: # for type_ in ['string']: + for op in operators: # for op in ['!=']: + for extracond in extra_conditions: # for extracond in ['']: + add_test_method(type_, op, extracond) + +for type_ in ["float32", "float64"]: + for func in func_info: # i for func in ['log10']: + for op in operators: + add_test_method(type_, op, func=func) + +# Base classes for non-indexed queries. 
# Block sizes copied from ``interpreter.c`` in Numexpr.
NX_BLOCK_SIZE1 = 128  # from ``interpreter.c`` in Numexpr
NX_BLOCK_SIZE2 = 8  # from ``interpreter.c`` in Numexpr


class SmallNITableMixin:
    """Row count for the small non-indexed table.

    The class-body checks require the row count to lie strictly between
    the two Numexpr block sizes and not to be a multiple of the small one.

    """

    nrows = 2 * row_period
    assert NX_BLOCK_SIZE2 < nrows < NX_BLOCK_SIZE1
    assert nrows % NX_BLOCK_SIZE2 != 0  # to have some residual rows


class BigNITableMixin:
    """Row count for the big non-indexed table.

    The class-body checks require the row count to exceed the sum of both
    Numexpr block sizes and not to be a multiple of either of them.

    """

    nrows = 3 * row_period
    assert nrows > NX_BLOCK_SIZE1 + NX_BLOCK_SIZE2
    assert nrows % NX_BLOCK_SIZE1 != 0
    assert nrows % NX_BLOCK_SIZE2 != 0  # to have some residual rows


# Parameters for non-indexed queries.
table_sizes = ["Small", "Big"]
heavy_table_sizes = frozenset({"Big"})
table_ndims = ["Scalar"]  # to enable multidimensional testing, include 'MD'

# Non-indexed queries: ``[SB][SM]TDTestCase``, where:
#
# 1. S is for small and B is for big size table.
#    Sizes are listed in `table_sizes`.
# 2. S is for scalar and M for multidimensional columns.
#    Dimensionalities are listed in `table_ndims`.


def niclassdata():
    """Yield the data needed to build each non-indexed query test class.

    One ``(classname, cbasenames, classdict)`` tuple is produced for every
    combination of an entry in `table_sizes` and an entry in `table_ndims`.
    ``cbasenames`` holds the *names* of the base classes as strings, and
    ``classdict`` carries a ``heavy`` flag that is true when the size is
    listed in `heavy_table_sizes`.

    """
    for size_label in table_sizes:
        is_heavy = size_label in heavy_table_sizes
        for ndim_label in table_ndims:
            # The class name is built from the initials of both labels.
            test_class_name = f"{size_label[0]}{ndim_label[0]}TDTestCase"
            base_names = (
                f"{size_label}NITableMixin",
                f"{ndim_label}TableMixin",
                "TableDataTestCase",
            )
            yield (test_class_name, base_names, {"heavy": is_heavy})


# Base classes for the different type index.
class UltraLightITableMixin:
    """Selects the ``ultralight`` kind of index."""

    kind = "ultralight"


class LightITableMixin:
    """Selects the ``light`` kind of index."""

    kind = "light"


class MediumITableMixin:
    """Selects the ``medium`` kind of index."""

    kind = "medium"


class FullITableMixin:
    """Selects the ``full`` kind of index."""

    kind = "full"


# Base classes for indexed queries.


class SmallSTableMixin:
    """Row count for the small indexed table."""

    nrows = 50


class MediumSTableMixin:
    """Row count for the medium indexed table."""

    nrows = 100


class BigSTableMixin:
    """Row count for the big indexed table."""

    nrows = 500


# Parameters for indexed queries.
+ckinds = ["UltraLight", "Light", "Medium", "Full"] +itable_sizes = ["Small", "Medium", "Big"] +heavy_itable_sizes = frozenset(["Medium", "Big"]) +itable_optvalues = [0, 1, 3, 7, 9] +heavy_itable_optvalues = frozenset([0, 1, 7, 9]) + +# Indexed queries: ``[SMB]I[ulmf]O[01379]TDTestCase``, where: +# +# 1. S is for small, M for medium and B for big size table. +# Sizes are listed in `itable_sizes`. +# 2. U is for 'ultraLight', L for 'light', M for 'medium', F for 'Full' indexes +# Index types are listed in `ckinds`. +# 3. 0 to 9 is the desired index optimization level. +# Optimizations are listed in `itable_optvalues`. + + +def iclassdata(): + for ckind in ckinds: + for size in itable_sizes: + for optlevel in itable_optvalues: + heavy = ( + optlevel in heavy_itable_optvalues + or size in heavy_itable_sizes + ) + classname = "%sI%sO%dTDTestCase" % ( + size[0], + ckind[0], + optlevel, + ) + cbasenames = ( + "%sSTableMixin" % size, + "%sITableMixin" % ckind, + "ScalarTableMixin", + "TableDataTestCase", + ) + classdict = dict(heavy=heavy, optlevel=optlevel, indexed=True) + yield (classname, cbasenames, classdict) + + +# Create test classes. +for cdatafunc in [niclassdata, iclassdata]: + for cname, cbasenames, cdict in cdatafunc(): + cbases = tuple(eval(cbase) for cbase in cbasenames) + class_ = type(cname, cbases, cdict) + exec("%s = class_" % cname) + + +# Test cases on query usage +# ------------------------- +class BaseTableUsageTestCase(BaseTableQueryTestCase): + nrows = row_period + + +_gvar = None +"""Use this when a global variable is needed.""" + + +class ScalarTableUsageTestCase(ScalarTableMixin, BaseTableUsageTestCase): + """Test case for query usage on scalar tables. + + This also tests for most usage errors and situations. 
+ + """ + + def test_empty_condition(self): + """Using an empty condition.""" + + self.assertRaises(SyntaxError, self.table.where, "") + + def test_syntax_error(self): + """Using a condition with a syntax error.""" + + self.assertRaises(SyntaxError, self.table.where, "foo bar") + + def test_unsupported_object(self): + """Using a condition with an unsupported object.""" + + self.assertRaises((TypeError, ValueError), self.table.where, "[]") + self.assertRaises(TypeError, self.table.where, "obj", {"obj": {}}) + self.assertRaises( + (TypeError, ValueError), self.table.where, "c_bool < []" + ) + + def test_unsupported_syntax(self): + """Using a condition with unsupported syntax.""" + + self.assertRaises( + (TypeError, ValueError), self.table.where, "c_bool[0]" + ) + self.assertRaises(TypeError, self.table.where, "c_bool()") + self.assertRaises(NameError, self.table.where, "c_bool.__init__") + + def test_no_column(self): + """Using a condition with no participating columns.""" + + self.assertRaises(ValueError, self.table.where, "True") + + def test_foreign_column(self): + """Using a condition with a column from other table.""" + + table2 = self.h5file.create_table("/", "other", self.tableDescription) + self.assertRaises( + ValueError, + self.table.where, + "c_int32_a + c_int32_b > 0", + { + "c_int32_a": self.table.cols.c_int32, + "c_int32_b": table2.cols.c_int32, + }, + ) + + def test_unsupported_op(self): + """Using a condition with unsupported operations on types.""" + + NIE = NotImplementedError + self.assertRaises(NIE, self.table.where, "c_complex128 > 0j") + self.assertRaises(NIE, self.table.where, 'c_string + b"a" > b"abc"') + + def test_not_boolean(self): + """Using a non-boolean condition.""" + + self.assertRaises(TypeError, self.table.where, "c_int32") + + def test_nested_col(self): + """Using a condition with nested columns.""" + + self.assertRaises(TypeError, self.table.where, "c_nested") + + def test_implicit_col(self): + """Using implicit column names in 
conditions.""" + + # If implicit columns didn't work, a ``NameError`` would be raised. + self.assertRaises(TypeError, self.table.where, "c_int32") + # If overriding didn't work, no exception would be raised. + self.assertRaises( + TypeError, + self.table.where, + "c_bool", + {"c_bool": self.table.cols.c_int32}, + ) + # External variables do not override implicit columns. + + def where_with_locals(): + c_int32 = self.table.cols.c_bool # this wouldn't cause an error + self.assertIsNotNone(c_int32) + self.table.where("c_int32") + + self.assertRaises(TypeError, where_with_locals) + + def test_condition_vars(self): + """Using condition variables in conditions.""" + + # If condition variables didn't work, a ``NameError`` would be raised. + self.assertRaises( + NotImplementedError, + self.table.where, + "c_string > bound", + {"bound": 0}, + ) + + def where_with_locals(): + bound = "foo" # this wouldn't cause an error + # silence pyflakes warnings + self.assertIsInstance(bound, str) + self.table.where("c_string > bound", {"bound": 0}) + + self.assertRaises(NotImplementedError, where_with_locals) + + def where_with_globals(): + global _gvar + _gvar = "foo" # this wouldn't cause an error + # silence pyflakes warnings + self.assertIsInstance(_gvar, str) + try: + self.table.where("c_string > _gvar", {"_gvar": 0}) + finally: + del _gvar # to keep global namespace clean + + self.assertRaises(NotImplementedError, where_with_globals) + + def test_scopes(self): + """Looking up different scopes for variables.""" + + # Make sure the variable is not implicit. + self.assertRaises(NameError, self.table.where, "col") + + # First scope: dictionary of condition variables. + self.assertRaises( + TypeError, + self.table.where, + "col", + {"col": self.table.cols.c_int32}, + ) + + # Second scope: local variables. 
+ def where_whith_locals(): + col = self.table.cols.c_int32 + self.assertIsNotNone(col) + self.table.where("col") + + self.assertRaises(TypeError, where_whith_locals) + + # Third scope: global variables. + def where_with_globals(): + global _gvar + _gvar = self.table.cols.c_int32 + # silence pyflakes warnings + self.assertIsNotNone(_gvar) + try: + self.table.where("_gvar") + finally: + del _gvar # to keep global namespace clean + + self.assertRaises(TypeError, where_with_globals) + + +class MDTableUsageTestCase(MDTableMixin, BaseTableUsageTestCase): + """Test case for query usage on multidimensional tables.""" + + def test(self): + """Using a condition on a multidimensional table.""" + + # Easy: queries on multidimensional tables are not implemented yet! + self.assertRaises(NotImplementedError, self.table.where, "c_bool") + + +class IndexedTableUsage(ScalarTableMixin, BaseTableUsageTestCase): + """Test case for query usage on indexed tables. + + Indexing could be used in more cases, but it is expected to kick in + at least in the cases tested here. 
+ + """ + + nrows = 50 + indexed = True + + def setUp(self): + super().setUp() + self.table.cols.c_bool.create_index(_blocksizes=small_blocksizes) + self.table.cols.c_int32.create_index(_blocksizes=small_blocksizes) + self.will_query_use_indexing = self.table.will_query_use_indexing + self.compileCondition = self.table._compile_condition + self.requiredExprVars = self.table._required_expr_vars + usable_idxs = set() + for expr in self.idx_expr: + idxvar = expr[0] + if idxvar not in usable_idxs: + usable_idxs.add(idxvar) + self.usable_idxs = frozenset(usable_idxs) + + def test(self): + for condition in self.conditions: + c_usable_idxs = self.will_query_use_indexing(condition, {}) + self.assertEqual( + c_usable_idxs, + self.usable_idxs, + f"\nQuery with condition: ``{condition}``\n" + f"Computed usable indexes are: " + f"``{c_usable_idxs}``\nand should be: " + f"``{self.usable_idxs}``", + ) + condvars = self.requiredExprVars(condition, None) + compiled = self.compileCondition(condition, condvars) + c_idx_expr = compiled.index_expressions + self.assertEqual( + c_idx_expr, + self.idx_expr, + f"\nWrong index expression in condition:\n" + f"``{condition}``\nCompiled index expression is:" + f"\n``{c_idx_expr}``\nand should be:\n" + f"``{self.idx_expr}``", + ) + c_str_expr = compiled.string_expression + self.assertEqual( + c_str_expr, + self.str_expr, + f"\nWrong index operations in condition:\n" + f"``{condition}``\nComputed index operations are:" + f"\n``{c_str_expr}``\nand should be:\n" + f"``{self.str_expr}``", + ) + common.verbosePrint( + f"* Query with condition ``{condition}`` will use variables " + f"``{compiled.index_variables}`` for indexing." 
+ ) + + +class IndexedTableUsage1(IndexedTableUsage): + conditions = [ + "(c_int32 > 0)", + "(c_int32 > 0) & (c_extra > 0)", + "(c_int32 > 0) & ((~c_bool) | (c_extra > 0))", + "(c_int32 > 0) & ((c_extra < 3) & (c_extra > 0))", + ] + idx_expr = [("c_int32", ("gt",), (0,))] + str_expr = "e0" + + +class IndexedTableUsage2(IndexedTableUsage): + conditions = [ + "(c_int32 > 0) & (c_int32 < 5)", + "(c_int32 > 0) & (c_int32 < 5) & (c_extra > 0)", + "(c_int32 > 0) & (c_int32 < 5) & ((c_bool == True) | (c_extra > 0))", + "(c_int32 > 0) & (c_int32 < 5) & ((c_extra > 0) | (c_bool == True))", + ] + idx_expr = [("c_int32", ("gt", "lt"), (0, 5))] + str_expr = "e0" + + +class IndexedTableUsage3(IndexedTableUsage): + conditions = [ + "(c_bool == True)", + "(c_bool == True) & (c_extra > 0)", + "(c_extra > 0) & (c_bool == True)", + "((c_extra > 0) & (c_extra < 4)) & (c_bool == True)", + "(c_bool == True) & ((c_extra > 0) & (c_extra < 4))", + ] + idx_expr = [("c_bool", ("eq",), (True,))] + str_expr = "e0" + + +class IndexedTableUsage4(IndexedTableUsage): + conditions = [ + "((c_int32 > 0) & (c_bool == True)) & (c_extra > 0)", + "((c_int32 > 0) & (c_bool == True)) & ((c_extra > 0)" + + " & (c_extra < 4))", + ] + idx_expr = [ + ("c_int32", ("gt",), (0,)), + ("c_bool", ("eq",), (True,)), + ] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage5(IndexedTableUsage): + conditions = [ + "(c_int32 >= 1) & (c_int32 < 2) & (c_bool == True)", + "(c_int32 >= 1) & (c_int32 < 2) & (c_bool == True)" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("ge", "lt"), (1, 2)), + ("c_bool", ("eq",), (True,)), + ] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage6(IndexedTableUsage): + conditions = [ + "(c_int32 >= 1) & (c_int32 < 2) & (c_int32 > 0) & (c_int32 < 5)", + "(c_int32 >= 1) & (c_int32 < 2) & (c_int32 > 0) & (c_int32 < 5)" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("ge", "lt"), (1, 2)), + ("c_int32", ("gt",), (0,)), + ("c_int32", ("lt",), (5,)), + ] + str_expr = "((e0 & 
e1) & e2)" + + +class IndexedTableUsage7(IndexedTableUsage): + conditions = [ + "(c_int32 >= 1) & (c_int32 < 2) & ((c_int32 > 0) & (c_int32 < 5))", + "((c_int32 >= 1) & (c_int32 < 2)) & ((c_int32 > 0) & (c_int32 < 5))", + "((c_int32 >= 1) & (c_int32 < 2)) & ((c_int32 > 0) & (c_int32 < 5))" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("ge", "lt"), (1, 2)), + ("c_int32", ("gt", "lt"), (0, 5)), + ] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage8(IndexedTableUsage): + conditions = [ + "(c_extra > 0) & ((c_int32 > 0) & (c_int32 < 5))", + ] + idx_expr = [ + ("c_int32", ("gt", "lt"), (0, 5)), + ] + str_expr = "e0" + + +class IndexedTableUsage9(IndexedTableUsage): + conditions = [ + "(c_extra > 0) & (c_int32 > 0) & (c_int32 < 5)", + "((c_extra > 0) & (c_int32 > 0)) & (c_int32 < 5)", + "(c_extra > 0) & (c_int32 > 0) & (c_int32 < 5) & (c_extra > 3)", + ] + idx_expr = [("c_int32", ("gt",), (0,)), ("c_int32", ("lt",), (5,))] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage10(IndexedTableUsage): + conditions = [ + "(c_int32 < 5) & (c_extra > 0) & (c_bool == True)", + "(c_int32 < 5) & (c_extra > 2) & c_bool", + "(c_int32 < 5) & (c_bool == True) & (c_extra > 0) & (c_extra < 4)", + "(c_int32 < 5) & (c_extra > 0) & (c_bool == True) & (c_extra < 4)", + ] + idx_expr = [("c_int32", ("lt",), (5,)), ("c_bool", ("eq",), (True,))] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage11(IndexedTableUsage): + """Complex operations are not eligible for indexing.""" + + conditions = [ + "sin(c_int32) > 0", + "(c_int32 * 2.4) > 0", + "(c_int32 + c_int32) > 0", + "c_int32**2 > 0", + ] + idx_expr = [] + str_expr = "" + + +class IndexedTableUsage12(IndexedTableUsage): + conditions = [ + "~c_bool", + "~(c_bool)", + "~c_bool & (c_extra > 0)", + "~(c_bool) & (c_extra > 0)", + ] + idx_expr = [("c_bool", ("eq",), (False,))] + str_expr = "e0" + + +class IndexedTableUsage13(IndexedTableUsage): + conditions = [ + "~(c_bool == True)", + "~((c_bool == True))", + "~(c_bool == True) 
& (c_extra > 0)", + "~((c_bool == True)) & (c_extra > 0)", + ] + idx_expr = [("c_bool", ("eq",), (False,))] + str_expr = "e0" + + +class IndexedTableUsage14(IndexedTableUsage): + conditions = [ + "~(c_int32 > 0)", + "~((c_int32 > 0)) & (c_extra > 0)", + "~(c_int32 > 0) & ((~c_bool) | (c_extra > 0))", + "~(c_int32 > 0) & ((c_extra < 3) & (c_extra > 0))", + ] + idx_expr = [("c_int32", ("le",), (0,))] + str_expr = "e0" + + +class IndexedTableUsage15(IndexedTableUsage): + conditions = [ + "(~(c_int32 > 0) | ~c_bool)", + "(~(c_int32 > 0) | ~(c_bool)) & (c_extra > 0)", + "(~(c_int32 > 0) | ~(c_bool == True)) & ((c_extra > 0)" + + " & (c_extra < 4))", + ] + idx_expr = [ + ("c_int32", ("le",), (0,)), + ("c_bool", ("eq",), (False,)), + ] + str_expr = "(e0 | e1)" + + +class IndexedTableUsage16(IndexedTableUsage): + conditions = [ + "(~(c_int32 > 0) & ~(c_int32 < 2))", + "(~(c_int32 > 0) & ~(c_int32 < 2)) & (c_extra > 0)", + "(~(c_int32 > 0) & ~(c_int32 < 2)) & ((c_extra > 0)" + + " & (c_extra < 4))", + ] + idx_expr = [ + ("c_int32", ("le",), (0,)), + ("c_int32", ("ge",), (2,)), + ] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage17(IndexedTableUsage): + conditions = [ + "(~(c_int32 > 0) & ~(c_int32 < 2))", + "(~(c_int32 > 0) & ~(c_int32 < 2)) & (c_extra > 0)", + "(~(c_int32 > 0) & ~(c_int32 < 2)) & ((c_extra > 0)" + + " & (c_extra < 4))", + ] + idx_expr = [ + ("c_int32", ("le",), (0,)), + ("c_int32", ("ge",), (2,)), + ] + str_expr = "(e0 & e1)" + + +# Negations of complex conditions are not supported yet + + +class IndexedTableUsage18(IndexedTableUsage): + conditions = [ + "~((c_int32 > 0) & (c_bool))", + "~((c_int32 > 0) & (c_bool)) & (c_extra > 0)", + "~((c_int32 > 0) & (c_bool)) & ((c_extra > 0)" + " & (c_extra < 4))", + ] + idx_expr = [] + str_expr = "" + + +class IndexedTableUsage19(IndexedTableUsage): + conditions = [ + "~((c_int32 > 0) & (c_bool)) & ((c_bool == False)" + + " & (c_extra < 4))", + ] + idx_expr = [ + ("c_bool", ("eq",), (False,)), + ] + str_expr = 
"e0" + + +class IndexedTableUsage20(IndexedTableUsage): + conditions = [ + "((c_int32 > 0) & ~(c_bool))", + "((c_int32 > 0) & ~(c_bool)) & (c_extra > 0)", + "((c_int32 > 0) & ~(c_bool == True)) & ((c_extra > 0) & (c_extra < 4))", + ] + idx_expr = [ + ("c_int32", ("gt",), (0,)), + ("c_bool", ("eq",), (False,)), + ] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage21(IndexedTableUsage): + conditions = [ + "(~(c_int32 > 0) & (c_bool))", + "(~(c_int32 > 0) & (c_bool)) & (c_extra > 0)", + "(~(c_int32 > 0) & (c_bool == True)) & ((c_extra > 0)" + + " & (c_extra < 4))", + ] + idx_expr = [ + ("c_int32", ("le",), (0,)), + ("c_bool", ("eq",), (True,)), + ] + str_expr = "(e0 & e1)" + + +class IndexedTableUsage22(IndexedTableUsage): + conditions = [ + "~((c_int32 >= 1) & (c_int32 < 2)) & ~(c_bool == True)", + "~(c_bool == True) & (c_extra > 0)", + "~((c_int32 >= 1) & (c_int32 < 2)) & (~(c_bool == True)" + + " & (c_extra > 0))", + ] + idx_expr = [ + ("c_bool", ("eq",), (False,)), + ] + str_expr = "e0" + + +class IndexedTableUsage23(IndexedTableUsage): + conditions = [ + "c_int32 != 1", + "c_bool != False", + "~(c_int32 != 1)", + "~(c_bool != False)", + "(c_int32 != 1) & (c_extra != 2)", + ] + idx_expr = [] + str_expr = "" + + +class IndexedTableUsage24(IndexedTableUsage): + conditions = [ + "c_bool", + "c_bool == True", + "True == c_bool", + "~(~c_bool)", + "~~c_bool", + "~~~~c_bool", + "~(~c_bool) & (c_extra != 2)", + ] + idx_expr = [ + ("c_bool", ("eq",), (True,)), + ] + str_expr = "e0" + + +class IndexedTableUsage25(IndexedTableUsage): + conditions = [ + "~c_bool", + "c_bool == False", + "False == c_bool", + "~(c_bool)", + "~((c_bool))", + "~~~c_bool", + "~~(~c_bool) & (c_extra != 2)", + ] + idx_expr = [ + ("c_bool", ("eq",), (False,)), + ] + str_expr = "e0" + + +class IndexedTableUsage26(IndexedTableUsage): + conditions = [ + "c_bool != True", + "True != c_bool", + "c_bool != False", + "False != c_bool", + ] + idx_expr = [] + str_expr = "" + + +class 
IndexedTableUsage27(IndexedTableUsage): + conditions = [ + "(c_int32 == 3) | c_bool | (c_int32 == 5)", + "(((c_int32 == 3) | (c_bool == True)) | (c_int32 == 5))" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("eq",), (3,)), + ("c_bool", ("eq",), (True,)), + ("c_int32", ("eq",), (5,)), + ] + str_expr = "((e0 | e1) | e2)" + + +class IndexedTableUsage28(IndexedTableUsage): + conditions = [ + "((c_int32 == 3) | c_bool) & (c_int32 == 5)", + "(((c_int32 == 3) | (c_bool == True)) & (c_int32 == 5))" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("eq",), (3,)), + ("c_bool", ("eq",), (True,)), + ("c_int32", ("eq",), (5,)), + ] + str_expr = "((e0 | e1) & e2)" + + +class IndexedTableUsage29(IndexedTableUsage): + conditions = [ + "(c_int32 == 3) | ((c_int32 == 4) & (c_int32 == 5))", + "((c_int32 == 3) | ((c_int32 == 4) & (c_int32 == 5)))" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("eq",), (4,)), + ("c_int32", ("eq",), (5,)), + ("c_int32", ("eq",), (3,)), + ] + str_expr = "((e0 & e1) | e2)" + + +class IndexedTableUsage30(IndexedTableUsage): + conditions = [ + "((c_int32 == 3) | (c_int32 == 4)) & (c_int32 == 5)", + "((c_int32 == 3) | (c_int32 == 4)) & (c_int32 == 5)" + + " & (c_extra > 0)", + ] + idx_expr = [ + ("c_int32", ("eq",), (3,)), + ("c_int32", ("eq",), (4,)), + ("c_int32", ("eq",), (5,)), + ] + str_expr = "((e0 | e1) & e2)" + + +class IndexedTableUsage31(IndexedTableUsage): + conditions = [ + "(c_extra > 0) & ((c_extra < 4) & (c_bool == True))", + "(c_extra > 0) & ((c_bool == True) & (c_extra < 5))", + "((c_int32 > 0) | (c_extra > 0)) & (c_bool == True)", + ] + idx_expr = [ + ("c_bool", ("eq",), (True,)), + ] + str_expr = "e0" + + +class IndexedTableUsage32(IndexedTableUsage): + conditions = [ + "(c_int32 < 5) & (c_extra > 0) & (c_bool == True) | (c_extra < 4)", + ] + idx_expr = [] + str_expr = "" + + +# Main part +# --------- +def suite(): + """Return a test suite consisting of all the test cases in the module.""" + + testSuite = 
common.unittest.TestSuite() + + cdatafuncs = [niclassdata] # non-indexing data tests + cdatafuncs.append(iclassdata) # indexing data tests + + heavy = common.heavy + # Choose which tests to run in classes with autogenerated tests. + if heavy: + autoprefix = "test" # all tests + else: + autoprefix = "test_l" # only light tests + + niter = 1 + for i in range(niter): + # Tests on query data. + for cdatafunc in cdatafuncs: + for cdata in cdatafunc(): + class_ = eval(cdata[0]) + if heavy or not class_.heavy: + suite_ = common.make_suite(class_, prefix=autoprefix) + testSuite.addTest(suite_) + # Tests on query usage. + testSuite.addTest(common.make_suite(ScalarTableUsageTestCase)) + testSuite.addTest(common.make_suite(MDTableUsageTestCase)) + testSuite.addTest(common.make_suite(IndexedTableUsage1)) + testSuite.addTest(common.make_suite(IndexedTableUsage2)) + testSuite.addTest(common.make_suite(IndexedTableUsage3)) + testSuite.addTest(common.make_suite(IndexedTableUsage4)) + testSuite.addTest(common.make_suite(IndexedTableUsage5)) + testSuite.addTest(common.make_suite(IndexedTableUsage6)) + testSuite.addTest(common.make_suite(IndexedTableUsage7)) + testSuite.addTest(common.make_suite(IndexedTableUsage8)) + testSuite.addTest(common.make_suite(IndexedTableUsage9)) + testSuite.addTest(common.make_suite(IndexedTableUsage10)) + testSuite.addTest(common.make_suite(IndexedTableUsage11)) + testSuite.addTest(common.make_suite(IndexedTableUsage12)) + testSuite.addTest(common.make_suite(IndexedTableUsage13)) + testSuite.addTest(common.make_suite(IndexedTableUsage14)) + testSuite.addTest(common.make_suite(IndexedTableUsage15)) + testSuite.addTest(common.make_suite(IndexedTableUsage16)) + testSuite.addTest(common.make_suite(IndexedTableUsage17)) + testSuite.addTest(common.make_suite(IndexedTableUsage18)) + testSuite.addTest(common.make_suite(IndexedTableUsage19)) + testSuite.addTest(common.make_suite(IndexedTableUsage20)) + testSuite.addTest(common.make_suite(IndexedTableUsage21)) + 
testSuite.addTest(common.make_suite(IndexedTableUsage22)) + testSuite.addTest(common.make_suite(IndexedTableUsage23)) + testSuite.addTest(common.make_suite(IndexedTableUsage24)) + testSuite.addTest(common.make_suite(IndexedTableUsage25)) + testSuite.addTest(common.make_suite(IndexedTableUsage26)) + testSuite.addTest(common.make_suite(IndexedTableUsage27)) + testSuite.addTest(common.make_suite(IndexedTableUsage28)) + testSuite.addTest(common.make_suite(IndexedTableUsage29)) + testSuite.addTest(common.make_suite(IndexedTableUsage30)) + testSuite.addTest(common.make_suite(IndexedTableUsage31)) + testSuite.addTest(common.make_suite(IndexedTableUsage32)) + + return testSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_ref_array1.mat b/venv/Lib/site-packages/tables/tests/test_ref_array1.mat new file mode 100644 index 0000000..d449504 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/test_ref_array1.mat differ diff --git a/venv/Lib/site-packages/tables/tests/test_ref_array2.mat b/venv/Lib/site-packages/tables/tests/test_ref_array2.mat new file mode 100644 index 0000000..0754e54 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/test_ref_array2.mat differ diff --git a/venv/Lib/site-packages/tables/tests/test_suite.py b/venv/Lib/site-packages/tables/tests/test_suite.py new file mode 100644 index 0000000..879dfed --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_suite.py @@ -0,0 +1,98 @@ +"""Test suite consisting of all testcases.""" + +import sys + +from tables.tests import common + + +def suite(): + test_modules = [ + "tables.tests.test_attributes", + "tables.tests.test_basics", + "tables.tests.test_create", + "tables.tests.test_backcompat", + "tables.tests.test_types", + "tables.tests.test_lists", + "tables.tests.test_tables", + "tables.tests.test_tablesMD", + "tables.tests.test_large_tables", 
+ "tables.tests.test_array", + "tables.tests.test_earray", + "tables.tests.test_carray", + "tables.tests.test_vlarray", + "tables.tests.test_tree", + "tables.tests.test_timetype", + "tables.tests.test_do_undo", + "tables.tests.test_enum", + "tables.tests.test_nestedtypes", + "tables.tests.test_hdf5compat", + "tables.tests.test_numpy", + "tables.tests.test_queries", + "tables.tests.test_expression", + "tables.tests.test_links", + "tables.tests.test_indexes", + "tables.tests.test_indexvalues", + "tables.tests.test_index_backcompat", + "tables.tests.test_aux", + "tables.tests.test_utils", + "tables.tests.test_direct_chunk", + # Sub-packages + "tables.nodes.tests.test_filenode", + ] + + # print('-=' * 38) + + # The test for garbage must be run *in the last place*. + # Else, it is not as useful. + test_modules.append("tables.tests.test_garbage") + + alltests = common.unittest.TestSuite() + if common.show_memory: + # Add a memory report at the beginning + alltests.addTest(common.make_suite(common.ShowMemTime)) + for name in test_modules: + # Unexpectedly, the following code doesn't seem to work anymore + # in python 3 + # exec('from %s import suite as test_suite' % name) + __import__(name) + test_suite = sys.modules[name].suite + + alltests.addTest(test_suite()) + if common.show_memory: + # Add a memory report after each test module + alltests.addTest(common.make_suite(common.ShowMemTime)) + return alltests + + +def test(verbose=False, heavy=False): + """Run all the tests in the test suite. + + If *verbose* is set, the test suite will emit messages with full + verbosity (not recommended unless you are looking into a certain + problem). + + If *heavy* is set, the test suite will be run in *heavy* mode (you + should be careful with this because it can take a lot of time and + resources from your computer). + + Return 0 (os.EX_OK) if all tests pass, 1 in case of failure + + """ + + common.print_versions() + common.print_heavy(heavy) + + # What a context this is! 
+ # oldverbose, common.verbose = common.verbose, verbose + oldheavy, common.heavy = common.heavy, heavy + try: + result = common.unittest.TextTestRunner( + verbosity=1 + int(verbose) + ).run(suite()) + if result.wasSuccessful(): + return 0 + else: + return 1 + finally: + # common.verbose = oldverbose + common.heavy = oldheavy # there are pretty young heavies, too ;) diff --git a/venv/Lib/site-packages/tables/tests/test_szip.h5 b/venv/Lib/site-packages/tables/tests/test_szip.h5 new file mode 100644 index 0000000..892fa4d Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/test_szip.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/test_tables.py b/venv/Lib/site-packages/tables/tests/test_tables.py new file mode 100644 index 0000000..e05052c --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_tables.py @@ -0,0 +1,8066 @@ +import sys +import struct +import platform +import tempfile +import itertools +from pathlib import Path + +import numpy as np + +import tables as tb +from tables.tests import common + + +# To know whether the interpreter is 32 or 64 bit +def is_python_64bit(): + return struct.calcsize("P") == 8 + + +# To know whether the os platform is 32 or 64 bit +def is_os_64bit(): + return platform.machine().endswith("64") + + +# Test Record class +class Record(tb.IsDescription): + var1 = tb.StringCol(itemsize=4, dflt=b"abcd", pos=0) # 4-character String + var2 = tb.IntCol(dflt=1, pos=1) # integer + var3 = tb.Int16Col(dflt=2, pos=2) # short integer + var4 = tb.Float64Col(dflt=3.1, pos=3) # double (double-precision) + var5 = tb.Float32Col(dflt=4.2, pos=4) # float (single-precision) + var6 = tb.UInt16Col(dflt=5, pos=5) # unsigned short integer + var7 = tb.StringCol(itemsize=1, dflt=b"e", pos=6) # 1-character String + var8 = tb.BoolCol(dflt=True, pos=7) # boolean + var9 = tb.ComplexCol( + itemsize=8, dflt=(0.0 + 1.0j), pos=8 + ) # Complex single precision + var10 = tb.ComplexCol( + itemsize=16, dflt=(1.0 - 0.0j), pos=9 + ) # Complex 
double precision + if hasattr(tb, "Float16Col"): + var11 = tb.Float16Col(dflt=6.4) # float (half-precision) + if hasattr(tb, "Float96Col"): + var12 = tb.Float96Col(dflt=6.4) # float (extended precision) + if hasattr(tb, "Float128Col"): + var13 = tb.Float128Col(dflt=6.4) # float (extended precision) + if hasattr(tb, "Complex192Col"): + var14 = tb.ComplexCol( + itemsize=24, dflt=(1.0 - 0.0j) + ) # Complex double (extended precision) + if hasattr(tb, "Complex256Col"): + var15 = tb.ComplexCol( + itemsize=32, dflt=(1.0 - 0.0j) + ) # Complex double (extended precision) + + +# Dictionary definition +RecordDescriptionDict = { + "var1": tb.StringCol(itemsize=4, dflt=b"abcd", pos=0), # 4-char String + "var2": tb.IntCol(dflt=1, pos=1), # integer + "var3": tb.Int16Col(dflt=2, pos=2), # short integer + "var4": tb.Float64Col(dflt=3.1, pos=3), # double (double-precision) + "var5": tb.Float32Col(dflt=4.2, pos=4), # float (single-precision) + "var6": tb.UInt16Col(dflt=5, pos=5), # unsigned short integer + "var7": tb.StringCol(itemsize=1, dflt=b"e", pos=6), # 1-character String + "var8": tb.BoolCol(dflt=True, pos=7), # boolean + "var9": tb.ComplexCol( + itemsize=8, dflt=(0.0 + 1.0j), pos=8 + ), # Complex single precision + "var10": tb.ComplexCol( + itemsize=16, dflt=(1.0 - 0.0j), pos=9 + ), # Complex double precision +} + +if hasattr(tb, "Float16Col"): + # float (half-precision) + RecordDescriptionDict["var11"] = tb.Float16Col(dflt=6.4) +if hasattr(tb, "Float96Col"): + # float (extended precision) + RecordDescriptionDict["var12"] = tb.Float96Col(dflt=6.4) +if hasattr(tb, "Float128Col"): + # float (extended precision) + RecordDescriptionDict["var13"] = tb.Float128Col(dflt=6.4) +if hasattr(tb, "Complex192Col"): + # Complex double (extended precision) + RecordDescriptionDict["var14"] = tb.ComplexCol( + itemsize=24, dflt=(1.0 - 0.0j) + ) +if hasattr(tb, "Complex256Col"): + # Complex double (extended precision) + RecordDescriptionDict["var15"] = tb.ComplexCol( + itemsize=32, dflt=(1.0 - 
0.0j) + ) + + +# Old fashion of defining tables (for testing backward compatibility) +class OldRecord(tb.IsDescription): + var1 = tb.StringCol(itemsize=4, dflt=b"abcd", pos=0) + var2 = tb.Col.from_type("int32", (), 1, pos=1) + var3 = tb.Col.from_type("int16", (), 2, pos=2) + var4 = tb.Col.from_type("float64", (), 3.1, pos=3) + var5 = tb.Col.from_type("float32", (), 4.2, pos=4) + var6 = tb.Col.from_type("uint16", (), 5, pos=5) + var7 = tb.StringCol(itemsize=1, dflt=b"e", pos=6) + var8 = tb.Col.from_type("bool", shape=(), dflt=1, pos=7) + var9 = tb.ComplexCol(itemsize=8, shape=(), dflt=(0.0 + 1.0j), pos=8) + var10 = tb.ComplexCol(itemsize=16, shape=(), dflt=(1.0 - 0.0j), pos=9) + if hasattr(tb, "Float16Col"): + var11 = tb.Col.from_type("float16", (), 6.4) + if hasattr(tb, "Float96Col"): + var12 = tb.Col.from_type("float96", (), 6.4) + if hasattr(tb, "Float128Col"): + var13 = tb.Col.from_type("float128", (), 6.4) + if hasattr(tb, "Complex192Col"): + var14 = tb.ComplexCol(itemsize=24, shape=(), dflt=(1.0 - 0.0j)) + if hasattr(tb, "Complex256Col"): + var15 = tb.ComplexCol(itemsize=32, shape=(), dflt=(1.0 - 0.0j)) + + +class BasicTestCase(common.TempFileMixin, common.PyTablesTestCase): + # file = "test.h5" + open_mode = "w" + title = "This is the table title" + expectedrows = 100 + appendrows = 20 + compress = 0 + shuffle = 0 + bitshuffle = 0 + fletcher32 = 0 + complib = "zlib" # Default compression library + record = Record + recarrayinit = 0 + maxshort = 1 << 15 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + self.h5file.close() + + def initRecArray(self): + record = self.recordtemplate + row = record[0] + buflist = [] + # Fill the recarray + for i in range(self.expectedrows): + tmplist = [] + var1 = "%04d" % (self.expectedrows - i) + tmplist.append(var1) + var2 = i + tmplist.append(var2) + var3 = i % self.maxshort + tmplist.append(var3) + if isinstance(row["var4"], 
np.ndarray): + tmplist.append([float(i), float(i * i)]) + else: + tmplist.append(float(i)) + if isinstance(row["var5"], np.ndarray): + tmplist.append(np.array((float(i),) * 4)) + else: + tmplist.append(float(i)) + # var6 will be like var3 but byteswaped + tmplist.append(((var3 >> 8) & 0xFF) + ((var3 << 8) & 0xFF00)) + var7 = var1[-1] + tmplist.append(var7) + if isinstance(row["var8"], np.ndarray): + tmplist.append([0, 10]) # should be equivalent to [0,1] + else: + tmplist.append(10) # should be equivalent to 1 + if isinstance(row["var9"], np.ndarray): + tmplist.append([0.0 + float(i) * 1j, float(i) + 0.0j]) + else: + tmplist.append(float(i) + 0j) + if isinstance(row["var10"], np.ndarray): + tmplist.append([float(i) + 0j, 1 + float(i) * 1j]) + else: + tmplist.append(1 + float(i) * 1j) + if hasattr(tb, "Float16Col"): + if isinstance(row["var11"], np.ndarray): + tmplist.append(np.array((float(i),) * 4)) + else: + tmplist.append(float(i)) + if hasattr(tb, "Float96Col"): + if isinstance(row["var12"], np.ndarray): + tmplist.append(np.array((float(i),) * 4)) + else: + tmplist.append(float(i)) + if hasattr(tb, "Float128Col"): + if isinstance(row["var13"], np.ndarray): + tmplist.append(np.array((float(i),) * 4)) + else: + tmplist.append(float(i)) + if hasattr(tb, "Complex192Col"): + if isinstance(row["var14"], np.ndarray): + tmplist.append([float(i) + 0j, 1 + float(i) * 1j]) + else: + tmplist.append(1 + float(i) * 1j) + if hasattr(tb, "Complex256Col"): + if isinstance(row["var15"], np.ndarray): + tmplist.append([float(i) + 0j, 1 + float(i) * 1j]) + else: + tmplist.append(1 + float(i) * 1j) + + buflist.append(tuple(tmplist)) + + self.record = np.rec.array( + buflist, dtype=record.dtype, shape=self.expectedrows + ) + + def populateFile(self): + group = self.rootgroup + if self.recarrayinit: + # Initialize a starting buffer, if any + self.initRecArray() + for j in range(3): + # Create a table + filterprops = tb.Filters( + complevel=self.compress, + shuffle=self.shuffle, + 
bitshuffle=self.bitshuffle, + fletcher32=self.fletcher32, + complib=self.complib, + ) + if j < 2: + byteorder = sys.byteorder + else: + # table2 will be byteswapped + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + table = self.h5file.create_table( + group, + "table" + str(j), + self.record, + title=self.title, + filters=filterprops, + expectedrows=self.expectedrows, + byteorder=byteorder, + ) + if not self.recarrayinit: + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(self.expectedrows): + s = "%04d" % (self.expectedrows - i) + row["var1"] = s.encode("ascii") + row["var7"] = s[-1].encode("ascii") + # row['var7'] = ('%04d' % (self.expectedrows - i))[-1] + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var8"], np.ndarray): + row["var8"] = [0, 1] + else: + row["var8"] = 1 + if isinstance(row["var9"], np.ndarray): + row["var9"] = [0.0 + float(i) * 1j, float(i) + 0.0j] + else: + row["var9"] = float(i) + 0.0j + if isinstance(row["var10"], np.ndarray): + row["var10"] = [float(i) + 0.0j, 1.0 + float(i) * 1j] + else: + row["var10"] = 1.0 + float(i) * 1j + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + if hasattr(tb, "Float16Col"): + if isinstance(row["var11"], np.ndarray): + row["var11"] = np.array((float(i),) * 4) + else: + row["var11"] = float(i) + if hasattr(tb, "Float96Col"): + if isinstance(row["var12"], np.ndarray): + row["var12"] = np.array((float(i),) * 4) + else: + row["var12"] = float(i) + if hasattr(tb, "Float128Col"): + if isinstance(row["var13"], np.ndarray): + row["var13"] = np.array((float(i),) * 4) + else: + row["var13"] = float(i) + if hasattr(tb, "Complex192Col"): + if isinstance(row["var14"], np.ndarray): + row["var14"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + 
row["var14"] = 1 + float(i) * 1j + if hasattr(tb, "Complex256Col"): + if isinstance(row["var15"], np.ndarray): + row["var15"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var15"] = 1 + float(i) * 1j + + # var6 will be like var3 but byteswaped + row["var6"] = ((row["var3"] >> 8) & 0xFF) + ( + (row["var3"] << 8) & 0xFF00 + ) + # print("Saving -->", row) + row.append() + + # Flush the buffer for this table + table.flush() + # Create a new group (descendant of group) + group2 = self.h5file.create_group(group, "group" + str(j)) + # Iterate over this new group (group2) + group = group2 + + def test00_description(self): + """Checking table description and descriptive fields.""" + + self.h5file = tb.open_file(self.h5fname) + + tbl = self.h5file.get_node("/table0") + desc = tbl.description + + if isinstance(self.record, dict): + columns = self.record + elif isinstance(self.record, np.ndarray): + descr, _ = tb.description.descr_from_dtype(self.record.dtype) + columns = descr._v_colobjects + elif isinstance(self.record, np.dtype): + descr, _ = tb.description.descr_from_dtype(self.record) + columns = descr._v_colobjects + else: + # This is an ordinary description. + columns = self.record.columns + + # Check table and description attributes at the same time. + # These checks are only valid for non-nested tables. + + # Column names. + fix_n_column = 10 + expectedNames = ["var%d" % n for n in range(1, fix_n_column + 1)] + types = ("float16", "float96", "float128", "complex192", "complex256") + for n, typename in enumerate(types, fix_n_column + 1): + name = typename.capitalize() + "Col" + if hasattr(tb, name): + expectedNames.append("var%d" % n) + + self.assertEqual(expectedNames, list(tbl.colnames)) + self.assertEqual(expectedNames, list(desc._v_names)) + + # Column instances. + for colname in expectedNames: + self.assertTrue( + tbl.colinstances[colname] is tbl.cols._f_col(colname) + ) + + # Column types. 
+ expectedTypes = [columns[colname].dtype for colname in expectedNames] + self.assertEqual( + expectedTypes, [tbl.coldtypes[v] for v in expectedNames] + ) + self.assertEqual( + expectedTypes, [desc._v_dtypes[v] for v in expectedNames] + ) + + # Column string types. + expectedTypes = [columns[colname].type for colname in expectedNames] + self.assertEqual( + expectedTypes, [tbl.coltypes[v] for v in expectedNames] + ) + self.assertEqual( + expectedTypes, [desc._v_types[v] for v in expectedNames] + ) + + # Column defaults. + for v in expectedNames: + if common.verbose: + print("dflt-->", columns[v].dflt, type(columns[v].dflt)) + print("coldflts-->", tbl.coldflts[v], type(tbl.coldflts[v])) + print( + "desc.dflts-->", desc._v_dflts[v], type(desc._v_dflts[v]) + ) + self.assertTrue( + common.areArraysEqual(tbl.coldflts[v], columns[v].dflt) + ) + self.assertTrue( + common.areArraysEqual(desc._v_dflts[v], columns[v].dflt) + ) + + # Column path names. + self.assertEqual(expectedNames, list(desc._v_pathnames)) + + # Column objects. + for colName in expectedNames: + expectedCol = columns[colName] + col = desc._v_colobjects[colName] + + self.assertEqual(expectedCol.dtype, col.dtype) + self.assertEqual(expectedCol.type, col.type) + + def test01_readTable(self): + """Checking table read.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_readTable..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result = [rec["var2"] for rec in table.iterrows() if rec["var2"] < 20] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", table[-1]) + print("Total selected records in table ==> ", len(result)) + nrows = self.expectedrows - 1 + rec = list(table.iterrows())[-1] + self.assertEqual( + (rec["var1"], rec["var2"], rec["var7"]), (b"0001", nrows, b"1") + ) + if isinstance(rec["var5"], np.ndarray): + self.assertTrue( + common.allequal( + rec["var5"], np.array((float(nrows),) * 4, np.float32) + ) + ) + else: + self.assertEqual(rec["var5"], float(nrows)) + if isinstance(rec["var9"], np.ndarray): + self.assertTrue( + common.allequal( + rec["var9"], + np.array( + [0.0 + float(nrows) * 1.0j, float(nrows) + 0.0j], + np.complex64, + ), + ) + ) + else: + self.assertEqual((rec["var9"]), float(nrows) + 0.0j) + self.assertEqual(len(result), 20) + + def test01a_fetch_all_fields(self): + """Checking table read (using Row.fetch_all_fields)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01a_fetch_all_fields..." 
+ % self.__class__.__name__ + ) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result = [ + rec.fetch_all_fields() + for rec in table.iterrows() + if rec["var2"] < 20 + ] + rec = result[-1] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", rec) + print("Total selected records in table ==> ", len(result)) + nrows = 20 - 1 + strnrows = "%04d" % (self.expectedrows - nrows) + strnrows = strnrows.encode("ascii") + self.assertEqual( + (rec["var1"], rec["var2"], rec["var7"]), (strnrows, nrows, b"1") + ) + if isinstance(rec["var5"], np.ndarray): + self.assertTrue( + common.allequal( + rec["var5"], np.array((float(nrows),) * 4, np.float32) + ) + ) + else: + self.assertEqual(rec["var5"], float(nrows)) + if isinstance(rec["var9"], np.ndarray): + self.assertTrue( + common.allequal( + rec["var9"], + np.array( + [0.0 + float(nrows) * 1.0j, float(nrows) + 0.0j], + np.complex64, + ), + ) + ) + else: + self.assertEqual(rec["var9"], float(nrows) + 0.0j) + self.assertEqual(len(result), 20) + + def test01a_integer(self): + """Checking table read (using Row[integer])""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_integer..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result = [rec[1] for rec in table.iterrows() if rec["var2"] < 20] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Total selected records in table ==> ", len(result)) + print("All results ==>", result) + self.assertEqual(len(result), 20) + self.assertEqual(result, list(range(20))) + + def test01a_extslice(self): + """Checking table read (using Row[::2])""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_extslice..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result = [rec[::2] for rec in table.iterrows() if rec["var2"] < 20] + rec = result[-1] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", rec) + print("Total selected records in table ==> ", len(result)) + nrows = 20 - 1 + strnrows = "%04d" % (self.expectedrows - nrows) + strnrows = strnrows.encode("ascii") + self.assertEqual(rec[:2], (strnrows, 19)) + self.assertEqual(rec[3], b"1") + if isinstance(rec[2], np.ndarray): + self.assertTrue( + common.allequal( + rec[2], np.array((float(nrows),) * 4, np.float32) + ) + ) + else: + self.assertEqual(rec[2], nrows) + if isinstance(rec[4], np.ndarray): + self.assertTrue( + common.allequal( + rec[4], + np.array( + [0.0 + float(nrows) * 1.0j, float(nrows) + 0.0j], + np.complex64, + ), + ) + ) + else: + self.assertEqual(rec[4], float(nrows) + 0.0j) + self.assertEqual(len(result), 20) + + def 
test01a_nofield(self): + """Checking table read (using Row['no-field'])""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_nofield..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Check that a KeyError is raised + # self.assertRaises only work with functions + # self.assertRaises(KeyError, [rec['no-field'] for rec in table]) + with self.assertRaises(KeyError): + result = [rec["no-field"] for rec in table] + if common.verbose: + print("result:", result) + + def test01a_badtypefield(self): + """Checking table read (using Row[{}])""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01a_badtypefield..." % self.__class__.__name__ + ) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Check that a TypeError is raised + # self.assertRaises only work with functions + # self.assertRaises(TypeError, [rec[{}] for rec in table]) + with self.assertRaises(TypeError): + result = [rec[{}] for rec in table] + if common.verbose: + print("result:", result) + + def test01b_readTable(self): + """Checking table read and cuts (multidimensional columns case)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_readTable..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result = [rec["var5"] for rec in table.iterrows() if rec["var2"] < 20] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", table[-1]) + print("rec['var5'] ==>", table[-1]["var5"], end=" ") + print("nrows ==>", table.nrows) + print("Total selected records in table ==> ", len(result)) + nrows = table.nrows + rec = list(table.iterrows())[-1] + if isinstance(rec["var5"], np.ndarray): + np.testing.assert_array_equal( + result[0], np.array((float(0),) * 4, np.float32) + ) + np.testing.assert_array_equal( + result[1], np.array((float(1),) * 4, np.float32) + ) + np.testing.assert_array_equal( + result[2], np.array((float(2),) * 4, np.float32) + ) + np.testing.assert_array_equal( + result[3], np.array((float(3),) * 4, np.float32) + ) + np.testing.assert_array_equal( + result[10], np.array((float(10),) * 4, np.float32) + ) + np.testing.assert_array_equal( + rec["var5"], np.array((float(nrows - 1),) * 4, np.float32) + ) + else: + self.assertEqual(rec["var5"], float(nrows - 1)) + + # Read the records and select those with "var2" file less than 20 + result = [ + record["var10"] + for record in table.iterrows() + if record["var2"] < 20 + ] + if isinstance(rec["var10"], np.ndarray): + np.testing.assert_array_equal( + result[0], + np.array( + [float(0) + 0.0j, 1.0 + float(0) * 1j], np.complex128 + ), + ) + np.testing.assert_array_equal( + result[1], + np.array( + [float(1) + 0.0j, 1.0 + float(1) * 1j], np.complex128 + ), + ) + np.testing.assert_array_equal( + result[2], + np.array( + [float(2) + 0.0j, 1.0 + float(2) * 1j], np.complex128 + ), + ) + np.testing.assert_array_equal( + result[3], + np.array( + [float(3) + 0.0j, 1.0 + 
float(3) * 1j], np.complex128 + ), + ) + np.testing.assert_array_equal( + result[10], + np.array( + [float(10) + 0.0j, 1.0 + float(10) * 1j], np.complex128 + ), + ) + np.testing.assert_array_equal( + rec["var10"], + np.array( + [float(nrows - 1) + 0.0j, 1.0 + float(nrows - 1) * 1j], + np.complex128, + ), + ) + else: + self.assertEqual(rec["var10"], 1.0 + float(nrows - 1) * 1j) + self.assertEqual(len(result), 20) + + def test01c_readTable(self): + """Checking nested iterators (reading)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01c_readTable..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Read the records and select those with "var2" file less than 20 + result = [] + for rec in table.iterrows(stop=2): + for rec2 in table.iterrows(stop=2): + if rec2["var2"] < 20: + result.append([rec["var2"], rec2["var2"]]) + if common.verbose: + print("result ==>", result) + + self.assertEqual(result, [[0, 0], [0, 1], [1, 0], [1, 1]]) + + def test01d_readTable(self): + """Checking nested iterators (reading, mixed conditions)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01d_readTable..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Read the records and select those with "var2" file less than 20 + result = [] + for rec in table.iterrows(stop=2): + for rec2 in table.where("var2 < 20", stop=2): + result.append([rec["var2"], rec2["var2"]]) + if common.verbose: + print("result ==>", result) + + self.assertEqual(result, [[0, 0], [0, 1], [1, 0], [1, 1]]) + + def test01e_readTable(self): + """Checking nested iterators (reading, both conditions)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01e_readTable..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Read the records and select those with "var2" file less than 20 + result = [] + for rec in table.where("var3 < 2"): + for rec2 in table.where("var2 < 3"): + result.append([rec["var2"], rec2["var3"]]) + if common.verbose: + print("result ==>", result) + + self.assertEqual( + result, [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]] + ) + + def test01f_readTable(self): + """Checking nested iterators (reading, break in the loop)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01f_readTable..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Read the records and select those with "var2" file less than 20 + result = [] + for rec in table.where("var3 < 2"): + for rec2 in table.where("var2 < 4"): + if rec2["var2"] >= 3: + break + result.append([rec["var2"], rec2["var3"]]) + if common.verbose: + print("result ==>", result) + + self.assertEqual( + result, [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]] + ) + + def test01g_readTable(self): + """Checking iterator with an evanescent table.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01g_readTable..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + + # Read from an evanescent table + result = [ + rec["var2"] + for rec in self.h5file.get_node("/table0") + if rec["var2"] < 20 + ] + + self.assertEqual(len(result), 20) + + def test02_AppendRows(self): + """Checking whether appending record rows works or not.""" + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + self.rootgroup = self.h5file.root + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_AppendRows..." 
% self.__class__.__name__) + + # Get a table + table = self.h5file.get_node("/group0/table1") + # Get their row object + row = table.row + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + # Append some rows + for i in range(self.appendrows): + s = "%04d" % (self.appendrows - i) + row["var1"] = s.encode("ascii") + row["var7"] = s[-1].encode("ascii") + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var8"], np.ndarray): + row["var8"] = [0, 1] + else: + row["var8"] = 1 + if isinstance(row["var9"], np.ndarray): + row["var9"] = [0.0 + float(i) * 1j, float(i) + 0.0j] + else: + row["var9"] = float(i) + 0.0j + if isinstance(row["var10"], np.ndarray): + row["var10"] = [float(i) + 0.0j, 1.0 + float(i) * 1j] + else: + row["var10"] = 1.0 + float(i) * 1j + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + if hasattr(tb, "Float16Col"): + if isinstance(row["var11"], np.ndarray): + row["var11"] = np.array((float(i),) * 4) + else: + row["var11"] = float(i) + if hasattr(tb, "Float96Col"): + if isinstance(row["var12"], np.ndarray): + row["var12"] = np.array((float(i),) * 4) + else: + row["var12"] = float(i) + if hasattr(tb, "Float128Col"): + if isinstance(row["var13"], np.ndarray): + row["var13"] = np.array((float(i),) * 4) + else: + row["var13"] = float(i) + if hasattr(tb, "Complex192Col"): + if isinstance(row["var14"], np.ndarray): + row["var14"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var14"] = 1 + float(i) * 1j + if hasattr(tb, "Complex256Col"): + if isinstance(row["var15"], np.ndarray): + row["var15"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var15"] = 1 + float(i) * 1j + + row.append() + + # Flush the buffer for 
this table and read it + table.flush() + result = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + nrows = self.appendrows - 1 + row = list(table.iterrows())[-1] + self.assertEqual( + (row["var1"], row["var2"], row["var7"]), (b"0001", nrows, b"1") + ) + if isinstance(row["var5"], np.ndarray): + self.assertTrue( + common.allequal( + row["var5"], np.array((float(nrows),) * 4, np.float32) + ) + ) + else: + self.assertEqual(row["var5"], float(nrows)) + if self.appendrows <= 20: + add = self.appendrows + else: + add = 20 + self.assertEqual(len(result), 20 + add) # because we appended new rows + + # This test has been commented out because appending records without + # flushing them explicitely is being warned from now on. + # F. Alted 2006-08-03 + def _test02a_AppendRows(self): + """Checking appending records without flushing explicitly.""" + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + self.rootgroup = self.h5file.root + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02a_AppendRows..." 
% self.__class__.__name__) + + group = self.rootgroup + for group_i in range(3): + # Get a table + table = self.h5file.get_node(group, "table" + str(group_i)) + # Get the next group + group = self.h5file.get_node(group, "group" + str(group_i)) + # Get their row object + row = table.row + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + # Append some rows + for row_i in range(self.appendrows): + row["var1"] = "%04d" % (self.appendrows - row_i) + row["var7"] = row["var1"][-1] + row["var2"] = row_i + row["var3"] = row_i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(row_i), float(row_i * row_i)] + else: + row["var4"] = float(row_i) + if isinstance(row["var8"], np.ndarray): + row["var8"] = [0, 1] + else: + row["var8"] = 1 + if isinstance(row["var9"], np.ndarray): + row["var9"] = [ + 0.0 + float(row_i) * 1j, + float(row_i) + 0.0j, + ] + else: + row["var9"] = float(row_i) + 0.0j + if isinstance(row["var10"], np.ndarray): + row["var10"] = [ + float(row_i) + 0.0j, + 1.0 + float(row_i) * 1j, + ] + else: + row["var10"] = 1.0 + float(row_i) * 1j + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(row_i),) * 4) + else: + row["var5"] = float(row_i) + if hasattr(tb, "Float16Col"): + if isinstance(row["var11"], np.ndarray): + row["var11"] = np.array((float(row_i),) * 4) + else: + row["var11"] = float(row_i) + if hasattr(tb, "Float96Col"): + if isinstance(row["var12"], np.ndarray): + row["var12"] = np.array((float(row_i),) * 4) + else: + row["var12"] = float(row_i) + if hasattr(tb, "Float128Col"): + if isinstance(row["var13"], np.ndarray): + row["var13"] = np.array((float(row_i),) * 4) + else: + row["var13"] = float(row_i) + if hasattr(tb, "Complex192Col"): + if isinstance(row["var14"], np.ndarray): + row["var14"] = [ + float(row_i) + 0j, + 1 + float(row_i) * 1j, + ] + else: + 
row["var14"] = 1 + float(row_i) * 1j + if hasattr(tb, "Complex256Col"): + if isinstance(row["var15"], np.ndarray): + row["var15"] = [ + float(row_i) + 0j, + 1 + float(row_i) * 1j, + ] + else: + row["var15"] = 1 + float(row_i) * 1j + + row.append() + table.flush() + + # Close the file and re-open it. + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname, mode="a") + table = self.h5file.root.table0 + # Flush the buffer for this table and read it + result = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + nrows = self.appendrows - 1 + self.assertEqual( + (row["var1"], row["var2"], row["var7"]), ("0001", nrows, "1") + ) + if isinstance(row["var5"], np.ndarray): + self.assertTrue( + common.allequal( + row["var5"], np.array((float(nrows),) * 4, np.float32) + ) + ) + else: + self.assertEqual(row["var5"], float(nrows)) + if self.appendrows <= 20: + add = self.appendrows + else: + add = 20 + self.assertEqual(len(result), 20 + add) # because we appended new rows + + def test02b_AppendRows(self): + """Checking whether appending *and* reading rows works or not""" + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + self.rootgroup = self.h5file.root + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02b_AppendRows..." 
% self.__class__.__name__) + + # Get a table + table = self.h5file.get_node("/group0/table1") + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + # Set a small number of buffer to make this test faster + table.nrowsinbuf = 3 + # Get their row object + row = table.row + # Append some rows (3 * table.nrowsinbuf is enough for + # checking purposes) + for i in range(3 * table.nrowsinbuf): + s = "%04d" % (self.appendrows - i) + row["var1"] = s.encode("ascii") + row["var7"] = s[-1].encode("ascii") + # row['var7'] = table.cols['var1'][i][-1] + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var8"], np.ndarray): + row["var8"] = [0, 1] + else: + row["var8"] = 1 + if isinstance(row["var9"], np.ndarray): + row["var9"] = [0.0 + float(i) * 1j, float(i) + 0.0j] + else: + row["var9"] = float(i) + 0.0j + if isinstance(row["var10"], np.ndarray): + row["var10"] = [float(i) + 0.0j, 1.0 + float(i) * 1j] + else: + row["var10"] = 1.0 + float(i) * 1j + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + if hasattr(tb, "Float16Col"): + if isinstance(row["var11"], np.ndarray): + row["var11"] = np.array((float(i),) * 4) + else: + row["var11"] = float(i) + if hasattr(tb, "Float96Col"): + if isinstance(row["var12"], np.ndarray): + row["var12"] = np.array((float(i),) * 4) + else: + row["var12"] = float(i) + if hasattr(tb, "Float128Col"): + if isinstance(row["var13"], np.ndarray): + row["var13"] = np.array((float(i),) * 4) + else: + row["var13"] = float(i) + if hasattr(tb, "Complex192Col"): + if isinstance(row["var14"], np.ndarray): + row["var14"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var14"] = 1 + float(i) * 1j + if hasattr(tb, 
"Complex256Col"): + if isinstance(row["var15"], np.ndarray): + row["var15"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var15"] = 1 + float(i) * 1j + + row.append() + # We are closing and reopening in 'r'ead-only instead of flushing for + # making Windows use the Blosc2 optimized path for reading chunks + # table.flush() + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + table = self.h5file.get_node("/group0/table1") + table.nrowsinbuf = 3 + row = table.row + result = [row2["var2"] for row2 in table] + # warning! the next will result into wrong results + # result = [ row['var2'] for row in table ] + # This is because the iterator for writing and for reading + # cannot be shared! + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="a") + table = self.h5file.get_node("/group0/table1") + table.nrowsinbuf = 3 + row = table.row + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + table = self.h5file.get_node("/group0/table1") + table.nrowsinbuf = 3 + + # print(table.read()) + + result = [ + row3["var2"] for row3 in table.iterrows() if row3["var2"] < 20 + ] + if common.verbose: + print("Result length ==>", len(result)) + print("Result contents ==>", result) + self.assertEqual(len(result), 20 + 3 * table.nrowsinbuf) + self.assertEqual( + result, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + ], + ) + # Check consistency of I/O buffers when doing mixed I/O operations + # That is, the next should work in these operations + # row['var1'] = '%04d' % (self.appendrows - i) + # row['var7'] = row['var1'][-1] + result7 = [ + row4["var7"] for row4 in table.iterrows() if row4["var2"] < 20 + ] + if common.verbose: + print("Result7 length ==>", len(result7)) + print("Result7 contents ==>", result7) + self.assertEqual( + result7, + [ + b"0", + b"9", + b"8", + b"7", + b"6", + b"5", + b"4", + b"3", 
+ b"2", + b"1", + b"0", + b"9", + b"8", + b"7", + b"6", + b"5", + b"4", + b"3", + b"2", + b"1", + b"0", + b"9", + b"8", + b"7", + b"6", + b"5", + b"4", + b"3", + b"2", + ], + ) + + def test02d_AppendRows(self): + """Checking appending using the same Row object after flushing.""" + + # This test is kind of magic, but it is a good sanity check anyway. + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + self.rootgroup = self.h5file.root + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02d_AppendRows..." % self.__class__.__name__) + + # Get a table + table = self.h5file.get_node("/group0/table1") + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + # Set a small number of buffer to make this test faster + table.nrowsinbuf = 3 + # Get their row object + row = table.row + # Append some rows + for i in range(10): + row["var2"] = 100 + i + row.append() + # Force a flush + table.flush() + # Add new rows + for i in range(9): + row["var2"] = 110 + i + row.append() + table.flush() # XXX al eliminar... 
+ result = [ + r["var2"] for r in table.iterrows() if 100 <= r["var2"] < 120 + ] + if common.verbose: + print("Result length ==>", len(result)) + print("Result contents ==>", result) + if table.nrows > 119: + # Case for big tables + self.assertEqual(len(result), 39) + self.assertEqual( + result, + [ + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + ], + ) + else: + self.assertEqual(len(result), 19) + self.assertEqual( + result, + [ + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + ], + ) + + def test02e_AppendRows(self): + """Checking appending using the Row of an unreferenced table.""" + # See ticket #94 (http://www.pytables.org/trac/ticket/94). + + # Reopen the file in append mode. + self.h5file = tb.open_file(self.h5fname, mode="a") + + # Get the row handler which will outlive the reference to the table. + table = self.h5file.get_node("/group0/table1") + oldnrows = table.nrows + row = table.row + + # Few appends are made to avoid flushing the buffers in ``row``. + + # First case: append to an alive (referenced) table. + row.append() + table.flush() + newnrows = table.nrows + self.assertEqual( + newnrows, oldnrows + 1, "Append to alive table failed." + ) + + if self.h5file._node_manager.cache.nslots == 0: + # Skip this test from here on because the second case + # won't work when thereis not a node cache. + return + + # Second case: append to a dead (unreferenced) table. + del table + row.append() + table = self.h5file.get_node("/group0/table1") + table.flush() + newnrows = table.nrows + self.assertEqual( + newnrows, oldnrows + 2, "Append to dead table failed." 
+ ) + + def test02f_AppendRows(self): + """Checking whether blosc2 optimized appending *and* reading rows works or not""" + + class Particle(tb.IsDescription): + name = tb.StringCol(16, pos=1) # 16-character String + lati = tb.Int32Col(pos=2) # integer + longi = tb.Int32Col(pos=3) # integer + pressure = tb.Float32Col(pos=4) # float (single-precision) + temperature = tb.Float64Col(pos=5) # double (double-precision) + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + + # Create a new group + group = self.h5file.create_group(self.h5file.root, "newgroup") + + # Create a new table in newgroup group + table = self.h5file.create_table( + group, + "table", + Particle, + "A table", + tb.Filters( + complevel=self.compress, + shuffle=bool(self.shuffle), + bitshuffle=bool(self.bitshuffle), + complib=self.complib, + ), + chunkshape=3, + ) + + self.rootgroup = self.h5file.root.newgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02f_AppendRows..." 
% self.__class__.__name__) + + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + + # Add a couple of user attrs + table.attrs.user_attr1 = 1.023 + table.attrs.user_attr2 = "This is the second user attr" + + # Append several rows in only one call + for i in range(10): + table.append( + [(f"Particle: {i:6d}", i, 10 - i, float(i * i), float(i**2))] + ) + + table.append( + [ + ("Particle: 10", 10, 0, 10 * 10, 10**2), + ("Particle: 11", 11, -1, 11 * 11, 11**2), + ("Particle: 12", 12, -2, 12 * 12, 12**2), + ] + ) + + table.append( + [ + ("Particle: 13", 13, -3, 13 * 13, 13**2), + ("Particle: 14", 14, -4, 14 * 14, 14**2), + ] + ) + + for i in range(10): + j = i + 1 + k = i * i + l = k + 1 + table.append( + [ + ( + f"Particle: {i:6d}", + i, + 10 - i, + float(i * i), + float(i**2), + ), + ( + f"Particle: {j:6d}", + j, + 10 - j, + float(j * j), + float(j**2), + ), + ( + f"Particle: {k:6d}", + k, + 10 - k, + float(k * k), + float(k**2), + ), + ( + f"Particle: {l:6d}", + l, + 10 - l, + float(l * l), + float(l**2), + ), + ] + ) + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + self.rootgroup = self.h5file.root.newgroup + table = self.rootgroup.table + + result = [row[:] for row in table.iterrows()] + # result = table[:].tolist() + if common.verbose: + print("Result length ==>", len(result)) + print("Result contents ==>", result) + self.assertEqual(len(result), 10 + 3 + 2 + 10 * 4) + self.assertEqual( + result, + [ + (b"Particle: 0", 0, 10, 0.0, 0.0), + (b"Particle: 1", 1, 9, 1.0, 1.0), + (b"Particle: 2", 2, 8, 4.0, 4.0), + (b"Particle: 3", 3, 7, 9.0, 9.0), + (b"Particle: 4", 4, 6, 16.0, 16.0), + (b"Particle: 5", 5, 5, 25.0, 25.0), + (b"Particle: 6", 6, 4, 36.0, 36.0), + (b"Particle: 7", 7, 3, 49.0, 49.0), + (b"Particle: 8", 8, 2, 64.0, 64.0), + (b"Particle: 9", 9, 1, 81.0, 81.0), + (b"Particle: 10", 10, 0, 
100.0, 100.0), + (b"Particle: 11", 11, -1, 121.0, 121.0), + (b"Particle: 12", 12, -2, 144.0, 144.0), + (b"Particle: 13", 13, -3, 169.0, 169.0), + (b"Particle: 14", 14, -4, 196.0, 196.0), + (b"Particle: 0", 0, 10, 0.0, 0.0), + (b"Particle: 1", 1, 9, 1.0, 1.0), + (b"Particle: 0", 0, 10, 0.0, 0.0), + (b"Particle: 1", 1, 9, 1.0, 1.0), + (b"Particle: 1", 1, 9, 1.0, 1.0), + (b"Particle: 2", 2, 8, 4.0, 4.0), + (b"Particle: 1", 1, 9, 1.0, 1.0), + (b"Particle: 2", 2, 8, 4.0, 4.0), + (b"Particle: 2", 2, 8, 4.0, 4.0), + (b"Particle: 3", 3, 7, 9.0, 9.0), + (b"Particle: 4", 4, 6, 16.0, 16.0), + (b"Particle: 5", 5, 5, 25.0, 25.0), + (b"Particle: 3", 3, 7, 9.0, 9.0), + (b"Particle: 4", 4, 6, 16.0, 16.0), + (b"Particle: 9", 9, 1, 81.0, 81.0), + (b"Particle: 10", 10, 0, 100.0, 100.0), + (b"Particle: 4", 4, 6, 16.0, 16.0), + (b"Particle: 5", 5, 5, 25.0, 25.0), + (b"Particle: 16", 16, -6, 256.0, 256.0), + (b"Particle: 17", 17, -7, 289.0, 289.0), + (b"Particle: 5", 5, 5, 25.0, 25.0), + (b"Particle: 6", 6, 4, 36.0, 36.0), + (b"Particle: 25", 25, -15, 625.0, 625.0), + (b"Particle: 26", 26, -16, 676.0, 676.0), + (b"Particle: 6", 6, 4, 36.0, 36.0), + (b"Particle: 7", 7, 3, 49.0, 49.0), + (b"Particle: 36", 36, -26, 1296.0, 1296.0), + (b"Particle: 37", 37, -27, 1369.0, 1369.0), + (b"Particle: 7", 7, 3, 49.0, 49.0), + (b"Particle: 8", 8, 2, 64.0, 64.0), + (b"Particle: 49", 49, -39, 2401.0, 2401.0), + (b"Particle: 50", 50, -40, 2500.0, 2500.0), + (b"Particle: 8", 8, 2, 64.0, 64.0), + (b"Particle: 9", 9, 1, 81.0, 81.0), + (b"Particle: 64", 64, -54, 4096.0, 4096.0), + (b"Particle: 65", 65, -55, 4225.0, 4225.0), + (b"Particle: 9", 9, 1, 81.0, 81.0), + (b"Particle: 10", 10, 0, 100.0, 100.0), + (b"Particle: 81", 81, -71, 6561.0, 6561.0), + (b"Particle: 82", 82, -72, 6724.0, 6724.0), + ], + ) + + def test02g_AppendRows(self): + """Checking whether blosc2 optimized appending *and* reading rows works or not""" + + class Particle(tb.IsDescription): + name = tb.StringCol(16, pos=1) # 16-character 
String + lati = tb.Int32Col(pos=2) # integer + longi = tb.Int32Col(pos=3) # integer + pressure = tb.Float32Col(pos=4) # float (single-precision) + temperature = tb.Float64Col(pos=5) # double (double-precision) + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + + # Create a new group + group = self.h5file.create_group(self.h5file.root, "newgroup") + + # Create a new table in newgroup group + table = self.h5file.create_table( + group, + "table", + Particle, + "A table", + tb.Filters( + complevel=self.compress, + shuffle=bool(self.shuffle), + bitshuffle=bool(self.bitshuffle), + complib=self.complib, + ), + chunkshape=3, + ) + + self.rootgroup = self.h5file.root.newgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02g_AppendRows..." % self.__class__.__name__) + + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + + # Add a couple of user attrs + table.attrs.user_attr1 = 1.023 + table.attrs.user_attr2 = "This is the second user attr" + + # Append several rows in only one call + for j in range(50): + i = 13 * j + table.append( + [(f"Particle: {i:6d}", i, 10 - i, float(i * i), float(i**2))] + ) + + table.append( + [ + ( + f"Particle: {i+1:6d}", + i + 1, + 10 - (i + 1), + float((i + 1) * (i + 1)), + float((i + 1) ** 2), + ), + ( + f"Particle: {i+2:6d}", + i + 2, + 10 - (i + 2), + float((i + 2) * (i + 2)), + float((i + 2) ** 2), + ), + ( + f"Particle: {i+3:6d}", + i + 3, + 10 - (i + 3), + float((i + 3) * (i + 3)), + float((i + 3) ** 2), + ), + ] + ) + + table.append( + [ + ( + f"Particle: {i+4:6d}", + i + 4, + 10 - (i + 4), + float((i + 4) * (i + 4)), + float((i + 4) ** 2), + ), + ( + f"Particle: {i+5:6d}", + i + 5, + 10 - (i + 5), + float((i + 5) * (i + 5)), + float((i + 5) ** 2), + ), + ( + f"Particle: {i+6:6d}", + i + 6, + 10 - (i + 6), + float((i + 6) * (i + 6)), 
+ float((i + 6) ** 2), + ), + ( + f"Particle: {i+7:6d}", + i + 7, + 10 - (i + 7), + float((i + 7) * (i + 7)), + float((i + 7) ** 2), + ), + ] + ) + + table.append( + [ + ( + f"Particle: {i+8:6d}", + i + 8, + 10 - (i + 8), + float((i + 8) * (i + 8)), + float((i + 8) ** 2), + ), + ( + f"Particle: {i+9:6d}", + i + 9, + 10 - (i + 9), + float((i + 9) * (i + 9)), + float((i + 9) ** 2), + ), + ( + f"Particle: {i+10:6d}", + i + 10, + 10 - (i + 10), + float((i + 10) * (i + 10)), + float((i + 10) ** 2), + ), + ( + f"Particle: {i+11:6d}", + i + 11, + 10 - (i + 11), + float((i + 11) * (i + 11)), + float((i + 11) ** 2), + ), + ( + f"Particle: {i+12:6d}", + i + 12, + 10 - (i + 12), + float((i + 12) * (i + 12)), + float((i + 12) ** 2), + ), + ] + ) + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + self.rootgroup = self.h5file.root.newgroup + table = self.rootgroup.table + result = [row[:] for row in table.iterrows()] + # result = table[:].tolist() + if common.verbose: + print("Result length ==>", len(result)) + print("Result contents ==>", result) + + particles = [] + for i in range(50 * 13): + particles.append( + ( + f"Particle: {i:6d}".encode(), + i, + 10 - i, + float(i * i), + float(i**2), + ) + ) + + self.assertEqual(len(result), 50 * 13) + self.assertEqual(result, particles) + + # CAVEAT: The next test only works for tables with rows < 2**15 + def test03_endianess(self): + """Checking if table is endianess aware.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_endianess..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/group0/group1/table2") + + # Read the records and select the ones with "var3" column less than 20 + result = [rec["var2"] for rec in table.iterrows() if rec["var3"] < 20] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("On-disk byteorder ==>", table.byteorder) + print("Last record in table ==>", table[-1]) + print("Selected records ==>", result) + print("Total selected records in table ==>", len(result)) + nrows = self.expectedrows - 1 + self.assertEqual( + table.byteorder, {"little": "big", "big": "little"}[sys.byteorder] + ) + rec = list(table.iterrows())[-1] + self.assertEqual((rec["var1"], rec["var3"]), (b"0001", nrows)) + self.assertEqual(len(result), 20) + + def test04_delete(self): + """Checking whether a single row can be deleted.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_delete..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read the records and select the ones with "var2" column less than 20 + result = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result[-1]) + print("Total selected records in table ==>", len(result)) + + nrows = table.nrows + table.nrowsinbuf = 3 # small value of the buffer + # Delete the twenty-th row + table.remove_rows(19, 20) + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result2[-1]) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, nrows - 1) + self.assertEqual(table.shape, (nrows - 1,)) + # Check that the new list is smaller than the original one + self.assertEqual(len(result), len(result2) + 1) + self.assertEqual(result[:-1], result2) + + def test04a_delete(self): + """Checking whether a single row can be deleted.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_delete..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read the records and select the ones with "var2" column less than 20 + result = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result[-1]) + print("Total selected records in table ==>", len(result)) + + nrows = table.nrows + table.nrowsinbuf = 3 # small value of the buffer + # Delete the twenty-th row + table.remove_row(19) + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result2[-1]) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, nrows - 1) + self.assertEqual(table.shape, (nrows - 1,)) + # Check that the new list is smaller than the original one + self.assertEqual(len(result), len(result2) + 1) + self.assertEqual(result[:-1], result2) + + def test04b_delete(self): + """Checking whether a range of rows can be deleted.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04b_delete..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read the records and select the ones with "var2" column less than 20 + result = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result[-1]) + print("Total selected records in table ==>", len(result)) + + nrows = table.nrows + table.nrowsinbuf = 4 # small value of the buffer + # Delete the last ten rows + table.remove_rows(10, 20) + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result2[-1]) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, nrows - 10) + self.assertEqual(table.shape, (nrows - 10,)) + # Check that the new list is smaller than the original one + self.assertEqual(len(result), len(result2) + 10) + self.assertEqual(result[:10], result2) + + def test04c_delete(self): + """Checking whether removing a bad range of rows is detected.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04c_delete..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read the records and select the ones with "var2" column less than 20 + result = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + nrows = table.nrows + table.nrowsinbuf = 5 # small value of the buffer + # Delete a too large range of rows + table.remove_rows(10, nrows + 100) + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows() if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result2[-1]) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, 10) + self.assertEqual(table.shape, (10,)) + # Check that the new list is smaller than the original one + self.assertEqual(len(result), len(result2) + 10) + self.assertEqual(result[:10], result2) + + def test04d_delete(self): + """Checking whether removing rows several times at once is working.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04d_delete..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read the records and select the ones with "var2" column less than 20 + result = [r["var2"] for r in table if r["var2"] < 20] + + nrows = table.nrows + nrowsinbuf = table.nrowsinbuf + table.nrowsinbuf = 6 # small value of the buffer + # Delete some rows + table.remove_rows(10, 15) + # It's necessary to restore the value of buffer to use the row object + # afterwards... + table.nrowsinbuf = nrowsinbuf + + # Append some rows + row = table.row + for i in range(10, 15): + row["var1"] = "%04d" % (self.appendrows - i) + # This line gives problems on Windows. Why? 
+ # row['var7'] = row['var1'][-1] + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var8"], np.ndarray): + row["var8"] = [0, 1] + else: + row["var8"] = 1 + if isinstance(row["var9"], np.ndarray): + row["var9"] = [0.0 + float(i) * 1j, float(i) + 0.0j] + else: + row["var9"] = float(i) + 0.0j + if isinstance(row["var10"], np.ndarray): + row["var10"] = [float(i) + 0.0j, 1.0 + float(i) * 1j] + else: + row["var10"] = 1.0 + float(i) * 1j + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + if hasattr(tb, "Float16Col"): + if isinstance(row["var11"], np.ndarray): + row["var11"] = np.array((float(i),) * 4) + else: + row["var11"] = float(i) + if hasattr(tb, "Float96Col"): + if isinstance(row["var12"], np.ndarray): + row["var12"] = np.array((float(i),) * 4) + else: + row["var12"] = float(i) + if hasattr(tb, "Float128Col"): + if isinstance(row["var13"], np.ndarray): + row["var13"] = np.array((float(i),) * 4) + else: + row["var13"] = float(i) + if hasattr(tb, "Complex192Col"): + if isinstance(row["var14"], np.ndarray): + row["var14"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var14"] = 1 + float(i) * 1j + if hasattr(tb, "Complex256Col"): + if isinstance(row["var15"], np.ndarray): + row["var15"] = [float(i) + 0j, 1 + float(i) * 1j] + else: + row["var15"] = 1 + float(i) * 1j + + row.append() + # Flush the buffer for this table + table.flush() + + # Delete 5 rows more + table.remove_rows(5, 10) + + # Re-read the records + result2 = [r["var2"] for r in table if r["var2"] < 20] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result2[-1]) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, nrows - 5) + self.assertEqual(table.shape, (nrows - 5,)) + # Check that the 
new list is smaller than the original one + self.assertEqual(len(result), len(result2) + 5) + # The last values has to be equal + self.assertEqual(result[10:15], result2[10:15]) + + def test04e_delete(self): + """Checking whether all rows can be deleted.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04e_delete..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read all records + result = [r["var2"] for r in table.iterrows()] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result[-1]) + print("Total selected records in table ==>", len(result)) + + table.nrowsinbuf = 4 # small value of the buffer + # Delete all rows + table.remove_rows(0, self.expectedrows) + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows()] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, 0) + self.assertEqual(table.shape, (0,)) + self.assertEqual(len(result2), 0) + + def test04f_delete(self): + """Checking whether all rows can be deleted.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04e_delete..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read all records + result = [r["var2"] for r in table.iterrows()] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result[-1]) + print("Total selected records in table ==>", len(result)) + + table.nrowsinbuf = 4 # small value of the buffer + # Delete 100 rows + table.remove_rows() + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows()] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Total selected records in table ==>", len(result2)) + + self.assertEqual(table.nrows, 0) + self.assertEqual(table.shape, (0,)) + self.assertEqual(len(result2), 0) + + def test04g_delete(self): + """Checking whether rows can be deleted with a step parameter.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04e_delete..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "a") + table = self.h5file.get_node("/table0") + + # Read all records + result = [r["var2"] for r in table.iterrows()] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last selected value ==>", result[-1]) + print("Total selected records in table ==>", len(result)) + + nrows = table.nrows + table.nrowsinbuf = 4 # small value of the buffer + # Delete 100 rows + table.remove_rows(0, nrows + 1, 5) + + # Re-read the records + result2 = [r["var2"] for r in table.iterrows()] + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Total selected records in table ==>", len(result2)) + + outnrows = nrows - nrows // 5 + self.assertEqual(table.nrows, outnrows) + self.assertEqual(table.shape, (outnrows,)) + self.assertEqual(len(result2), outnrows) + + def test05_filtersTable(self): + """Checking tablefilters.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test05_filtersTable..." % self.__class__.__name__ + ) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Check filters: + if self.compress != table.filters.complevel and common.verbose: + print("Error in compress. Class:", self.__class__.__name__) + print("self, table:", self.compress, table.filters.complevel) + self.assertEqual(table.filters.complevel, self.compress) + if self.compress > 0 and tb.which_lib_version(self.complib): + self.assertEqual(table.filters.complib, self.complib) + if self.shuffle != table.filters.shuffle and common.verbose: + print("Error in shuffle. Class:", self.__class__.__name__) + print("self, table:", self.shuffle, table.filters.shuffle) + self.assertEqual(self.shuffle, table.filters.shuffle) + if self.bitshuffle != table.filters.bitshuffle and common.verbose: + print("Error in bitshuffle. 
Class:", self.__class__.__name__) + print("self, table:", self.bitshuffle, table.filters.bitshuffle) + self.assertEqual(self.bitshuffle, table.filters.bitshuffle) + if self.fletcher32 != table.filters.fletcher32 and common.verbose: + print("Error in fletcher32. Class:", self.__class__.__name__) + print("self, table:", self.fletcher32, table.filters.fletcher32) + self.assertEqual(self.fletcher32, table.filters.fletcher32) + + def test06_attributes(self): + self.h5file = tb.open_file(self.h5fname) + obj = self.h5file.get_node("/table0") + + self.assertEqual(obj.flavor, "numpy") + self.assertEqual(obj.shape, (self.expectedrows,)) + self.assertEqual(obj.ndim, 1) + self.assertEqual(obj.nrows, self.expectedrows) + + def test07_out_of_order_members(self): + # If members are stored 'out of order' make sure they are loaded + # correctly + self.h5file = tb.open_file( + common.test_filename("out_of_order_types.h5") + ) + row = self.h5file.get_node("/group/table")[0] + + self.assertEqual(row[0], b"*" * 14) + self.assertEqual(row[1], b"-" * 9) + self.assertEqual(row[2], b"." 
* 4) + + def test08_AppendModifyRows(self): + """Checking whether blosc2 optimized appending *and* reading rows works or not""" + + class Particle(tb.IsDescription): + name = tb.StringCol(16, pos=1) # 16-character String + lati = tb.Int32Col(pos=2) # integer + longi = tb.Int32Col(pos=3) # integer + pressure = tb.Float32Col(pos=4) # float (single-precision) + temperature = tb.Float64Col(pos=5) # double (double-precision) + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + + # Create a new group + group = self.h5file.create_group(self.h5file.root, "newgroup") + + # Create a new table in newgroup group + table = self.h5file.create_table( + group, + "table", + Particle, + "A table", + tb.Filters( + complevel=self.compress, + shuffle=bool(self.shuffle), + bitshuffle=bool(self.bitshuffle), + complib=self.complib, + ), + chunkshape=3, + ) + + self.rootgroup = self.h5file.root.newgroup + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test08_AppendModifyRows..." 
+ % self.__class__.__name__ + ) + + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + + # Add a couple of user attrs + table.attrs.user_attr1 = 1.023 + table.attrs.user_attr2 = "This is the second user attr" + + # Append several rows in only one call + for j in range(200): + i = 13 * j + table.append( + [(f"Particle: {i:6d}", i, 10 - i, float(i * i), float(i**2))] + ) + + table.append( + [ + ( + f"Particle: {i+1:6d}", + i + 1, + 10 - (i + 1), + float((i + 1) * (i + 1)), + float((i + 1) ** 2), + ), + ( + f"Particle: {i+2:6d}", + i + 2, + 10 - (i + 2), + float((i + 2) * (i + 2)), + float((i + 2) ** 2), + ), + ( + f"Particle: {i+3:6d}", + i + 3, + 10 - (i + 3), + float((i + 3) * (i + 3)), + float((i + 3) ** 2), + ), + ] + ) + + table.append( + [ + ( + f"Particle: {i+4:6d}", + i + 4, + 10 - (i + 4), + float((i + 4) * (i + 4)), + float((i + 4) ** 2), + ), + ( + f"Particle: {i+5:6d}", + i + 5, + 10 - (i + 5), + float((i + 5) * (i + 5)), + float((i + 5) ** 2), + ), + ( + f"Particle: {i+6:6d}", + i + 6, + 10 - (i + 6), + float((i + 6) * (i + 6)), + float((i + 6) ** 2), + ), + ( + f"Particle: {i+7:6d}", + i + 7, + 10 - (i + 7), + float((i + 7) * (i + 7)), + float((i + 7) ** 2), + ), + ] + ) + + table.append( + [ + ( + f"Particle: {i+8:6d}", + i + 8, + 10 - (i + 8), + float((i + 8) * (i + 8)), + float((i + 8) ** 2), + ), + ( + f"Particle: {i+9:6d}", + i + 9, + 10 - (i + 9), + float((i + 9) * (i + 9)), + float((i + 9) ** 2), + ), + ( + f"Particle: {i+10:6d}", + i + 10, + 10 - (i + 10), + float((i + 10) * (i + 10)), + float((i + 10) ** 2), + ), + ( + f"Particle: {i+11:6d}", + i + 11, + 10 - (i + 11), + float((i + 11) * (i + 11)), + float((i + 11) ** 2), + ), + ( + f"Particle: {i+12:6d}", + i + 12, + 10 - (i + 12), + float((i + 12) * (i + 12)), + float((i + 12) ** 2), + ), + ] + ) + table.modify_rows( + i + 10, + i + 11, + None, + 
[(f"Particle: {i:6d}", i, 10 - i, float(i * i), float(i**2))], + ) + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="r") + self.rootgroup = self.h5file.root.newgroup + table = self.rootgroup.table + result = [row[:] for row in table.iterrows()] + # result = table[:].tolist() + if common.verbose: + print("Result length ==>", len(result)) + print("Result contents ==>", result) + + particles = [] + for i in range(200 * 13): + particles.append( + ( + f"Particle: {i:6d}".encode(), + i, + 10 - i, + float(i * i), + float(i**2), + ) + ) + for j in range(200): + i = 13 * j + particles.pop(i + 10) + particles.insert( + i + 10, + ( + f"Particle: {i:6d}".encode(), + i, + 10 - i, + float(i * i), + float(i**2), + ), + ) + + self.assertEqual(len(result), 200 * 13) + self.assertEqual(result, particles) + + +class BasicWriteTestCase(BasicTestCase): + title = "BasicWrite" + + +class OldRecordBasicWriteTestCase(BasicTestCase): + title = "OldRecordBasicWrite" + record = OldRecord + + +class DictWriteTestCase(BasicTestCase): + # This checks also unidimensional arrays as columns + title = "DictWrite" + record = RecordDescriptionDict + nrows = 21 + nrowsinbuf = 3 # Choose a small value for the buffer size + start = 0 + stop = 10 + step = 3 + + +# Pure NumPy dtype +class NumPyDTWriteTestCase(BasicTestCase): + title = "NumPyDTWriteTestCase" + formats = "S4,i4,i2,2f8,f4,i2,S1,b1,c8,c16".split(",") + names = "var1,var2,var3,var4,var5,var6,var7,var8,var9,var10".split(",") + + if hasattr(tb, "Float16Col"): + formats.append("f2") + names.append("var11") + if hasattr(tb, "Float96Col"): + formats.append("f12") + names.append("var12") + if hasattr(tb, "Float128Col"): + formats.append("f16") + names.append("var13") + if hasattr(tb, "Complex192Col"): + formats.append("c24") + names.append("var14") + if hasattr(tb, "Complex256Col"): + formats.append("c32") + names.append("var15") + + record = np.dtype(",".join(formats)) + record.names = names + + +class 
RecArrayOneWriteTestCase(BasicTestCase): + title = "RecArrayOneWrite" + formats = "S4,i4,i2,2f8,f4,i2,S1,b1,c8,c16".split(",") + names = "var1,var2,var3,var4,var5,var6,var7,var8,var9,var10".split(",") + + if hasattr(tb, "Float16Col"): + formats.append("f2") + names.append("var11") + if hasattr(tb, "Float96Col"): + formats.append("f12") + names.append("var12") + if hasattr(tb, "Float128Col"): + formats.append("f16") + names.append("var13") + if hasattr(tb, "Complex192Col"): + formats.append("c24") + names.append("var14") + if hasattr(tb, "Complex256Col"): + formats.append("c32") + names.append("var15") + + record = np.rec.array( + None, shape=0, formats=",".join(formats), names=names + ) + + +class RecArrayTwoWriteTestCase(BasicTestCase): + title = "RecArrayTwoWrite" + expectedrows = 100 + recarrayinit = 1 + formats = "S4,i4,i2,2f8,f4,i2,S1,b1,c8,c16".split(",") + names = "var1,var2,var3,var4,var5,var6,var7,var8,var9,var10".split(",") + + if hasattr(tb, "Float16Col"): + formats.append("f2") + names.append("var11") + if hasattr(tb, "Float96Col"): + formats.append("f12") + names.append("var12") + if hasattr(tb, "Float128Col"): + formats.append("f16") + names.append("var13") + if hasattr(tb, "Complex192Col"): + formats.append("c24") + names.append("var14") + if hasattr(tb, "Complex256Col"): + formats.append("c32") + names.append("var15") + + recordtemplate = np.rec.array( + None, shape=1, formats=",".join(formats), names=names + ) + + +class RecArrayThreeWriteTestCase(BasicTestCase): + title = "RecArrayThreeWrite" + expectedrows = 100 + recarrayinit = 1 + formats = "S4,i4,i2,2f8,f4,i2,S1,b1,c8,c16".split(",") + names = "var1,var2,var3,var4,var5,var6,var7,var8,var9,var10".split(",") + + if hasattr(tb, "Float16Col"): + formats.append("f2") + names.append("var11") + if hasattr(tb, "Float96Col"): + formats.append("f12") + names.append("var12") + if hasattr(tb, "Float128Col"): + formats.append("f16") + names.append("var13") + if hasattr(tb, "Complex192Col"): + 
formats.append("c24") + names.append("var14") + if hasattr(tb, "Complex256Col"): + formats.append("c32") + names.append("var15") + + recordtemplate = np.rec.array( + None, shape=1, formats=",".join(formats), names=names + ) + + +class RecArrayAlignedWriteTestCase(BasicTestCase): + title = "RecArrayThreeWrite" + expectedrows = 100 + recarrayinit = 1 + formats = "S4,i4,i2,2f8,f4,i2,S1,b1,c8,c16".split(",") + names = "var1,var2,var3,var4,var5,var6,var7,var8,var9,var10".split(",") + + if hasattr(tb, "Float16Col"): + formats.append("f2") + names.append("var11") + if hasattr(tb, "Float96Col"): + formats.append("f12") + names.append("var12") + if hasattr(tb, "Float128Col"): + formats.append("f16") + names.append("var13") + if hasattr(tb, "Complex192Col"): + formats.append("c24") + names.append("var14") + if hasattr(tb, "Complex256Col"): + formats.append("c32") + names.append("var15") + + recordtemplate = np.rec.array( + None, shape=1, formats=",".join(formats), names=names, aligned=True + ) + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class CompressBloscTablesTestCase(BasicTestCase): + title = "CompressBloscTables" + compress = 6 + complib = "blosc" + + +@common.unittest.skipIf( + not common.blosc2_avail, "BLOSC2 compression library not available" +) +class CompressBlosc2TablesTestCase(BasicTestCase): + title = "Compress2BloscTables" + compress = 6 + complib = "blosc2" + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class CompressBloscShuffleTablesTestCase(BasicTestCase): + title = "CompressBloscTables" + compress = 1 + shuffle = 1 + complib = "blosc" + + +@common.unittest.skipIf( + not common.blosc2_avail, "BLOSC2 compression library not available" +) +class CompressBlosc2ShuffleTablesTestCase(BasicTestCase): + title = "CompressBloscTables" + compress = 1 + shuffle = 1 + complib = "blosc2" + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC 
compression library not available" +) +class CompressBloscBitShuffleTablesTestCase(BasicTestCase): + title = "CompressBloscBitShuffleTables" + compress = 1 + shuffle = 0 + bitshuffle = 1 + complib = "blosc:blosclz" + + +@common.unittest.skipIf( + not common.blosc2_avail, "BLOSC2 compression library not available" +) +class CompressBlosc2BitShuffleTablesTestCase(BasicTestCase): + title = "CompressBloscBit2ShuffleTables" + compress = 1 + shuffle = 0 + bitshuffle = 1 + complib = "blosc2:blosclz" + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class CompressBloscBloscLZTablesTestCase(BasicTestCase): + title = "CompressBloscLZTables" + compress = 1 + shuffle = 1 + complib = "blosc:blosclz" + + +@common.unittest.skipIf( + not common.blosc2_avail, "BLOSC compression library not available" +) +class CompressBlosc2BloscLZTablesTestCase(BasicTestCase): + title = "CompressBloscLZTables" + compress = 1 + shuffle = 1 + complib = "blosc2:blosclz" + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +@common.unittest.skipIf( + "lz4" not in tb.blosc_compressor_list(), "lz4 required" +) +class CompressBloscLZ4TablesTestCase(BasicTestCase): + title = "CompressLZ4Tables" + compress = 1 + shuffle = 1 + complib = "blosc:lz4" + + +@common.unittest.skipIf( + not common.blosc2_avail, "BLOSC2 compression library not available" +) +@common.unittest.skipIf( + "lz4" not in tb.blosc2_compressor_list(), "lz4 required" +) +class CompressBlosc2LZ4TablesTestCase(BasicTestCase): + title = "CompressLZ4Tables" + compress = 1 + shuffle = 1 + complib = "blosc2:lz4" + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +@common.unittest.skipIf( + "lz4" not in tb.blosc_compressor_list(), "lz4 required" +) +class CompressBloscLZ4HCTablesTestCase(BasicTestCase): + title = "CompressLZ4HCTables" + compress = 1 + shuffle = 1 + complib = "blosc:lz4hc" + + 
@common.unittest.skipIf(
    not common.blosc2_avail, "BLOSC2 compression library not available"
)
@common.unittest.skipIf(
    "lz4" not in tb.blosc2_compressor_list(), "lz4 required"
)
class CompressBlosc2LZ4HCTablesTestCase(BasicTestCase):
    """BasicTestCase variant: blosc2 with the lz4hc codec and shuffle."""

    title = "CompressLZ4HCTables"
    compress = 1
    shuffle = 1
    complib = "blosc2:lz4hc"


@common.unittest.skipIf(
    not common.blosc_avail, "BLOSC compression library not available"
)
@common.unittest.skipIf(
    "snappy" not in tb.blosc_compressor_list(), "snappy required"
)
class CompressBloscSnappyTablesTestCase(BasicTestCase):
    """BasicTestCase variant: blosc with the snappy codec and shuffle."""

    title = "CompressSnappyTables"
    compress = 1
    shuffle = 1
    complib = "blosc:snappy"


@common.unittest.skipIf(
    not common.blosc_avail, "BLOSC compression library not available"
)
@common.unittest.skipIf(
    "zlib" not in tb.blosc_compressor_list(), "zlib required"
)
class CompressBloscZlibTablesTestCase(BasicTestCase):
    """BasicTestCase variant: blosc with the zlib codec and shuffle."""

    title = "CompressZlibTables"
    compress = 1
    shuffle = 1
    complib = "blosc:zlib"


@common.unittest.skipIf(
    not common.blosc2_avail, "BLOSC2 compression library not available"
)
@common.unittest.skipIf(
    "zlib" not in tb.blosc2_compressor_list(), "zlib required"
)
class CompressBlosc2ZlibTablesTestCase(BasicTestCase):
    """BasicTestCase variant: blosc2 with the zlib codec and bitshuffle."""

    title = "CompressZlibTables"
    compress = 5
    shuffle = 0
    bitshuffle = 1
    complib = "blosc2:zlib"


@common.unittest.skipIf(
    not common.blosc_avail, "BLOSC compression library not available"
)
@common.unittest.skipIf(
    "zstd" not in tb.blosc_compressor_list(), "zstd required"
)
class CompressBloscZstdTablesTestCase(BasicTestCase):
    """BasicTestCase variant: blosc with the zstd codec and shuffle."""

    title = "CompressZstdTables"
    compress = 1
    shuffle = 1
    complib = "blosc:zstd"


@common.unittest.skipIf(
    not common.blosc2_avail, "BLOSC2 compression library not available"
)
@common.unittest.skipIf(
    "zstd" not in tb.blosc2_compressor_list(), "zstd required"
)
class CompressBlosc2ZstdTablesTestCase(BasicTestCase):
    """BasicTestCase variant: blosc2 with the zstd codec and shuffle."""

    title = "CompressZstdTables"
    compress = 1
    shuffle = 1
    complib = "blosc2:zstd"


@common.unittest.skipIf(
    not common.lzo_avail, "LZO compression library not available"
)
class CompressLZOTablesTestCase(BasicTestCase):
    """BasicTestCase variant: lzo compression."""

    title = "CompressLZOTables"
    compress = 1
    complib = "lzo"


@common.unittest.skipIf(
    not common.lzo_avail, "LZO compression library not available"
)
class CompressLZOShuffleTablesTestCase(BasicTestCase):
    """BasicTestCase variant: lzo compression with shuffle."""

    title = "CompressLZOTables"
    compress = 1
    shuffle = 1
    complib = "lzo"


@common.unittest.skipIf(
    not common.bzip2_avail, "BZIP2 compression library not available"
)
class CompressBzip2TablesTestCase(BasicTestCase):
    """BasicTestCase variant: bzip2 compression."""

    title = "CompressBzip2Tables"
    compress = 1
    complib = "bzip2"


@common.unittest.skipIf(
    not common.bzip2_avail, "BZIP2 compression library not available"
)
class CompressBzip2ShuffleTablesTestCase(BasicTestCase):
    """BasicTestCase variant: bzip2 compression with shuffle."""

    title = "CompressBzip2Tables"
    compress = 1
    shuffle = 1
    complib = "bzip2"


class CompressZLIBTablesTestCase(BasicTestCase):
    """BasicTestCase variant: zlib compression."""

    title = "CompressOneTables"
    compress = 1
    complib = "zlib"


class CompressZLIBShuffleTablesTestCase(BasicTestCase):
    """BasicTestCase variant: zlib compression with shuffle."""

    title = "CompressOneTables"
    compress = 1
    shuffle = 1
    complib = "zlib"


class Fletcher32TablesTestCase(BasicTestCase):
    """BasicTestCase variant: fletcher32 enabled, shuffle disabled."""

    title = "Fletcher32Tables"
    fletcher32 = 1
    shuffle = 0
    complib = "zlib"


class AllFiltersTablesTestCase(BasicTestCase):
    """BasicTestCase variant: zlib compression, shuffle and fletcher32."""

    title = "AllFiltersTables"
    compress = 1
    fletcher32 = 1
    shuffle = 1
    complib = "zlib"


class CompressTwoTablesTestCase(BasicTestCase):
    """BasicTestCase variant: compression with a dict record description."""

    title = "CompressTwoTables"
    compress = 1
    # This checks also unidimensional arrays as columns
    record = RecordDescriptionDict


class BigTablesTestCase(BasicTestCase):
    """BasicTestCase variant using a larger expected row count."""

    title = "BigTables"
    # 10000 rows takes much more time than we can afford for tests
    # reducing to 1000 would be more than enough
    # F. Alted 2004-01-19
    # Will be executed only in common.heavy mode
    expectedrows = 10_000
    appendrows = 100


class SizeOnDiskInMemoryPropertyTestCase(
    common.TempFileMixin, common.PyTablesTestCase
):
    """Check the ``size_on_disk`` and ``size_in_memory`` table properties."""

    def setUp(self):
        super().setUp()

        # set chunkshape so it divides evenly into array_size, to avoid
        # partially filled chunks
        self.chunkshape = (1000,)
        self.dtype = np.rec.format_parser(["i4"] * 10, [], []).dtype
        # approximate size (in bytes) of non-data portion of hdf5 file
        self.hdf_overhead = 6000

    def create_table(self, complevel):
        """Create ``/sometable`` using blosc at the given *complevel*."""
        filters = tb.Filters(complevel=complevel, complib="blosc")
        self.table = self.h5file.create_table(
            "/",
            "sometable",
            self.dtype,
            filters=filters,
            chunkshape=self.chunkshape,
        )

    def test_zero_length(self):
        """An empty table reports zero bytes on disk and in memory."""
        complevel = 0
        self.create_table(complevel)
        self.assertEqual(self.table.size_on_disk, 0)
        self.assertEqual(self.table.size_in_memory, 0)

    # add 10 chunks of data in one append
    def test_no_compression_one_append(self):
        """Uncompressed sizes after appending 10 chunks in one call."""
        complevel = 0
        self.create_table(complevel)
        self.table.append([tuple(range(10))] * self.chunkshape[0] * 10)
        self.assertEqual(self.table.size_on_disk, 10 * 1000 * 10 * 4)
        self.assertEqual(self.table.size_in_memory, 10 * 1000 * 10 * 4)

    # add 10 chunks of data in two appends
    def test_no_compression_multiple_appends(self):
        """Uncompressed sizes after appending 10 chunks in two calls."""
        complevel = 0
        self.create_table(complevel)
        self.table.append([tuple(range(10))] * self.chunkshape[0] * 5)
        self.table.append([tuple(range(10))] * self.chunkshape[0] * 5)
        self.assertEqual(self.table.size_on_disk, 10 * 1000 * 10 * 4)
        self.assertEqual(self.table.size_in_memory, 10 * 1000 * 10 * 4)

    def test_with_compression(self):
        """Compressed size on disk tracks the file size and is smaller."""
        complevel = 1
        self.create_table(complevel)
        self.table.append([tuple(range(10))] * self.chunkshape[0] * 10)
        file_size = Path(self.h5fname).stat().st_size
        # assertLessEqual reports both operands when the check fails
        self.assertLessEqual(
            abs(self.table.size_on_disk - file_size), self.hdf_overhead
        )
        self.assertEqual(self.table.size_in_memory, 10 * 1000 * 10 * 4)
        self.assertLess(self.table.size_on_disk, self.table.size_in_memory)


class NonNestedTableReadTestCase(
    common.TempFileMixin, common.PyTablesTestCase
):
    """Check ``Table.read`` on a flat table of ten ``i4`` columns."""

    def setUp(self):
        super().setUp()

        self.dtype = np.rec.format_parser(["i4"] * 10, [], []).dtype
        self.table = self.h5file.create_table("/", "table", self.dtype)
        self.shape = (100,)
        self.populate_file()

    def populate_file(self):
        """Fill the reference array with consecutive ints and append it."""
        self.array = np.zeros(self.shape, self.dtype)
        for row_num, row in enumerate(self.array):
            # each row continues counting where the previous one stopped
            start = row_num * len(self.array.dtype.names)
            for value, col in enumerate(self.array.dtype.names, start):
                row[col] = value
        self.table.append(self.array)
        self.assertEqual(len(self.table), len(self.array))

    def test_read_all(self):
        output = self.table.read()
        np.testing.assert_array_equal(output, self.array)

    def test_read_slice1(self):
        output = self.table.read(0, 51)
        np.testing.assert_array_equal(output, self.array[0:51])

    def test_read_all_rows_specified_field(self):
        output = self.table.read(field="f1")
        np.testing.assert_array_equal(output, self.array["f1"])

    def test_read_slice1_specified_field(self):
        output = self.table.read(1, 64, field="f1")
        np.testing.assert_array_equal(output, self.array["f1"][1:64])

    def test_out_arg_with_non_numpy_flavor(self):
        """Passing ``out`` with a non-numpy flavor raises TypeError."""
        output = np.empty(self.shape, self.dtype)
        self.table.flavor = "python"
        with self.assertRaises(TypeError) as cm:
            self.table.read(out=output)
        self.assertIn(
            "Optional 'out' argument may only be", str(cm.exception)
        )

    def test_read_all_out_arg(self):
        output = np.empty(self.shape, self.dtype)
        self.table.read(out=output)
        np.testing.assert_array_equal(output, self.array)

    def test_read_slice1_out_arg(self):
        output = np.empty((51,), self.dtype)
        self.table.read(0, 51, out=output)
        np.testing.assert_array_equal(output, self.array[0:51])

    def test_read_all_rows_specified_field_out_arg(self):
        output = np.empty(self.shape, "i4")
        self.table.read(field="f1", out=output)
        np.testing.assert_array_equal(output, self.array["f1"])

    def test_read_slice1_specified_field_out_arg(self):
        output = np.empty((63,), "i4")
        self.table.read(1, 64, field="f1", out=output)
        np.testing.assert_array_equal(output, self.array["f1"][1:64])

    def test_read_all_out_arg_sliced(self):
        """Reading into a slice leaves the rest of the buffer untouched."""
        output = np.empty((200,), self.dtype)
        output["f0"] = np.random.randint(0, 10_000, (200,))
        output_orig = output.copy()
        self.table.read(out=output[0:100])
        np.testing.assert_array_equal(output[0:100], self.array)
        np.testing.assert_array_equal(output[100:], output_orig[100:])

    def test_all_fields_non_contiguous_slice_contiguous_buffer(self):
        output = np.empty((50,), self.dtype)
        self.table.read(0, 100, 2, out=output)
        np.testing.assert_array_equal(output, self.array[0:100:2])

    def test_specified_field_non_contiguous_slice_contiguous_buffer(self):
        output = np.empty((50,), "i4")
        self.table.read(0, 100, 2, field="f3", out=output)
        np.testing.assert_array_equal(output, self.array["f3"][0:100:2])

    def test_all_fields_non_contiguous_buffer(self):
        output = np.empty((100,), self.dtype)
        output_slice = output[0:100:2]

        with self.assertRaisesRegex(
            ValueError, "output array not C contiguous"
        ):
            self.table.read(0, 100, 2, field=None, out=output_slice)

    def test_specified_field_non_contiguous_buffer(self):
        """A strided ``out`` buffer is rejected with the exact message."""
        output = np.empty((100,), "i4")
        output_slice = output[0:100:2]
        with self.assertRaises(ValueError) as cm:
            self.table.read(0, 100, 2, field="f3", out=output_slice)
        self.assertEqual("output array not C contiguous", str(cm.exception))

    def test_all_fields_buffer_too_small(self):
        output = np.empty((99,), self.dtype)
        with self.assertRaises(ValueError) as cm:
            self.table.read(out=output)
        self.assertIn("output array size invalid, got", str(cm.exception))

    def test_specified_field_buffer_too_small(self):
        output = np.empty((99,), "i4")
        with self.assertRaises(ValueError) as cm:
            self.table.read(field="f5", out=output)
        self.assertIn("output array size invalid, got", str(cm.exception))

    def test_all_fields_buffer_too_large(self):
        output = np.empty((101,), self.dtype)
        with self.assertRaises(ValueError) as cm:
            self.table.read(out=output)
        self.assertIn("output array size invalid, got", str(cm.exception))


class TableReadByteorderTestCase(
    common.TempFileMixin, common.PyTablesTestCase
):
    """Check ``Table.read`` results and ``out`` buffers across byteorders."""

    def setUp(self):
        super().setUp()
        self.system_byteorder = sys.byteorder
        self.other_byteorder = {"little": "big", "big": "little"}[
            sys.byteorder
        ]
        # maps a byteorder name to the NumPy dtype prefix character
        self.reverse_byteorders = {"little": "<", "big": ">"}

    def create_table(self, byteorder):
        """Create ``/table`` with *byteorder* and append ten rows to it."""
        table_dtype_code = self.reverse_byteorders[byteorder] + "i4"
        table_dtype = np.rec.format_parser(
            [table_dtype_code, "S1"], [], []
        ).dtype
        self.table = self.h5file.create_table(
            "/", "table", table_dtype, byteorder=byteorder
        )
        input_dtype = np.rec.format_parser(["i4", "S1"], [], []).dtype
        self.input_array = np.zeros((10,), input_dtype)
        self.input_array["f0"] = np.arange(10)
        self.input_array["f1"] = b"a"
        self.table.append(self.input_array)

    def _empty_output(self, byteorder):
        """Return an empty 10-row buffer whose int field uses *byteorder*."""
        out_dtype_code = self.reverse_byteorders[byteorder] + "i4"
        out_dtype = np.rec.format_parser([out_dtype_code, "S1"], [], []).dtype
        return np.empty((10,), out_dtype)

    def _check_system_byteorder(self, output):
        """Assert *output* is in system byteorder and holds 0..9 in f0."""
        self.assertEqual(
            tb.utils.byteorders[output["f0"].dtype.byteorder],
            self.system_byteorder,
        )
        np.testing.assert_array_equal(output["f0"], np.arange(10))

    def _check_rejects_other_byteorder(self):
        """Assert reading into an other-byteorder buffer raises ValueError."""
        output = self._empty_output(self.other_byteorder)
        with self.assertRaises(ValueError) as cm:
            self.table.read(out=output)
        self.assertIn(
            "array must be in system's byteorder", str(cm.exception)
        )

    def test_table_system_byteorder_no_out_argument(self):
        self.create_table(self.system_byteorder)
        self._check_system_byteorder(self.table.read())

    def test_table_other_byteorder_no_out_argument(self):
        self.create_table(self.other_byteorder)
        self._check_system_byteorder(self.table.read())

    def test_table_system_byteorder_out_argument_system_byteorder(self):
        self.create_table(self.system_byteorder)
        output = self._empty_output(self.system_byteorder)
        self.table.read(out=output)
        self._check_system_byteorder(output)

    def test_table_other_byteorder_out_argument_system_byteorder(self):
        self.create_table(self.other_byteorder)
        output = self._empty_output(self.system_byteorder)
        self.table.read(out=output)
        self._check_system_byteorder(output)

    def test_table_system_byteorder_out_argument_other_byteorder(self):
        self.create_table(self.system_byteorder)
        self._check_rejects_other_byteorder()

    def test_table_other_byteorder_out_argument_other_byteorder(self):
        self.create_table(self.other_byteorder)
        self._check_rejects_other_byteorder()

+class BasicRangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + # file = "test.h5" + open_mode = "w" + title = "This is the table title" + record = Record + maxshort = 1 << 15 + expectedrows = 100 + compress = 0 + shuffle = 1 + # Default values + nrows = 20 + nrowsinbuf = 3 # Choose a small value for the buffer size + start = 1 + stop = nrows + checkrecarray = 0 + checkgetCol = 0 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + self.h5file.close() + + def populateFile(self): + group = self.rootgroup + for j in range(3): + # Create a table + filterprops = tb.Filters( + complevel=self.compress, shuffle=self.shuffle + ) + table = self.h5file.create_table( + group, + "table" + str(j), + self.record, + title=self.title, + filters=filterprops, + expectedrows=self.expectedrows, + ) + + # Get the row object associated with the new table + row = table.row + + # Fill the table + for i in range(self.expectedrows): + row["var1"] = "%04d" % (self.expectedrows - i) + row["var7"] = row["var1"][-1] + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + + # var6 will be like var3 but byteswaped + row["var6"] = ((row["var3"] >> 8) & 0xFF) + ( + (row["var3"] << 8) & 0xFF00 + ) + row.append() + + # Flush the buffer for this table + table.flush() + # Create a new group (descendant of group) + group2 = self.h5file.create_group(group, "group" + str(j)) + # Iterate over this new group (group2) + group = group2 + + def check_range(self): + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + table.nrowsinbuf = self.nrowsinbuf + resrange = slice(self.start, self.stop, 
self.step).indices(table.nrows) + reslength = len(list(range(*resrange))) + # print "self.checkrecarray = ", self.checkrecarray + # print "self.checkgetCol = ", self.checkgetCol + if self.checkrecarray: + recarray = table.read(self.start, self.stop, self.step) + result = [] + for nrec in range(len(recarray)): + if recarray["var2"][nrec] < self.nrows and 0 < self.step: + result.append(recarray["var2"][nrec]) + elif recarray["var2"][nrec] > self.nrows and 0 > self.step: + result.append(recarray["var2"][nrec]) + elif self.checkgetCol: + column = table.read(self.start, self.stop, self.step, "var2") + result = [] + for nrec in range(len(column)): + if column[nrec] < self.nrows and 0 < self.step: + result.append(column[nrec]) + elif column[nrec] > self.nrows and 0 > self.step: + result.append(column[nrec]) + else: + if 0 < self.step: + result = [ + rec["var2"] + for rec in table.iterrows(self.start, self.stop, self.step) + if rec["var2"] < self.nrows + ] + elif 0 > self.step: + result = [ + rec["var2"] + for rec in table.iterrows(self.start, self.stop, self.step) + if rec["var2"] > self.nrows + ] + + if self.start < 0: + startr = self.expectedrows + self.start + else: + startr = self.start + + if self.stop is None: + if self.checkrecarray or self.checkgetCol: + # data read using the read method + stopr = startr + 1 + else: + # data read using the iterrows method + stopr = self.nrows + elif self.stop < 0: + stopr = self.expectedrows + self.stop + else: + stopr = self.stop + + if self.nrows < stopr: + stopr = self.nrows + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + if reslength: + if self.checkrecarray: + print("Last record *read* in recarray ==>", recarray[-1]) + elif self.checkgetCol: + print("Last value *read* in getCol ==>", column[-1]) + else: + rec = list( + table.iterrows(self.start, self.stop, self.step) + )[-1] + print("Last record *read* in table range ==>", rec) + print("Total number of selected records ==>", len(result)) + 
print("Selected records:\n", result) + print( + "Selected records should look like:\n", + list(range(startr, stopr, self.step)), + ) + print("start, stop, step ==>", self.start, self.stop, self.step) + print("startr, stopr, step ==>", startr, stopr, self.step) + + self.assertEqual(result, list(range(startr, stopr, self.step))) + if not (self.checkrecarray or self.checkgetCol): + if startr < stopr and 0 < self.step: + rec = [ + r + for r in table.iterrows(self.start, self.stop, self.step) + if r["var2"] < self.nrows + ][-1] + if self.nrows < self.expectedrows: + self.assertEqual( + rec["var2"], + list(range(self.start, self.stop, self.step))[-1], + ) + else: + self.assertEqual( + rec["var2"], list(range(startr, stopr, self.step))[-1] + ) + elif startr > stopr and 0 > self.step: + rec = [ + r["var2"] + for r in table.iterrows(self.start, self.stop, self.step) + if r["var2"] > self.nrows + ][0] + if self.nrows < self.expectedrows: + self.assertEqual( + rec, + list(range(self.start, self.stop or -1, self.step))[0], + ) + else: + self.assertEqual( + rec, list(range(startr, stopr or -1, self.step))[0] + ) + + # Close the file + self.h5file.close() + + def test01_range(self): + """Checking ranges in table iterators (case1)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_range..." % self.__class__.__name__) + + # Case where step < nrowsinbuf < 2 * step + self.nrows = 21 + self.nrowsinbuf = 3 + self.start = 0 + self.stop = self.expectedrows + self.step = 2 + + self.check_range() + + def test01a_range(self): + """Checking ranges in table iterators (case1)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_range..." 
% self.__class__.__name__) + + # Case where step < nrowsinbuf < 2 * step + self.nrows = 21 + self.nrowsinbuf = 3 + self.start = self.expectedrows - 1 + self.stop = None + self.step = -2 + + self.check_range() + + def test02_range(self): + """Checking ranges in table iterators (case2)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_range..." % self.__class__.__name__) + + # Case where step < nrowsinbuf < 10 * step + self.nrows = 21 + self.nrowsinbuf = 31 + self.start = 11 + self.stop = self.expectedrows + self.step = 3 + + self.check_range() + + def test03_range(self): + """Checking ranges in table iterators (case3)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_range..." % self.__class__.__name__) + + # Case where step < nrowsinbuf < 1.1 * step + self.nrows = self.expectedrows + self.nrowsinbuf = 11 # Choose a small value for the buffer size + self.start = 0 + self.stop = self.expectedrows + self.step = 10 + + self.check_range() + + def test04_range(self): + """Checking ranges in table iterators (case4)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_range..." % self.__class__.__name__) + + # Case where step == nrowsinbuf + self.nrows = self.expectedrows + self.nrowsinbuf = 11 # Choose a small value for the buffer size + self.start = 1 + self.stop = self.expectedrows + self.step = 11 + + self.check_range() + + def test05_range(self): + """Checking ranges in table iterators (case5)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_range..." % self.__class__.__name__) + + # Case where step > 1.1 * nrowsinbuf + self.nrows = 21 + self.nrowsinbuf = 10 # Choose a small value for the buffer size + self.start = 1 + self.stop = self.expectedrows + self.step = 11 + + self.check_range() + + def test06_range(self): + """Checking ranges in table iterators (case6)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06_range..." 
% self.__class__.__name__) + + # Case where step > 3 * nrowsinbuf + self.nrows = 3 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = 2 + self.stop = self.expectedrows + self.step = 10 + + self.check_range() + + def test07_range(self): + """Checking ranges in table iterators (case7)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07_range..." % self.__class__.__name__) + + # Case where start == stop + self.nrows = 2 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = self.nrows + self.stop = self.nrows + self.step = 10 + + self.check_range() + + def test08_range(self): + """Checking ranges in table iterators (case8)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08_range..." % self.__class__.__name__) + + # Case where start > stop + self.nrows = 2 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = self.nrows + 1 + self.stop = self.nrows + self.step = 1 + + self.check_range() + + def test09_range(self): + """Checking ranges in table iterators (case9)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09_range..." % self.__class__.__name__) + + # Case where stop = None (last row) + self.nrows = 100 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = 1 + self.stop = 2 + self.step = 1 + + self.check_range() + + def test10_range(self): + """Checking ranges in table iterators (case10)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10_range..." 
% self.__class__.__name__) + + # Case where start < 0 and stop = None (last row) + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -6 + self.startr = self.expectedrows + self.start + self.stop = -5 + self.stopr = self.expectedrows + self.step = 2 + + self.check_range() + + def test10a_range(self): + """Checking ranges in table iterators (case10a)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10a_range..." % self.__class__.__name__) + + # Case where start < 0 and stop = 0 + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -6 + self.startr = self.expectedrows + self.start + self.stop = 0 + self.stopr = self.expectedrows + self.step = 2 + + self.check_range() + + def test11_range(self): + """Checking ranges in table iterators (case11)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test11_range..." % self.__class__.__name__) + + # Case where start < 0 and stop < 0 + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -6 + self.startr = self.expectedrows + self.start + self.stop = -2 + self.stopr = self.expectedrows + self.stop + self.step = 1 + + self.check_range() + + def test12_range(self): + """Checking ranges in table iterators (case12)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test12_range..." % self.__class__.__name__) + + # Case where start < 0 and stop < 0 and start > stop + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -1 + self.startr = self.expectedrows + self.start + self.stop = -2 + self.stopr = self.expectedrows + self.stop + self.step = 1 + + self.check_range() + + def test13_range(self): + """Checking ranges in table iterators (case13)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13_range..." 
% self.__class__.__name__) + + # Case where step < 0 + self.step = -11 + try: + self.check_range() + except ValueError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next ValueError was catched!") + print(value) + self.h5file.close() + # else: + # print rec + # self.fail("expected a ValueError") + + # Case where step == 0 + self.step = 0 + try: + self.check_range() + except ValueError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next ValueError was catched!") + print(value) + self.h5file.close() + # else: + # print rec + # self.fail("expected a ValueError") + + +class IterRangeTestCase(BasicRangeTestCase): + pass + + +class RecArrayRangeTestCase(BasicRangeTestCase): + checkrecarray = 1 + + +class GetColRangeTestCase(BasicRangeTestCase): + checkgetCol = 1 + + def test01_nonexistentField(self): + """Checking non-existing Field in getCol method""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_nonexistentField..." 
+ % self.__class__.__name__ + ) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + self.root = self.h5file.root + table = self.h5file.get_node("/table0") + + with self.assertRaises(KeyError): + # column = table.read(field='non-existent-column') + table.col("non-existent-column") + + +class GetItemTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "w" + title = "This is the table title" + record = Record + maxshort = 1 << 15 + expectedrows = 100 + compress = 0 + shuffle = 1 + # Default values + nrows = 20 + nrowsinbuf = 3 # Choose a small value for the buffer size + start = 1 + stop = nrows + checkrecarray = 0 + checkgetCol = 0 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + self.h5file.close() + + def populateFile(self): + group = self.rootgroup + for j in range(3): + # Create a table + filterprops = tb.Filters( + complevel=self.compress, shuffle=self.shuffle + ) + table = self.h5file.create_table( + group, + "table" + str(j), + self.record, + title=self.title, + filters=filterprops, + expectedrows=self.expectedrows, + ) + # Get the row object associated with the new table + row = table.row + + # Fill the table + for i in range(self.expectedrows): + row["var1"] = "%04d" % (self.expectedrows - i) + row["var7"] = row["var1"][-1] + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + # var6 will be like var3 but byteswaped + row["var6"] = ((row["var3"] >> 8) & 0xFF) + ( + (row["var3"] << 8) & 0xFF00 + ) + row.append() + + # Flush the buffer for this table + table.flush() + # Create a new group (descendant of group) + group2 = self.h5file.create_group(group, "group" + str(j)) + # Iterate 
over this new group (group2) + group = group2 + + def test01a_singleItem(self): + """Checking __getitem__ method with single parameter (int)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_singleItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[2] + self.assertEqual(result["var2"], 2) + result = table[25] + self.assertEqual(result["var2"], 25) + result = table[self.expectedrows - 1] + self.assertEqual(result["var2"], self.expectedrows - 1) + + def test01b_singleItem(self): + """Checking __getitem__ method with single parameter (neg. int)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_singleItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[-5] + self.assertEqual(result["var2"], self.expectedrows - 5) + result = table[-1] + self.assertEqual(result["var2"], self.expectedrows - 1) + result = table[-self.expectedrows] + self.assertEqual(result["var2"], 0) + + def test01c_singleItem(self): + """Checking __getitem__ method with single parameter (long)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01c_singleItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[2] + self.assertEqual(result["var2"], 2) + result = table[25] + self.assertEqual(result["var2"], 25) + result = table[self.expectedrows - 1] + self.assertEqual(result["var2"], self.expectedrows - 1) + + def test01d_singleItem(self): + """Checking __getitem__ method with single parameter (neg. long)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01d_singleItem..." 
% self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[-5] + self.assertEqual(result["var2"], self.expectedrows - 5) + result = table[-1] + self.assertEqual(result["var2"], self.expectedrows - 1) + result = table[-self.expectedrows] + self.assertEqual(result["var2"], 0) + + def test01e_singleItem(self): + """Checking __getitem__ method with single parameter (rank-0 ints)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01e_singleItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[np.array(2)] + self.assertEqual(result["var2"], 2) + result = table[np.array(25)] + self.assertEqual(result["var2"], 25) + result = table[np.array(self.expectedrows - 1)] + self.assertEqual(result["var2"], self.expectedrows - 1) + + def test01f_singleItem(self): + """Checking __getitem__ method with single parameter (np.uint64)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01f_singleItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + + result = table[np.uint64(2)] + self.assertEqual(result["var2"], 2) + + def test02_twoItems(self): + """Checking __getitem__ method with start, stop parameters.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_twoItem..." 
% self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[2:6] + self.assertEqual(result["var2"].tolist(), list(range(2, 6))) + result = table[2:-6] + self.assertEqual( + result["var2"].tolist(), list(range(2, self.expectedrows - 6)) + ) + result = table[2:] + self.assertEqual( + result["var2"].tolist(), list(range(2, self.expectedrows)) + ) + result = table[-2:] + self.assertEqual( + result["var2"].tolist(), + list(range(self.expectedrows - 2, self.expectedrows)), + ) + + def test03_threeItems(self): + """Checking __getitem__ method with start, stop, step parameters.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_threeItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + result = table[2:6:3] + self.assertEqual(result["var2"].tolist(), list(range(2, 6, 3))) + result = table[2::3] + self.assertEqual( + result["var2"].tolist(), list(range(2, self.expectedrows, 3)) + ) + result = table[:6:2] + self.assertEqual(result["var2"].tolist(), list(range(0, 6, 2))) + result = table[::] + self.assertEqual( + result["var2"].tolist(), list(range(0, self.expectedrows, 1)) + ) + + def test04_negativeStep(self): + """Checking __getitem__ method with negative step parameter.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test04_negativeStep..." % self.__class__.__name__ + ) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + with self.assertRaises(ValueError): + table[2:3:-3] + + def test06a_singleItemCol(self): + """Checking __getitem__ method in Col with single parameter.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test06a_singleItemCol..." 
% self.__class__.__name__ + ) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + colvar2 = table.cols.var2 + self.assertEqual(colvar2[2], 2) + self.assertEqual(colvar2[25], 25) + self.assertEqual(colvar2[self.expectedrows - 1], self.expectedrows - 1) + + def test06b_singleItemCol(self): + """Checking __getitem__ method in Col with single parameter + (negative)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06b_singleItem..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + colvar2 = table.cols.var2 + self.assertEqual(colvar2[-5], self.expectedrows - 5) + self.assertEqual(colvar2[-1], self.expectedrows - 1) + self.assertEqual(colvar2[-self.expectedrows], 0) + + def test07_twoItemsCol(self): + """Checking __getitem__ method in Col with start, stop parameters.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07_twoItemCol..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + colvar2 = table.cols.var2 + self.assertEqual(colvar2[2:6].tolist(), list(range(2, 6))) + self.assertEqual( + colvar2[2:-6].tolist(), list(range(2, self.expectedrows - 6)) + ) + self.assertEqual( + colvar2[2:].tolist(), list(range(2, self.expectedrows)) + ) + self.assertEqual( + colvar2[-2:].tolist(), + list(range(self.expectedrows - 2, self.expectedrows)), + ) + + def test08_threeItemsCol(self): + """Checking __getitem__ method in Col with start, stop, step + parameters.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test08_threeItemCol..." 
% self.__class__.__name__ + ) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + colvar2 = table.cols.var2 + self.assertEqual(colvar2[2:6:3].tolist(), list(range(2, 6, 3))) + self.assertEqual( + colvar2[2::3].tolist(), list(range(2, self.expectedrows, 3)) + ) + self.assertEqual(colvar2[:6:2].tolist(), list(range(0, 6, 2))) + self.assertEqual( + colvar2[::].tolist(), list(range(0, self.expectedrows, 1)) + ) + + def test09_negativeStep(self): + """Checking __getitem__ method in Col with negative step parameter.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test09_negativeStep..." % self.__class__.__name__ + ) + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + colvar2 = table.cols.var2 + with self.assertRaises(ValueError): + colvar2[2:3:-3] + + def test10_list_integers(self): + """Checking accessing Table with a list of integers.""" + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + idx = list(range(10, 70, 11)) + + result = table[idx] + self.assertEqual(result["var2"].tolist(), idx) + + result = table.read_coordinates(idx) + self.assertEqual(result["var2"].tolist(), idx) + + def test11_list_booleans(self): + """Checking accessing Table with a list of boolean values.""" + + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.root.table0 + idx = list(range(10, 70, 11)) + + selection = [n in idx for n in range(self.expectedrows)] + + result = table[selection] + self.assertEqual(result["var2"].tolist(), idx) + + result = table.read_coordinates(selection) + self.assertEqual(result["var2"].tolist(), idx) + + +class Rec(tb.IsDescription): + col1 = tb.IntCol(pos=1) + col2 = tb.StringCol(itemsize=3, pos=2) + col3 = tb.FloatCol(pos=3) + + +class SetItemTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test01(self): + """Checking modifying one table row with __setitem__""" + + # Create a new table: + table = 
self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing row + table[2] = (456, "db2", 1.2) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (456, b"db2", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test01b(self): + """Checking modifying one table row with __setitem__ (long index)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing row + table[2] = (456, "db2", 1.2) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (456, b"db2", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test02(self): + 
"""Modifying one row, with a step (__setitem__)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify two existing rows + rows = np.rec.array([(457, b"db1", 1.2)], formats="i4,S3,f8") + table[1:3:2] = rows + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test03(self): + """Checking modifying several rows at once (__setitem__)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify two existing rows + rows = np.rec.array( + [(457, b"db1", 1.2), (5, b"de1", 1.3)], formats="i4,S3,f8" + ) + # table.modify_rows(start=1, rows=rows) + table[1:3] = rows + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set 
buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test04(self): + """Modifying several rows at once, with a step (__setitem__)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify two existing rows + rows = np.rec.array( + [(457, b"db1", 1.2), (6, b"de2", 1.3)], formats="i4,S3,f8" + ) + # table[1:4:2] = rows + table[1::2] = rows + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (457, b"db1", 1.2), + (6, b"de2", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test05(self): + """Checking modifying one column (single element, __setitem__)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1] = -1 + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (-1, b"ded", 1.3), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + 
formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06a(self): + """Checking modifying one column (several elements, __setitem__)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1:4] = [2, 3, 4] + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (3, b"db1", 1.2), + (4, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06b(self): + """Checking modifying one column (iterator, __setitem__)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + with self.assertRaises(NotImplementedError): + for row in table.iterrows(): + 
row["col1"] = row.nrow + 1 + row.append() + table.flush() + + # # Create the modified recarray + # r1=np.rec.array([[1,b'dbe',1.2],[2,b'ded',1.3], + # [3,b'db1',1.2],[4,b'de1',1.3]], + # formats="i4,S3,f8", + # names = "col1,col2,col3") + # # Read the modified table + # if self.reopen: + # self.fileh.close() + # self.fileh = tables.open_file(self.file, "r") + # table = self.fileh.root.recarray + # table.nrowsinbuf = self.buffersize # set buffer value + # r2 = table.read() + # if common.verbose: + # print "Original table-->", repr(r2) + # print "Should look like-->", repr(r1) + # self.assertEqual(r1.tobytes(), r2.tobytes()) + # self.assertEqual(table.nrows, 4) + + def test07(self): + """Modifying one column (several elements, __setitem__, step)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (1, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify just one existing column + table.cols.col1[1:4:2] = [2, 3] + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (457, b"db1", 1.2), + (3, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08(self): + """Modifying one column (one element, __setitem__, step)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + 
[(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1:4:3] = [2] + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test09(self): + """Modifying beyond the table extend (__setitem__, step)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Try to modify beyond the extend + # This will silently exclude the non-fitting rows + rows = np.rec.array( + [(457, b"db1", 1.2), (6, b"de2", 1.3)], formats="i4,S3,f8" + ) + table[1::2] = rows + # How it should look like + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (457, b"db1", 1.2), + (6, b"de2", 1.3), + ], + formats="i4,S3,f8", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + +class SetItemTestCase1(SetItemTestCase): + reopen = 0 + buffersize = 1 + + +class 
SetItemTestCase2(SetItemTestCase): + reopen = 1 + buffersize = 2 + + +class SetItemTestCase3(SetItemTestCase): + reopen = 0 + buffersize = 1000 + + +class SetItemTestCase4(SetItemTestCase): + reopen = 1 + buffersize = 1000 + + +class UpdateRowTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test01(self): + """Checking modifying one table row with Row.update""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing row + for row in table.iterrows(2, 3): + row["col1"], row["col2"], row["col3"] = (456, "db2", 1.2) + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (456, b"db2", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test02(self): + """Modifying one row, with a step (Row.update)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify two existing rows + for row in table.iterrows(1, 3, 2): + if row.nrow == 1: + row["col1"], row["col2"], row["col3"] = (457, "db1", 1.2) + elif row.nrow == 3: + 
row["col1"], row["col2"], row["col3"] = (6, "de2", 1.3) + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test03(self): + """Checking modifying several rows at once (Row.update)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify two existing rows + for row in table.iterrows(1, 3): + if row.nrow == 1: + row["col1"], row["col2"], row["col3"] = (457, "db1", 1.2) + elif row.nrow == 2: + row["col1"], row["col2"], row["col3"] = (5, "de1", 1.3) + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test04(self): + """Modifying several rows at once, with a step (Row.update)""" + + # Create a new table: + table = 
self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify two existing rows + for row in table.iterrows(1, stop=4, step=2): + if row.nrow == 1: + row["col1"], row["col2"], row["col3"] = (457, "db1", 1.2) + elif row.nrow == 3: + row["col1"], row["col2"], row["col3"] = (6, "de2", 1.3) + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (457, b"db1", 1.2), + (6, b"de2", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test05(self): + """Checking modifying one column (single element, Row.update)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + for row in table.iterrows(1, 2): + row["col1"] = -1 + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (-1, b"ded", 1.3), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = 
table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06(self): + """Checking modifying one column (several elements, Row.update)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + for row in table.iterrows(1, 4): + row["col1"] = row.nrow + 1 + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (3, b"db1", 1.2), + (4, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test07(self): + """Modifying values from a selection""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (1, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify just rows with col1 < 456 + for row in table.where("col1 < 456"): + row["col1"] = 2 + row["col2"] = "ada" + row.update() + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ada", 1.3), + (457, b"db1", 1.2), + (2, b"ada", 1.3), + ], + formats="i4,S3,f8", + 
names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08(self): + """Modifying a large table (Row.update)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + nrows = 100 + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify all the rows + for row in table: + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats="i4,S3,f8", names="col1,col2,col3" + ) + for i in range(nrows): + r1["col1"][i] = i + r1["col2"][i] = "b" + str(i) + r1["col3"][i] = 0.0 + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + def test08b(self): + """Setting values on a large table without calling Row.update""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + nrows = 100 + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify all the rows (actually don't) + for row 
in table: + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + # row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats="i4,S3,f8", names="col1,col2,col3" + ) + for i in range(nrows): + r1["col1"][i] = i - 1 + r1["col2"][i] = "a" + str(i - 1) + r1["col3"][i] = -1.0 + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + def test09(self): + """Modifying selected values on a large table""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + nrows = 100 + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify selected rows + for row in table.where("col1 > nrows-3"): + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats="i4,S3,f8", names="col1,col2,col3" + ) + for i in range(nrows): + r1["col1"][i] = i - 1 + r1["col2"][i] = "a" + str(i - 1) + r1["col3"][i] = -1.0 + # modify just the last line + r1["col1"][i] = i + r1["col2"][i] = "b" + str(i) + r1["col3"][i] = 0.0 + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + def test09b(self): 
+ """Modifying selected values on a large table (alternate values)""" + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + table.nrowsinbuf = self.buffersize # set buffer value + + nrows = 100 + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify selected rows + for row in table.iterrows(step=10): + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats="i4,S3,f8", names="col1,col2,col3" + ) + for i in range(nrows): + if i % 10 > 0: + r1["col1"][i] = i - 1 + r1["col2"][i] = "a" + str(i - 1) + r1["col3"][i] = -1.0 + else: + r1["col1"][i] = i + r1["col2"][i] = "b" + str(i) + r1["col3"][i] = 0.0 + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + +class UpdateRowTestCase1(UpdateRowTestCase): + reopen = 0 + buffersize = 1 + + +class UpdateRowTestCase2(UpdateRowTestCase): + reopen = 1 + buffersize = 2 + + +class UpdateRowTestCase3(UpdateRowTestCase): + reopen = 0 + buffersize = 1000 + + +class UpdateRowTestCase4(UpdateRowTestCase): + reopen = 1 + buffersize = 1000 + + +class RecArrayIO(common.TempFileMixin, common.PyTablesTestCase): + def test00(self): + """Checking saving a regular recarray""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00..." 
% self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"de", 1.3)], names="col1,col2,col3" + ) + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r) + + # Read it again + if self.reopen: + self._reopen() + r2 = self.h5file.root.recarray.read() + self.assertEqual(r.tobytes(), r2.tobytes()) + + def test01(self): + """Checking saving a recarray with an offset in its buffer""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01..." % self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"de", 1.3)], names="col1,col2,col3" + ) + + # Get an offset bytearray + r1 = r[1:] + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r1) + + # Read it again + if self.reopen: + self._reopen() + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r1.tobytes(), r2.tobytes()) + + def test02(self): + """Checking saving a large recarray with an offset in its buffer""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02..." % self.__class__.__name__) + + # Create a recarray + r = np.rec.array(b"a" * 200_000, "f4,3i4,S5,i2", 3000) + + # Get an offset bytearray + r1 = r[2000:] + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r1) + + # Read it again + if self.reopen: + self._reopen() + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r1.tobytes(), r2.tobytes()) + + def test03(self): + """Checking saving a strided recarray with an offset in its buffer""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03..." 
% self.__class__.__name__) + + # Create a recarray + r = np.rec.array(b"a" * 200_000, "f4,3i4,S5,i2", 3000) + + # Get a strided recarray + r2 = r[::2] + + # Get an offset bytearray + r1 = r2[1200:] + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r1) + + # Read it again + if self.reopen: + self._reopen() + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r1.tobytes(), r2.tobytes()) + + def test04(self): + """Checking appending several rows at once""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04..." % self.__class__.__name__) + + class Rec(tb.IsDescription): + col1 = tb.IntCol(pos=1) + col2 = tb.StringCol(itemsize=3, pos=2) + col3 = tb.FloatCol(pos=3) + + # Save it in a table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Create the complete table + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the original table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = self.h5file.root.recarray.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test05(self): + """Checking appending several rows at once (close file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05..." 
% self.__class__.__name__) + + # Save it in a table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + self._reopen() + + table = self.h5file.root.recarray + # Create the complete table + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + + # Read the original table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = self.h5file.root.recarray.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06a(self): + """Checking modifying one table row (list version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06a..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify just one existing rows + table.modify_rows(start=1, rows=[(456, "db1", 1.2)]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (456, b"db1", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06b(self): + """Checking modifying one table row (recarray version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06b..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify just one existing rows + table.modify_rows( + start=2, rows=np.rec.array([(456, "db2", 1.2)], formats="i4,S3,f8") + ) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (456, b"db2", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test07a(self): + """Checking modifying several rows at once (list version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07a..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify two existing rows + table.modify_rows(start=1, rows=[(457, "db1", 1.2), (5, "de1", 1.3)]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test07b(self): + """Checking modifying several rows at once (recarray version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07b..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify two existing rows + rows = np.rec.array( + [(457, b"db1", 1.2), (5, b"de1", 1.3)], formats="i4,S3,f8" + ) + table.modify_rows(start=1, rows=rows) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test07c(self): + """Checking modifying several rows with a mismatching value""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07c..." % self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify two existing rows + rows = np.rec.array( + [(457, b"db1", 1.2), (5, b"de1", 1.3)], formats="i4,S3,f8" + ) + self.assertRaises( + ValueError, table.modify_rows, start=1, stop=2, rows=rows + ) + + def test08a(self): + """Checking modifying one column (single column version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08a..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + table.modify_columns(start=1, columns=[[2, 3, 4]], names=["col1"]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (3, b"db1", 1.2), + (4, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08a2(self): + """Checking modifying one column (single column version, + modify_column)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08a2..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + table.modify_column(start=1, column=[2, 3, 4], colname="col1") + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (3, b"db1", 1.2), + (4, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08b(self): + """Checking modifying one column (single column version, recarray)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08b..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + columns = np.rec.fromarrays(np.array([[2, 3, 4]]), formats="i4") + table.modify_columns(start=1, columns=columns, names=["col1"]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (3, b"db1", 1.2), + (4, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08b2(self): + """Checking modifying one column (single column version, recarray, + modify_column)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08b2..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + columns = np.rec.fromarrays(np.array([[2, 3, 4]]), formats="i4") + table.modify_column(start=1, column=columns, colname="col1") + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"ded", 1.3), + (3, b"db1", 1.2), + (4, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08c(self): + """Checking modifying one column (single column version, + single element)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08c..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify just one existing column + # columns = np.rec.fromarrays(np.array([[4]]), formats="i4") + # table.modify_columns(start=1, columns=columns, names=["col1"]) + table.modify_columns(start=1, columns=[[4]], names=["col1"]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (4, b"ded", 1.3), + (457, b"db1", 1.2), + (5, b"de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test09a(self): + """Checking modifying table columns (multiple column version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09a..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify a couple of columns + columns = [["aaa", "bbb", "ccc"], [1.2, 0.1, 0.3]] + table.modify_columns(start=1, columns=columns, names=["col2", "col3"]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, b"dbe", 1.2), + (2, b"aaa", 1.2), + (457, b"bbb", 0.1), + (5, b"ccc", 0.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test09b(self): + """Checking modifying table columns (multiple columns, recarray)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09b..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify a couple of columns + columns = np.rec.array( + [("aaa", 1.2), ("bbb", 0.1), ("ccc", 0.3)], formats="S3,f8" + ) + table.modify_columns(start=1, columns=columns, names=["col2", "col3"]) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "aaa", 1.2), + (457, "bbb", 0.1), + (5, "ccc", 0.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test09c(self): + """Checking modifying table columns (single column, step)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09c..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + # Modify a couple of columns + columns = np.rec.array([("aaa", 1.2), ("bbb", 0.1)], formats="S3,f8") + table.modify_columns( + start=1, step=2, columns=columns, names=["col2", "col3"] + ) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "aaa", 1.2), + (457, "db1", 1.2), + (5, "bbb", 0.1), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test09d(self): + """Checking modifying table columns (multiple columns, step)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09d..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + # Modify a couple of columns + columns = np.rec.array([("aaa", 1.3), ("bbb", 0.1)], formats="S3,f8") + table.modify_columns( + start=0, step=2, columns=columns, names=["col2", "col3"] + ) + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "aaa", 1.3), + (2, "ded", 1.3), + (457, "bbb", 0.1), + (5, "de1", 1.3), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test10a(self): + """Checking modifying rows using coordinates + (readCoords/modifyCoords).""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10a..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + columns = table.read_coordinates([0, 3]) + + # Modify both rows + columns["col1"][:] = [55, 56] + columns["col3"][:] = [1.9, 1.8] + + # Modify the table in the same coordinates + table.modify_coordinates([0, 3], columns) + + # Create the modified recarray + r1 = np.rec.array( + [ + (55, b"dbe", 1.9), + (2, b"ded", 1.3), + (457, b"db1", 1.2), + (56, b"de1", 1.8), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test10b(self): + """Checking modifying rows using coordinates (getitem/setitem).""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10b..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # append new rows + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"ded", 1.3)], formats="i4,S3,f8" + ) + table.append(r) + table.append([(457, b"db1", 1.2), (5, b"de1", 1.3)]) + + columns = table[[0, 3]] + + # Modify both rows + columns["col1"][:] = [55, 56] + columns["col3"][:] = [1.9, 1.8] + + # Modify the table in the same coordinates + table[[0, 3]] = columns + + # Create the modified recarray + r1 = np.rec.array( + [ + (55, b"dbe", 1.9), + (2, b"ded", 1.3), + (457, b"db1", 1.2), + (56, b"de1", 1.8), + ], + formats="i4,S3,f8", + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + +class RecArrayIO1(RecArrayIO): + reopen = 0 + + +class RecArrayIO2(RecArrayIO): + reopen = 1 + + +class CopyTestCase(common.TempFileMixin, common.PyTablesTestCase): + def assertEqualColinstances(self, table1, table2): + """Assert that column instance maps of both tables are equal.""" + + cinst1, cinst2 = table1.colinstances, table2.colinstances + self.assertEqual(len(cinst1), len(cinst2)) + for cpathname, col1 in cinst1.items(): + self.assertTrue(cpathname in cinst2) + col2 = cinst2[cpathname] + self.assertIsInstance(col1, type(col2)) + if isinstance(col1, tb.Column): + self.assertEqual(col1.name, col2.name) + self.assertEqual(col1.pathname, col2.pathname) + self.assertEqual(col1.dtype, col2.dtype) + self.assertEqual(col1.type, col2.type) + elif isinstance(col1, tb.Cols): + self.assertEqual(col1._v_colnames, col2._v_colnames) + self.assertEqual(col1._v_colpathnames, col2._v_colpathnames) + + def test01_copy(self): + """Checking Table.copy() method.""" + + if common.verbose: + 
print("\n", "-=" * 30) + print("Running %s.test01_copy..." % self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"de", 1.3)], + names="col1,col2,col3", + formats=("i4,S3,f8"), + aligned=self.aligned, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table + table2 = table1.copy("/", "table2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + # print "dirs-->", dir(table1), dir(table2) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + for row1 in table1: + nrow = row1.nrow # current row + # row1 is a Row instance, while table2[] is a + # RecArray.Record instance + # print "reprs-->", repr(row1), repr(table2.read(nrow)) + for colname in table1.colnames: + # Both ways to compare work well + # self.assertEqual(row1[colname], table2[nrow][colname)) + self.assertEqual( + row1[colname], table2.read(nrow, field=colname)[0] + ) + + # Assert other properties in table + self.assertEqual(table1.nrows, table2.nrows) + self.assertEqual(table1.shape, table2.shape) + self.assertEqual(table1.colnames, table2.colnames) + self.assertEqual(table1.coldtypes, table2.coldtypes) + self.assertEqualColinstances(table1, table2) + self.assertEqual(repr(table1.description), repr(table2.description)) + # Check alignment + if self.aligned and self.open_kwargs["allow_padding"] is True: + self.assertEqual(table1.description._v_offsets, [0, 4, 8]) + self.assertEqual(table1.description._v_itemsize, 16) + else: + 
self.assertEqual(table1.description._v_offsets, [0, 4, 7]) + self.assertEqual(table1.description._v_itemsize, 15) + self.assertEqual( + table1.description._v_offsets, table2.description._v_offsets + ) + self.assertEqual( + table1.description._v_itemsize, table2.description._v_itemsize + ) + + # This could be not the same when re-opening the file + # self.assertEqual(table1.description._v_ColObjects, + # table2.description._v_ColObjects) + # Leaf attributes + self.assertEqual(table1.title, table2.title) + self.assertEqual(table1.filters.complevel, table2.filters.complevel) + self.assertEqual(table1.filters.complib, table2.filters.complib) + self.assertEqual(table1.filters.shuffle, table2.filters.shuffle) + self.assertEqual(table1.filters.fletcher32, table2.filters.fletcher32) + + def test02_copy(self): + """Checking Table.copy() method (where specified)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_copy..." % self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(b"dbe", 456, 1.2), (b"de", 2, 1.3)], + names="col1,col2,col3", + formats="S3,i4,f8", + aligned=self.aligned, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table in another group + group1 = self.h5file.create_group("/", "group1") + table2 = table1.copy(group1, "table2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table1 = self.h5file.root.table1 + table2 = self.h5file.root.group1.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + for row1 in table1: + nrow = row1.nrow # current row + for colname in 
table1.colnames: + # Both ways to compare work well + # self.assertEqual(row1[colname], table2[nrow][colname)) + self.assertEqual( + row1[colname], table2.read(nrow, field=colname)[0] + ) + + # Assert other properties in table + self.assertEqual(table1.nrows, table2.nrows) + self.assertEqual(table1.shape, table2.shape) + self.assertEqual(table1.colnames, table2.colnames) + self.assertEqual(table1.coldtypes, table2.coldtypes) + self.assertEqualColinstances(table1, table2) + self.assertEqual(repr(table1.description), repr(table2.description)) + # Check alignment + if self.aligned and self.open_kwargs["allow_padding"] is True: + self.assertEqual(table1.description._v_offsets, [0, 4, 8]) + self.assertEqual(table1.description._v_itemsize, 16) + else: + self.assertEqual(table1.description._v_offsets, [0, 3, 7]) + self.assertEqual(table1.description._v_itemsize, 15) + self.assertEqual( + table1.description._v_offsets, table2.description._v_offsets + ) + self.assertEqual( + table1.description._v_itemsize, table2.description._v_itemsize + ) + + # Leaf attributes + self.assertEqual(table1.title, table2.title) + self.assertEqual(table1.filters.complevel, table2.filters.complevel) + self.assertEqual(table1.filters.complib, table2.filters.complib) + self.assertEqual(table1.filters.shuffle, table2.filters.shuffle) + self.assertEqual(table1.filters.fletcher32, table2.filters.fletcher32) + + def test03_copy(self): + """Checking Table.copy() method (table larger than buffer)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_copy..." 
% self.__class__.__name__) + + # Create a recarray exceeding buffers capability + # This works, but takes too much CPU for a test + # It is better to reduce the buffer size (table1.nrowsinbuf) + # r=np.rec.array(b'aaaabbbbccccddddeeeeffffgggg'*20000, + # formats='2i2,i4, (2,3)u2, (1,)f4, f8',shape=700) + r = np.rec.array( + b"aaaabbbbccccddddeeeeffffgggg" * 200, + formats="2i2,i4, (2,3)u2, (1,)f4, f8", + shape=7, + aligned=self.aligned, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table in another group and other title + group1 = self.h5file.create_group("/", "group1") + table1.nrowsinbuf = 2 # small value of buffer + table2 = table1.copy(group1, "table2", title="title table2") + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table1 = self.h5file.root.table1 + table2 = self.h5file.root.group1.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + for row1 in table1: + nrow = row1.nrow # current row + for colname in table1.colnames: + # self.assertTrue(allequal(row1[colname], + # table2[nrow][colname])) + self.assertTrue( + common.allequal( + row1[colname], table2.read(nrow, field=colname)[0] + ) + ) + + # Assert other properties in table + self.assertEqual(table1.nrows, table2.nrows) + self.assertEqual(table1.shape, table2.shape) + self.assertEqual(table1.colnames, table2.colnames) + self.assertEqual(table1.coldtypes, table2.coldtypes) + self.assertEqualColinstances(table1, table2) + self.assertEqual(repr(table1.description), repr(table2.description)) + # Check alignment + if self.aligned and 
self.open_kwargs["allow_padding"] is True: + self.assertEqual(table1.description._v_offsets, [0, 4, 8, 20, 24]) + self.assertEqual(table1.description._v_itemsize, 32) + else: + self.assertEqual(table1.description._v_offsets, [0, 4, 8, 20, 24]) + self.assertEqual(table1.description._v_itemsize, 32) + self.assertEqual( + table1.description._v_offsets, table2.description._v_offsets + ) + self.assertEqual( + table1.description._v_itemsize, table2.description._v_itemsize + ) + + # Leaf attributes + self.assertEqual("title table2", table2.title) + self.assertEqual(table1.filters.complevel, table2.filters.complevel) + self.assertEqual(table1.filters.complib, table2.filters.complib) + self.assertEqual(table1.filters.shuffle, table2.filters.shuffle) + self.assertEqual(table1.filters.fletcher32, table2.filters.fletcher32) + + def test04_copy(self): + """Checking Table.copy() method (different compress level)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_copy..." % self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(1.2, b"dbe", 456), (1.3, b"de", 2)], + names="col1,col2,col3", + formats="f8,S3,i4", + aligned=self.aligned, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table in another group + group1 = self.h5file.create_group("/", "group1") + table2 = table1.copy(group1, "table2", filters=tb.Filters(complevel=6)) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table1 = self.h5file.root.table1 + table2 = self.h5file.root.group1.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are 
equal + for row1 in table1: + nrow = row1.nrow # current row + for colname in table1.colnames: + # Both ways to compare work well + # self.assertEqual(row1[colname], table2[nrow][colname)) + self.assertEqual( + row1[colname], table2.read(nrow, field=colname)[0] + ) + + # Assert other properties in table + self.assertEqual(table1.nrows, table2.nrows) + self.assertEqual(table1.shape, table2.shape) + self.assertEqual(table1.colnames, table2.colnames) + self.assertEqual(table1.coldtypes, table2.coldtypes) + self.assertEqualColinstances(table1, table2) + self.assertEqual(repr(table1.description), repr(table2.description)) + # Check alignment + if self.aligned and self.open_kwargs["allow_padding"] is True: + self.assertEqual(table1.description._v_offsets, [0, 8, 12]) + self.assertEqual(table1.description._v_itemsize, 16) + else: + self.assertEqual(table1.description._v_offsets, [0, 8, 11]) + self.assertEqual(table1.description._v_itemsize, 15) + self.assertEqual( + table1.description._v_offsets, table2.description._v_offsets + ) + self.assertEqual( + table1.description._v_itemsize, table2.description._v_itemsize + ) + + # Leaf attributes + self.assertEqual(table1.title, table2.title) + self.assertEqual(6, table2.filters.complevel) + self.assertEqual(1, table2.filters.shuffle) + self.assertEqual(table1.filters.fletcher32, table2.filters.fletcher32) + + def test05_copy(self): + """Checking Table.copy() method (user attributes copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_copy..." 
% self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"de", 1.3)], + names="col1,col2,col3", + formats="i8,S3,f8", + aligned=self.aligned, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + # Add some user attributes + table1.attrs.attr1 = "attr1" + table1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table in another group + group1 = self.h5file.create_group("/", "group1") + table2 = table1.copy( + group1, "table2", copyuserattrs=1, filters=tb.Filters(complevel=6) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table1 = self.h5file.root.table1 + table2 = self.h5file.root.group1.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + for row1 in table1: + nrow = row1.nrow # current row + for colname in table1.colnames: + # self.assertEqual(row1[colname], table2[nrow][colname)) + self.assertEqual( + row1[colname], table2.read(nrow, field=colname)[0] + ) + + # Assert other properties in table + self.assertEqual(table1.nrows, table2.nrows) + self.assertEqual(table1.shape, table2.shape) + self.assertEqual(table1.colnames, table2.colnames) + self.assertEqual(table1.coldtypes, table2.coldtypes) + self.assertEqualColinstances(table1, table2) + self.assertEqual(repr(table1.description), repr(table2.description)) + # Check alignment + if self.aligned and self.open_kwargs["allow_padding"] is True: + # The conditions for guessing the correct alignment are very + # tricky, so better disable the checks. Feel free to re-enable + # them during debugging by removing the False condition below. 
+ if False: + if is_os_64bit() and is_python_64bit(): + self.assertEqual(table1.description._v_offsets, [0, 8, 16]) + self.assertEqual(table1.description._v_itemsize, 24) + elif not is_os_64bit() and not is_python_64bit(): + self.assertEqual(table1.description._v_offsets, [0, 8, 12]) + self.assertEqual(table1.description._v_itemsize, 20) + else: + self.assertEqual(table1.description._v_offsets, [0, 8, 11]) + self.assertEqual(table1.description._v_itemsize, 19) + self.assertEqual( + table1.description._v_offsets, table2.description._v_offsets + ) + self.assertEqual( + table1.description._v_itemsize, table2.description._v_itemsize + ) + + # Leaf attributes + self.assertEqual(table1.title, table2.title) + self.assertEqual(6, table2.filters.complevel) + self.assertEqual(1, table2.filters.shuffle) + self.assertEqual(table1.filters.fletcher32, table2.filters.fletcher32) + # User attributes + self.assertEqual(table2.attrs.attr1, "attr1") + self.assertEqual(table2.attrs.attr2, 2) + + def test05b_copy(self): + """Checking Table.copy() method (user attributes not copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05b_copy..." 
% self.__class__.__name__) + + # Create a recarray + r = np.rec.array( + [(456, b"dbe", 1.2), (2, b"de", 1.3)], + names="col1,col2,col3", + formats="i8,S3,f4", + aligned=self.aligned, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + # Add some user attributes + table1.attrs.attr1 = "attr1" + table1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table in another group + group1 = self.h5file.create_group("/", "group1") + table2 = table1.copy( + group1, "table2", copyuserattrs=0, filters=tb.Filters(complevel=6) + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table1 = self.h5file.root.table1 + table2 = self.h5file.root.group1.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + for row1 in table1: + nrow = row1.nrow # current row + for colname in table1.colnames: + # self.assertEqual(row1[colname], table2[nrow][colname)) + self.assertEqual( + row1[colname], table2.read(nrow, field=colname)[0] + ) + + # Assert other properties in table + self.assertEqual(table1.nrows, table2.nrows) + self.assertEqual(table1.shape, table2.shape) + self.assertEqual(table1.colnames, table2.colnames) + self.assertEqual(table1.coldtypes, table2.coldtypes) + self.assertEqualColinstances(table1, table2) + self.assertEqual(repr(table1.description), repr(table2.description)) + # Check alignment + if self.aligned and self.open_kwargs["allow_padding"] is True: + self.assertEqual(table1.description._v_offsets, [0, 8, 12]) + self.assertEqual(table1.description._v_itemsize, 16) + else: + self.assertEqual(table1.description._v_offsets, [0, 8, 11]) + 
self.assertEqual(table1.description._v_itemsize, 15) + self.assertEqual( + table1.description._v_offsets, table2.description._v_offsets + ) + self.assertEqual( + table1.description._v_itemsize, table2.description._v_itemsize + ) + + # Leaf attributes + self.assertEqual(table1.title, table2.title) + self.assertEqual(6, table2.filters.complevel) + self.assertEqual(1, table2.filters.shuffle) + self.assertEqual(table1.filters.fletcher32, table2.filters.fletcher32) + # User attributes + self.assertEqual(hasattr(table2.attrs, "attr1"), 0) + self.assertEqual(hasattr(table2.attrs, "attr2"), 0) + + +class CopyIssuesTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test_issue1208(self): + # https://github.com/PyTables/PyTables/issues/1208 + group = self.h5file.create_group("this", "that", createparents=True) + node = self.h5file.create_table("/", "here", {"a": tb.UInt32Col()}) + node.copy(group, createparents=True, overwrite=True) + + +class CloseCopyTestCase(CopyTestCase): + close = True + aligned = False + open_kwargs = {"allow_padding": False} + + +class OpenCopyTestCase(CopyTestCase): + close = False + aligned = False + open_kwargs = {"allow_padding": True} + + +class AlignedCloseCopyTestCase(CopyTestCase): + close = True + aligned = True + open_kwargs = {"allow_padding": False} + + +class AlignedOpenCopyTestCase(CopyTestCase): + close = False + aligned = True + open_kwargs = {"allow_padding": True} + + +class AlignedNoPaddingOpenCopyTestCase(CopyTestCase): + close = False + aligned = True + open_kwargs = {"allow_padding": False} + + +class CopyIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test01_index(self): + """Checking Table.copy() method with indexes.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_index..." 
% self.__class__.__name__) + + # Create a recarray exceeding buffers capability + r = np.rec.array( + b"aaaabbbbccccddddeeeeffffgggg" * 200, + formats="2i2, (1,)i4, (2,3)u2, (1,)f4, (1,)f8", + shape=10, + ) + # The line below exposes a bug in numpy + # formats='2i2, i4, (2,3)u2, f4, f8',shape=10) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table + table1.nrowsinbuf = self.nrowsinbuf + table2 = table1.copy( + "/", "table2", start=self.start, stop=self.stop, step=self.step + ) + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + for nrow in range(r2.shape[0]): + for colname in table1.colnames: + self.assertTrue( + common.allequal(r2[nrow][colname], table2[nrow][colname]) + ) + + # Assert the number of rows in table + if common.verbose: + print("nrows in table2-->", table2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], table2.nrows) + + def test02_indexclosef(self): + """Checking Table.copy() method with indexes (close file version)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_indexclosef..." 
% self.__class__.__name__) + + # Create a recarray exceeding buffers capability + r = np.rec.array( + b"aaaabbbbccccddddeeeeffffgggg" * 200, + formats="2i2, i4, (2,3)u2, f4, f8", + shape=10, + ) + # Save it in a table: + table1 = self.h5file.create_table( + self.h5file.root, "table1", r, "title table1" + ) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + table1 = self.h5file.root.table1 + + # Copy to another table + table1.nrowsinbuf = self.nrowsinbuf + table2 = table1.copy( + "/", "table2", start=self.start, stop=self.stop, step=self.step + ) + + self._reopen() + table1 = self.h5file.root.table1 + table2 = self.h5file.root.table2 + + if common.verbose: + print("table1-->", table1.read()) + print("table2-->", table2.read()) + print("attrs table1-->", repr(table1.attrs)) + print("attrs table2-->", repr(table2.attrs)) + + # Check that all the elements are equal + r2 = r[self.start : self.stop : self.step] + for nrow in range(r2.shape[0]): + for colname in table1.colnames: + self.assertTrue( + common.allequal(r2[nrow][colname], table2[nrow][colname]) + ) + + # Assert the number of rows in table + if common.verbose: + print("nrows in table2-->", table2.nrows) + print("and it should be-->", r2.shape[0]) + self.assertEqual(r2.shape[0], table2.nrows) + + +class CopyIndex1TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 1 + start = 0 + stop = 7 + step = 1 + + +class CopyIndex2TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 0 + start = 0 + stop = -1 + step = 1 + + +class CopyIndex3TestCase(CopyIndexTestCase): + nrowsinbuf = 3 + close = 1 + start = 1 + stop = 7 + step = 1 + + +class CopyIndex4TestCase(CopyIndexTestCase): + nrowsinbuf = 4 + close = 0 + start = 0 + stop = 6 + step = 1 + + +class CopyIndex5TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 1 + start = 3 + stop = 7 + step = 1 + + +class CopyIndex6TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 0 + start = 3 + stop = 6 + step = 2 + + 
+class CopyIndex7TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 1 + start = 0 + stop = 7 + step = 10 + + +class CopyIndex8TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 0 + start = 6 + stop = 3 + step = 1 + + +class CopyIndex9TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 1 + start = 3 + stop = 4 + step = 1 + + +class CopyIndex10TestCase(CopyIndexTestCase): + nrowsinbuf = 1 + close = 0 + start = 3 + stop = 4 + step = 2 + + +class CopyIndex11TestCase(CopyIndexTestCase): + nrowsinbuf = 2 + close = 1 + start = -3 + stop = -1 + step = 2 + + +class CopyIndex12TestCase(CopyIndexTestCase): + nrowsinbuf = 3 + close = 0 + start = -1 # Should point to the last element + stop = None # None should mean the last element (including it) + step = 1 + + +class LargeRowSize(common.TempFileMixin, common.PyTablesTestCase): + def test00(self): + """Checking saving a Table with a moderately large rowsize""" + + # Create a recarray + r = np.rec.array([(np.arange(100)) * 2]) + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "largerow", r) + + # Read it again + r2 = self.h5file.root.largerow.read() + + self.assertEqual(r.tobytes(), r2.tobytes()) + + def test01(self): + """Checking saving a Table with an extremely large rowsize""" + + # Create a recarray (1.4 MB rowsize) + r = np.zeros(10, dtype=np.dtype("(300,100)i4,(400,400)f8")) + # From PyTables 1.3 on, we allow row sizes equal or larger than 640 KB + self.h5file.create_table(self.h5file.root, "largerow", r) + + # Read it again + r2 = self.h5file.root.largerow.read() + self.assertEqual(r.tobytes(), r2.tobytes()) + + +class DefaultValues(common.TempFileMixin, common.PyTablesTestCase): + record = Record + + def test00(self): + """Checking saving a Table with default values (using the same Row)""" + + # Create a table + table = self.h5file.create_table( + self.h5file.root, "table", self.record + ) + + table.nrowsinbuf = 46 # minimum amount that reproduces a problem + # Take a number of 
records a bit greater + nrows = int(table.nrowsinbuf * 1.1) + row = table.row + # Fill the table with nrows records + for i in range(nrows): + if i == 3: + row["var2"] = 2 + if i == 4: + row["var3"] = 3 + # This injects the row values. + row.append() + + # We need to flush the buffers in table in order to get an + # accurate number of records on it. + table.flush() + + # Create a recarray with the same default values + values = [b"abcd", 1, 2, 3.1, 4.2, 5, "e", 1, 1j, 1 + 0j] + formats = "S4,i4,i2,f8,f4,u2,S1,b1,c8,c16".split(",") + + if hasattr(tb, "Float16Col"): + values.append(6.4) + formats.append("f2") + if hasattr(tb, "Float96Col"): + values.append(6.4) + formats.append("f12") + if hasattr(tb, "Float128Col"): + values.append(6.4) + formats.append("f16") + if hasattr(tb, "Complex192Col"): + values.append(1.0 - 0.0j) + formats.append("c24") + if hasattr(tb, "Complex256Col"): + values.append(1.0 - 0.0j) + formats.append("c32") + + r = np.rec.array([tuple(values)] * nrows, formats=",".join(formats)) + + # Assign the value exceptions + r["f1"][3] = 2 + r["f2"][4] = 3 + + # Read the table in another recarray + # r2 = table.read() + r2 = table[::] # Equivalent to table.read() + + # This generates too much output. Activate only when + # self.nrowsinbuf is very small (<10) + if common.verbose: + print("First 10 table values:") + for row in table.iterrows(0, 10): + print(row) + print("The first 5 read recarray values:") + print(r2[:5]) + print("Records should look like:") + print(r[:5]) + + for name1, name2 in zip(r.dtype.names, r2.dtype.names): + self.assertTrue(common.allequal(r[name1], r2[name2])) + + # The following can give false errors when columns with extended + # precision data type are present in the record. 
+ # It is probably due to some difference in the value of bits used + # for patting (longdoubles use just 80 bits but are stored in 96 or + # 128 bits in numpy arrays) + # self.assertEqual(r.tobytes(), r2.tobytes()) + + def test01(self): + """Checking saving a Table with default values (using different Row)""" + + # Create a table + table = self.h5file.create_table( + self.h5file.root, "table", self.record + ) + + table.nrowsinbuf = 46 # minimum amount that reproduces a problem + # Take a number of records a bit greater + nrows = int(table.nrowsinbuf * 1.1) + # Fill the table with nrows records + for i in range(nrows): + if i == 3: + table.row["var2"] = 2 + if i == 4: + table.row["var3"] = 3 + # This injects the row values. + table.row.append() + + # We need to flush the buffers in table in order to get an + # accurate number of records on it. + table.flush() + + # Create a recarray with the same default values + values = [b"abcd", 1, 2, 3.1, 4.2, 5, "e", 1, 1j, 1 + 0j] + formats = "S4,i4,i2,f8,f4,u2,S1,b1,c8,c16".split(",") + + if hasattr(tb, "Float16Col"): + values.append(6.4) + formats.append("f2") + if hasattr(tb, "Float96Col"): + values.append(6.4) + formats.append("f12") + if hasattr(tb, "Float128Col"): + values.append(6.4) + formats.append("f16") + if hasattr(tb, "Complex192Col"): + values.append(1.0 - 0.0j) + formats.append("c24") + if hasattr(tb, "Complex256Col"): + values.append(1.0 - 0.0j) + formats.append("c32") + + r = np.rec.array([tuple(values)] * nrows, formats=",".join(formats)) + + # Assign the value exceptions + r["f1"][3] = 2 + r["f2"][4] = 3 + + # Read the table in another recarray + # r2 = table.read() + r2 = table[::] # Equivalent to table.read() + + # This generates too much output. 
Activate only when + # self.nrowsinbuf is very small (<10) + if common.verbose: + print("First 10 table values:") + for row in table.iterrows(0, 10): + print(row) + print("The first 5 read recarray values:") + print(r2[:5]) + print("Records should look like:") + print(r[:5]) + + for name1, name2 in zip(r.dtype.names, r2.dtype.names): + self.assertTrue(common.allequal(r[name1], r2[name2])) + + # The following can give false errors when columns with extended + # precision data type are present in the record. + # It is probably due to some difference in the value of bits used + # for patting (longdoubles use just 80 bits but are stored in 96 or + # 128 bits in numpy arrays) + # self.assertEqual(r.tobytes(), r2.tobytes()) + + +class OldRecordDefaultValues(DefaultValues): + title = "OldRecordDefaultValues" + record = OldRecord + + +class Record2(tb.IsDescription): + var1 = tb.StringCol(itemsize=4, dflt=b"abcd") # 4-character String + var2 = tb.IntCol(dflt=1) # integer + var3 = tb.Int16Col(dflt=2) # short integer + var4 = tb.Float64Col(dflt=3.1) # double (double-precision) + + +class LengthTestCase(common.TempFileMixin, common.PyTablesTestCase): + record = Record + nrows = 20 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + + def populateFile(self): + # Create a table + table = self.h5file.create_table( + self.h5file.root, "table", self.record, title="__length__ test" + ) + # Get the row object associated with the new table + row = table.row + + # Fill the table + for i in range(self.nrows): + row.append() + + # Flush the buffer for this table + table.flush() + self.table = table + + def test01_lengthrows(self): + """Checking __length__ in Table.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_lengthrows..." 
% self.__class__.__name__) + + # Number of rows + len(self.table) == self.nrows + + def test02_lengthcols(self): + """Checking __length__ in Cols.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_lengthcols..." % self.__class__.__name__) + + # Number of columns + if self.record is Record: + len(self.table.cols) == 8 + elif self.record is Record2: + len(self.table.cols) == 4 + + def test03_lengthcol(self): + """Checking __length__ in Column.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_lengthcol..." % self.__class__.__name__) + + # Number of rows for all columns column + for colname in self.table.colnames: + len(getattr(self.table.cols, colname)) == self.nrows + + +class Length1TestCase(LengthTestCase): + record = Record + nrows = 20 + + +class Length2TestCase(LengthTestCase): + record = Record2 + nrows = 100 + + +class WhereAppendTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests `Table.append_where()` method.""" + + class SrcTblDesc(tb.IsDescription): + id = tb.IntCol() + v1 = tb.FloatCol() + v2 = tb.StringCol(itemsize=8) + + def setUp(self): + super().setUp() + + tbl = self.h5file.create_table("/", "test", self.SrcTblDesc) + row = tbl.row + + row["id"] = 1 + row["v1"] = 1.5 + row["v2"] = "a" * 8 + row.append() + + row["id"] = 2 + row["v1"] = 2.5 + row["v2"] = "b" * 6 + row.append() + + tbl.flush() + + def test00_same(self): + """Query with same storage.""" + + DstTblDesc = self.SrcTblDesc + + tbl1 = self.h5file.root.test + tbl2 = self.h5file.create_table("/", "test2", DstTblDesc) + + tbl1.append_where(tbl2, "id > 1") + + # Rows resulting from the query are those in the new table. + it2 = iter(tbl2) + for r1 in tbl1.where("id > 1"): + r2 = next(it2) + self.assertTrue( + r1["id"] == r2["id"] + and r1["v1"] == r2["v1"] + and r1["v2"] == r2["v2"] + ) + + # There are no more rows. 
+ self.assertRaises(StopIteration, next, it2) + + def test01_compatible(self): + """Query with compatible storage.""" + + class DstTblDesc(tb.IsDescription): + id = tb.FloatCol() # float, not int + v1 = tb.FloatCol() + v2 = tb.StringCol(itemsize=16) # a longer column + v3 = tb.FloatCol() # extra column + + tbl1 = self.h5file.root.test + tbl2 = self.h5file.create_table("/", "test2", DstTblDesc) + + tbl1.append_where(tbl2, "id > 1") + + # Rows resulting from the query are those in the new table. + it2 = iter(tbl2) + for r1 in tbl1.where("id > 1"): + r2 = next(it2) + self.assertTrue( + r1["id"] == r2["id"] + and r1["v1"] == r2["v1"] + and r1["v2"] == r2["v2"] + ) + + # There are no more rows. + self.assertRaises(StopIteration, next, it2) + + def test02_lessPrecise(self): + """Query with less precise storage.""" + + class DstTblDesc(tb.IsDescription): + id = tb.IntCol() + v1 = tb.IntCol() # int, not float + v2 = tb.StringCol(itemsize=8) + + tbl1 = self.h5file.root.test + tbl2 = self.h5file.create_table("/", "test2", DstTblDesc) + + tbl1.append_where(tbl2, "id > 1") + + # Rows resulting from the query are those in the new table. + it2 = iter(tbl2) + for r1 in tbl1.where("id > 1"): + r2 = next(it2) + self.assertTrue( + r1["id"] == r2["id"] + and int(r1["v1"]) == r2["v1"] + and r1["v2"] == r2["v2"] + ) + + # There are no more rows. 
+ self.assertRaises(StopIteration, next, it2) + + def test03_incompatible(self): + """Query with incompatible storage.""" + + class DstTblDesc(tb.IsDescription): + id = tb.StringCol(itemsize=4) # string, not int + v1 = tb.FloatCol() + v2 = tb.StringCol(itemsize=8) + + tbl1 = self.h5file.root.test + tbl2 = self.h5file.create_table("/", "test2", DstTblDesc) + + self.assertRaises( + NotImplementedError, tbl1.append_where, tbl2, 'v1 == b"1"' + ) + + def test04_noColumn(self): + """Query with storage lacking columns.""" + + class DstTblDesc(tb.IsDescription): + # no ``id`` field + v1 = tb.FloatCol() + v2 = tb.StringCol(itemsize=8) + + tbl1 = self.h5file.root.test + tbl2 = self.h5file.create_table("/", "test2", DstTblDesc) + + self.assertRaises(KeyError, tbl1.append_where, tbl2, "id > 1") + + def test05_otherFile(self): + """Appending to a table in another file.""" + + h5fname2 = tempfile.mktemp(suffix=".h5") + + try: + with tb.open_file(h5fname2, "w") as h5file2: + tbl1 = self.h5file.root.test + tbl2 = h5file2.create_table("/", "test", self.SrcTblDesc) + + # RW to RW. + tbl1.append_where(tbl2, "id > 1") + + # RW to RO. + with tb.open_file(h5fname2, "r") as h5file2: + tbl2 = h5file2.root.test + self.assertRaises( + tb.FileModeError, tbl1.append_where, tbl2, "id > 1" + ) + + # RO to RO. + self._reopen("r") + tbl1 = self.h5file.root.test + self.assertRaises( + tb.FileModeError, tbl1.append_where, tbl2, "id > 1" + ) + + # RO to RW. + with tb.open_file(h5fname2, "a") as h5file2: + tbl2 = h5file2.root.test + tbl1.append_where(tbl2, "id > 1") + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + def test06_wholeTable(self): + """Append whole table.""" + + DstTblDesc = self.SrcTblDesc + + tbl1 = self.h5file.root.test + tbl2 = self.h5file.create_table("/", "test2", DstTblDesc) + + tbl1.append_where(tbl2) + + # Rows resulting from the query are those in the new table. 
+ it2 = iter(tbl2) + for r1 in tbl1.__iter__(): + r2 = next(it2) + self.assertTrue( + r1["id"] == r2["id"] + and r1["v1"] == r2["v1"] + and r1["v2"] == r2["v2"] + ) + + # There are no more rows. + self.assertRaises(StopIteration, next, it2) + + +class DerivedTableTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5file.create_table("/", "original", Record) + + def test00(self): + """Deriving a table from the description of another.""" + + tbl1 = self.h5file.root.original + tbl2 = self.h5file.create_table("/", "derived", tbl1.description) + + self.assertEqual(tbl1.description, tbl2.description) + + +class ChunkshapeTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5file.create_table("/", "table", Record, chunkshape=13) + + def test00(self): + """Test setting the chunkshape in a table (no reopen).""" + + tbl = self.h5file.root.table + if common.verbose: + print("chunkshape-->", tbl.chunkshape) + self.assertEqual(tbl.chunkshape, (13,)) + + def test01(self): + """Test setting the chunkshape in a table (reopen).""" + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, "r") + tbl = self.h5file.root.table + if common.verbose: + print("chunkshape-->", tbl.chunkshape) + self.assertEqual(tbl.chunkshape, (13,)) + + +# Test for appending zero-sized recarrays +class ZeroSizedTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + # Create a Table + t = self.h5file.create_table( + "/", "table", {"c1": tb.Int32Col(), "c2": tb.Float64Col()} + ) + # Append a single row + t.append([(1, 2.2)]) + + def test01_canAppend(self): + """Appending zero length recarray.""" + + t = self.h5file.root.table + a = np.empty(shape=(0,), dtype="i4,f8") + t.append(a) + self.assertEqual(t.nrows, 1, "The number of rows should be 1.") + + +# Case for testing ticket #103, i.e. 
selections in columns which are +# aligned but that its data length is not an exact multiple of the +# length of the record. This exposes the problem only in 32-bit +# machines, because in 64-bit machine, 'c2' is unaligned. However, +# this should check most platforms where, while not unaligned, +# len(datatype) > boundary_alignment is fullfilled. +class IrregularStrideTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + class IRecord(tb.IsDescription): + c1 = tb.Int32Col(pos=1) + c2 = tb.Float64Col(pos=2) + + table = self.h5file.create_table("/", "table", IRecord) + for i in range(10): + table.row["c1"] = i + table.row["c2"] = i + table.row.append() + table.flush() + + def test00(self): + """Selecting rows in a table with irregular stride (but aligned).""" + + table = self.h5file.root.table + coords1 = table.get_where_list("c1<5") + coords2 = table.get_where_list("c2<5") + if common.verbose: + print("\nSelected coords1-->", coords1) + print("Selected coords2-->", coords2) + self.assertTrue( + common.allequal(coords1, np.arange(5, dtype=tb.utils.SizeType)) + ) + self.assertTrue( + common.allequal(coords2, np.arange(5, dtype=tb.utils.SizeType)) + ) + + +class Issue262TestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + class IRecord(tb.IsDescription): + c1 = tb.Int32Col(pos=1) + c2 = tb.Float64Col(pos=2) + + table = self.h5file.create_table("/", "table", IRecord) + table.nrowsinbuf = 3 + + for i in range(20): + table.row["c1"] = i + table.row["c2"] = i + table.row.append() + + table.row["c1"] = i % 29 + table.row["c2"] = 300 - i + table.row.append() + + table.row["c1"] = 300 - i + table.row["c2"] = 100 + i % 30 + table.row.append() + + table.flush() + + def test_gh260(self): + """Regression test for gh-260""" + + table = self.h5file.root.table + coords1 = table.get_where_list("(c1>5)&(c2<30)", start=0, step=2) + coords2 = table.get_where_list("(c1>5)&(c2<30)", start=1, step=2) 
+ data = table.read() + data = data[np.where((data["c1"] > 5) & (data["c2"] < 30))] + + if common.verbose: + print() + print("Selected coords1-->", coords1) + print("Selected coords2-->", coords2) + print("Selected data-->", data) + self.assertEqual(len(coords1) + len(coords2), len(data)) + + def test_gh262_01(self): + """Regression test for gh-262 (start=0, step=1)""" + + table = self.h5file.root.table + data = table.get_where_list("(c1>5)&(~(c1>5))", start=0, step=1) + + if common.verbose: + print() + print("data -->", data) + self.assertEqual(len(data), 0) + + def test_gh262_02(self): + """Regression test for gh-262 (start=1, step=1)""" + + table = self.h5file.root.table + data = table.get_where_list("(c1>5)&(~(c1>5))", start=1, step=1) + + if common.verbose: + print() + print("data -->", data) + self.assertEqual(len(data), 0) + + def test_gh262_03(self): + """Regression test for gh-262 (start=0, step=2)""" + + table = self.h5file.root.table + data = table.get_where_list("(c1>5)&(~(c1>5))", start=0, step=2) + + if common.verbose: + print() + print("data -->", data) + self.assertEqual(len(data), 0) + + def test_gh262_04(self): + """Regression test for gh-262 (start=1, step=2)""" + + table = self.h5file.root.table + data = table.get_where_list("(c1>5)&(~(c1>5))", start=1, step=2) + + if common.verbose: + print() + print("data -->", data) + self.assertEqual(len(data), 0) + + +class TruncateTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + table = self.h5file.create_table("/", "table", self.IRecord) + # Fill just a couple of rows + for i in range(2): + table.row["c1"] = i + table.row["c2"] = i + table.row.append() + table.flush() + # The defaults + self.dflts = table.coldflts + + def test00_truncate(self): + """Checking Table.truncate() method (truncating to 0 rows)""" + + table = self.h5file.root.table + # Truncate to 0 elements + table.truncate(0) + + if self.close: + if common.verbose: + print("(closing file 
version)") + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("table-->", table.read()) + + self.assertEqual(table.nrows, 0) + for row in table: + self.assertEqual(row["c1"], row.nrow) + + def test01_truncate(self): + """Checking Table.truncate() method (truncating to 1 rows)""" + + table = self.h5file.root.table + # Truncate to 1 element + table.truncate(1) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("table-->", table.read()) + + self.assertEqual(table.nrows, 1) + for row in table: + self.assertEqual(row["c1"], row.nrow) + + def test02_truncate(self): + """Checking Table.truncate() method (truncating to == self.nrows)""" + + table = self.h5file.root.table + # Truncate to 2 elements + table.truncate(2) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("table-->", table.read()) + + self.assertEqual(table.nrows, 2) + for row in table: + self.assertEqual(row["c1"], row.nrow) + + def test03_truncate(self): + """Checking Table.truncate() method (truncating to > self.nrows)""" + + table = self.h5file.root.table + # Truncate to 4 elements + table.truncate(4) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("table-->", table.read()) + + self.assertEqual(table.nrows, 4) + # Check the original values + for row in table.iterrows(start=0, stop=2): + self.assertEqual(row["c1"], row.nrow) + # Check that the added rows have the default values + for row in table.iterrows(start=2, stop=4): + self.assertEqual(row["c1"], self.dflts["c1"]) + self.assertEqual(row["c2"], self.dflts["c2"]) + + +class TruncateOpen1(TruncateTestCase): + class IRecord(tb.IsDescription): + c1 = tb.Int32Col(pos=1) + c2 = tb.FloatCol(pos=2) + + close = 0 + + +class 
TruncateOpen2(TruncateTestCase): + class IRecord(tb.IsDescription): + c1 = tb.Int32Col(pos=1, dflt=3) + c2 = tb.FloatCol(pos=2, dflt=-3.1) + + close = 0 + + +class TruncateClose1(TruncateTestCase): + class IRecord(tb.IsDescription): + c1 = tb.Int32Col(pos=1) + c2 = tb.FloatCol(pos=2) + + close = 1 + + +class TruncateClose2(TruncateTestCase): + class IRecord(tb.IsDescription): + c1 = tb.Int32Col(pos=1, dflt=4) + c2 = tb.FloatCol(pos=2, dflt=3.1) + + close = 1 + + +class PointSelectionTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + N = 100 + + self.working_keyset = [ + [0, 1], + [0, -1], + ] + self.not_working_keyset = [ + [0, N], + [0, N + 1], + [0, -N - 1], + ] + + # Limits for selections + self.limits = [ + (0, 1), # just one element + (20, -10), # no elements + (-10, 4), # several elements + (0, 10), # several elements (again) + ] + + # Create a sample tables + self.data = data = np.arange(N) + self.recarr = recarr = np.empty(N, dtype="i4,f4") + recarr["f0"][:] = data + recarr["f1"][:] = data + self.table = self.h5file.create_table( + self.h5file.root, "table", recarr + ) + + def test01a_read(self): + """Test for point-selections (read, boolean keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = (data >= value1) & (data < value2) + if common.verbose: + print("Selection to test:", key) + a = recarr[key] + b = table[key] + if common.verbose: + print("NumPy selection:", a) + print("PyTables selection:", b) + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables selections does not match." 
+ ) + + def test01b_read(self): + """Test for point-selections (read, tuples of integers keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = np.where((data >= value1) & (data < value2)) + if common.verbose: + print("Selection to test:", key, type(key)) + a = recarr[key] + b = table[key] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables selections does not match." + ) + + def test01c_read(self): + """Test for point-selections (read, tuples of floats keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = np.where((data >= value1) & (data < value2)) + if common.verbose: + print("Selection to test:", key) + recarr[key] + fkey = np.array(key, "f4") + self.assertRaises(TypeError, table.__getitem__, fkey) + + def test01d_read(self): + """Test for point-selections (read, numpy keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = np.where((data >= value1) & (data < value2))[0] + if common.verbose: + print("Selection to test:", key, type(key)) + a = recarr[key] + b = table[key] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables selections does not match." + ) + + def test01e_read(self): + """Test for point-selections (read, list keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = np.where((data >= value1) & (data < value2))[0].tolist() + if common.verbose: + print("Selection to test:", key, type(key)) + a = recarr[key] + b = table[key] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables selections does not match." 
+ ) + + def test01f_read(self): + recarr = self.recarr + table = self.table + + for key in self.working_keyset: + if common.verbose: + print("Selection to test:", key) + a = recarr[key] + b = table[key] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables selections does not match." + ) + + def test01g_read(self): + table = self.table + + for key in self.not_working_keyset: + if common.verbose: + print("Selection to test:", key) + + self.assertRaises(IndexError, table.__getitem__, key) + + def test02a_write(self): + """Test for point-selections (write, boolean keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = np.where((data >= value1) & (data < value2)) + if common.verbose: + print("Selection to test:", key) + s = recarr[key] + # Modify the s recarray + s["f0"][:] = data[: len(s)] * 2 + s["f1"][:] = data[: len(s)] * 3 + # Modify recarr and table + recarr[key] = s + table[key] = s + a = recarr[:] + b = table[:] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables modifications does not match." + ) + + def test02b_write(self): + """Test for point-selections (write, integer keys).""" + + data = self.data + recarr = self.recarr + table = self.table + for value1, value2 in self.limits: + key = np.where((data >= value1) & (data < value2)) + if common.verbose: + print("Selection to test:", key) + s = recarr[key] + # Modify the s recarray + s["f0"][:] = data[: len(s)] * 2 + s["f1"][:] = data[: len(s)] * 3 + # Modify recarr and table + recarr[key] = s + table[key] = s + a = recarr[:] + b = table[:] + np.testing.assert_array_equal( + a, b, "NumPy array and PyTables modifications does not match." + ) + + +# Test for building very large MD columns without defaults +class MDLargeColTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test01_create(self): + """Create a Table with a very large MD column. 
Ticket #211.""" + N = 2**18 # 4x larger than maximum object header size (64 KB) + cols = {"col1": tb.Int8Col(shape=N, dflt=0)} + tbl = self.h5file.create_table("/", "test", cols) + tbl.row.append() # add a single row + tbl.flush() + if self.reopen: + self._reopen("a") + tbl = self.h5file.root.test + # Check the value + if common.verbose: + print("First row-->", tbl[0]["col1"]) + np.testing.assert_array_equal(tbl[0]["col1"], np.zeros(N, "i1")) + + +class MDLargeColNoReopen(MDLargeColTestCase): + reopen = False + + +class MDLargeColReopen(MDLargeColTestCase): + reopen = True + + +# Test with itertools.groupby that iterates on exhausted Row iterator +# See ticket #264. +class ExhaustedIter(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + + class Observations(tb.IsDescription): + market_id = tb.IntCol(pos=0) + scenario_id = tb.IntCol(pos=1) + value = tb.Float32Col(pos=3) + + table = self.h5file.create_table( + "/", "observations", Observations, chunkshape=32 + ) + + # fill the database + observations = np.arange(225) + row = table.row + for market_id in range(5): + for scenario_id in range(3): + for obs in observations: + row["market_id"] = market_id + row["scenario_id"] = scenario_id + row["value"] = obs + row.append() + table.flush() + + def average(self, values): + return sum(values, 0.0) / len(values) + + def f_scenario(self, row): + return row["scenario_id"] + + def test00_groupby(self): + """Checking iterating an exhausted iterator (ticket #264)""" + rows = self.h5file.root.observations.where("(market_id == 3)") + scenario_means = [] + for scenario_id, rows_grouped in itertools.groupby( + rows, self.f_scenario + ): + vals = [row["value"] for row in rows_grouped] + scenario_means.append(self.average(vals)) + if common.verbose: + print("Means -->", scenario_means) + self.assertEqual(scenario_means, [112.0, 112.0, 112.0]) + + def test01_groupby(self): + """Checking iterating an exhausted iterator (ticket #264). 
Reopen.""" + + self._reopen() + + rows = self.h5file.root.observations.where("(market_id == 3)") + scenario_means = [] + for scenario_id, rows_grouped in itertools.groupby( + rows, self.f_scenario + ): + vals = [row["value"] for row in rows_grouped] + scenario_means.append(self.average(vals)) + if common.verbose: + print("Means -->", scenario_means) + self.assertEqual(scenario_means, [112.0, 112.0, 112.0]) + + +class SpecialColnamesTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test00_check_names(self): + f = self.h5file + a = np.array( + [(1, 2, 3)], dtype=[("a", int), ("_b", int), ("__c", int)] + ) + t = f.create_table(f.root, "test", a) + self.assertEqual(len(t.colnames), 3, "Number of columns incorrect") + if common.verbose: + print("colnames -->", t.colnames) + for name, name2 in zip(t.colnames, ("a", "_b", "__c")): + self.assertEqual(name, name2) + + +class RowContainsTestCase(common.TempFileMixin, common.PyTablesTestCase): + def test00_row_contains(self): + f = self.h5file + a = np.array([(1, 2, 3)], dtype="i1,i2,i4") + t = f.create_table(f.root, "test", a) + row = [r for r in t.iterrows()][0] + if common.verbose: + print("row -->", row[:]) + for item in (1, 2, 3): + self.assertIn(item, row) + self.assertNotIn(4, row) + + +class AccessClosedTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.table = self.h5file.create_table( + self.h5file.root, "table", Record + ) + + row = self.table.row + for i in range(10): + row["var1"] = "%04d" % i + row["var2"] = i + row["var3"] = i % 3 + row.append() + self.table.flush() + + def test_read(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.table.read) + + def test_getitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.table.__getitem__, 0) + + def test_setitem(self): + data = self.table[0] + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.table.__setitem__, 0, data) + + def 
test_append(self): + data = self.table[0] + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.table.append, data) + + def test_readWhere(self): + self.h5file.close() + self.assertRaises( + tb.ClosedNodeError, self.table.read_where, "var2 > 3" + ) + + def test_whereAppend(self): + self.h5file.close() + self.assertRaises( + tb.ClosedNodeError, self.table.append_where, self.table, "var2 > 3" + ) + + def test_getWhereList(self): + self.h5file.close() + self.assertRaises( + tb.ClosedNodeError, self.table.get_where_list, "var2 > 3" + ) + + def test_readSorted(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.table.read_sorted, "var2") + + def test_readCoordinates(self): + self.h5file.close() + self.assertRaises( + tb.ClosedNodeError, self.table.read_coordinates, [2, 5] + ) + + +class ColumnIterationTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.buffer_size = self.h5file.params["IO_BUFFER_SIZE"] + + def create_non_nested_table(self, nrows, dtype): + array = np.empty((nrows,), dtype) + for name in dtype.names: + array[name] = np.random.randint(0, 10_000, nrows) + table = self.h5file.create_table("/", "table", dtype) + table.append(array) + return array, table + + def iterate(self, array, table): + row_num = 0 + for item in table.cols.f0: + self.assertEqual(item, array["f0"][row_num]) + row_num += 1 + self.assertEqual(row_num, len(array)) + + def test_less_than_io_buffer(self): + dtype = np.rec.format_parser(["i8"] * 3, [], []).dtype + rows_in_buffer = self.buffer_size // dtype[0].itemsize + array, table = self.create_non_nested_table(rows_in_buffer // 2, dtype) + self.iterate(array, table) + + def test_more_than_io_buffer(self): + dtype = np.rec.format_parser(["i8"] * 3, [], []).dtype + rows_in_buffer = self.buffer_size // dtype[0].itemsize + array, table = self.create_non_nested_table(rows_in_buffer * 3, dtype) + self.iterate(array, table) + + def 
test_partially_filled_buffer(self): + dtype = np.rec.format_parser(["i8"] * 3, [], []).dtype + rows_in_buffer = self.buffer_size // dtype[0].itemsize + array, table = self.create_non_nested_table( + rows_in_buffer * 2 + 2, dtype + ) + self.iterate(array, table) + + def test_zero_length_table(self): + dtype = np.rec.format_parser(["i8"] * 3, [], []).dtype + array, table = self.create_non_nested_table(0, dtype) + self.assertEqual(len(table), 0) + self.iterate(array, table) + + +class TestCreateTableArgs(common.TempFileMixin, common.PyTablesTestCase): + obj = np.array( + [("aaaa", 1, 2.1), ("bbbb", 2, 3.2)], + dtype=[("name", "S4"), ("icol", np.int32), ("fcol", np.float32)], + ) + where = "/" + name = "table" + description, _ = tb.description.descr_from_dtype(obj.dtype) + title = "title" + filters = None + expectedrows = 10_000 + chunkshape = None + byteorder = None + createparents = False + + def test_positional_args_01(self): + self.h5file.create_table( + self.where, + self.name, + self.description, + self.title, + self.filters, + self.expectedrows, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (0,)) + self.assertEqual(ptarr.nrows, 0) + self.assertEqual(tuple(ptarr.colnames), self.obj.dtype.names) + + def test_positional_args_02(self): + ptarr = self.h5file.create_table( + self.where, + self.name, + self.description, + self.title, + self.filters, + self.expectedrows, + ) + ptarr.append(self.obj) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (len(self.obj),)) + self.assertEqual(ptarr.nrows, len(self.obj)) + self.assertEqual(tuple(ptarr.colnames), self.obj.dtype.names) + self.assertEqual(nparr.dtype, self.obj.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_positional_args_obj(self): + 
self.h5file.create_table( + self.where, + self.name, + None, + self.title, + self.filters, + self.expectedrows, + self.chunkshape, + self.byteorder, + self.createparents, + self.obj, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (len(self.obj),)) + self.assertEqual(ptarr.nrows, len(self.obj)) + self.assertEqual(tuple(ptarr.colnames), self.obj.dtype.names) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj(self): + self.h5file.create_table( + self.where, self.name, title=self.title, obj=self.obj + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (len(self.obj),)) + self.assertEqual(ptarr.nrows, len(self.obj)) + self.assertEqual(tuple(ptarr.colnames), self.obj.dtype.names) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_description_01(self): + ptarr = self.h5file.create_table( + self.where, + self.name, + title=self.title, + description=self.description, + ) + ptarr.append(self.obj) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (len(self.obj),)) + self.assertEqual(ptarr.nrows, len(self.obj)) + self.assertEqual(tuple(ptarr.colnames), self.obj.dtype.names) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_description_02(self): + ptarr = self.h5file.create_table( + self.where, + self.name, + title=self.title, + description=self.description, + ) + # ptarr.append(self.obj) + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (0,)) + self.assertEqual(ptarr.nrows, 0) + self.assertEqual(tuple(ptarr.colnames), 
self.obj.dtype.names) + + def test_kwargs_obj_description(self): + ptarr = self.h5file.create_table( + self.where, + self.name, + title=self.title, + obj=self.obj, + description=self.description, + ) + + self._reopen() + + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read() + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (len(self.obj),)) + self.assertEqual(ptarr.nrows, len(self.obj)) + self.assertEqual(tuple(ptarr.colnames), self.obj.dtype.names) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_description_error_01(self): + self.assertRaises( + TypeError, + self.h5file.create_table, + self.where, + self.name, + title=self.title, + obj=self.obj, + description=Record, + ) + + def test_kwargs_obj_description_error_02(self): + self.assertRaises( + TypeError, + self.h5file.create_table, + self.where, + self.name, + title=self.title, + obj=self.obj, + description=Record(), + ) + + def test_kwargs_obj_description_error_03(self): + self.assertRaises( + TypeError, + self.h5file.create_table, + self.where, + self.name, + title=self.title, + obj=self.obj, + description=RecordDescriptionDict, + ) + + +class TestCreateTableColumnAttrs( + common.TempFileMixin, common.PyTablesTestCase +): + """ + Testing the attachment of column attributes (metadata) during table layout + creation using an `IsDescription` subclass. + """ + + where = "/" + name = "table" + freq_attrs = {"val": 13.3, "unit": "Hz", "description": "Ref. freq"} + labels_attrs = {"nbits": 10} + + def test_col_attr_01(self): + """ + Tests if the set column attrs set via `IsDescription` subclass are + available in the table. 
+ """ + + class TableEntry(tb.IsDescription): + # Adding column attrs at description level + freq = tb.Float32Col(attrs=self.freq_attrs) + labels = tb.StringCol(itemsize=2, attrs=self.labels_attrs) + + self.h5file.create_table(self.where, self.name, TableEntry) + + self._reopen() + + table = self.h5file.get_node(self.where, self.name) + # for k, v in self.freq_attrs.items(): + # # self.assertTrue(table.cols.freq.attrs.contains(k)) + # self.assertTrue(table.cols.freq.attrs[k] == self.freq_attrs[k]) + for k, v in self.labels_attrs.items(): + # self.assertTrue(table.cols.labels.attrs.contains(k)) + self.assertTrue(table.cols.labels.attrs[k] == self.labels_attrs[k]) + + def test_col_attr_02(self): + """ + Tests if the `ColumnAttributeSet` works for adding and changing attrs + per column in the existing table. + """ + + class TableEntry(tb.IsDescription): + # Not adding attrs + freq = tb.Float32Col() + labels = tb.StringCol(itemsize=2) + + table = self.h5file.create_table(self.where, self.name, TableEntry) + for k, v in self.freq_attrs.items(): + table.cols.freq.attrs[k] = v + for k, v in self.labels_attrs.items(): + table.cols.labels.attrs[k] = v + + self._reopen() + + table = self.h5file.get_node(self.where, self.name) + for k, v in self.freq_attrs.items(): + self.assertTrue(table.cols.freq.attrs.contains(k)) + self.assertTrue(table.cols.freq.attrs[k] == self.freq_attrs[k]) + for k, v in self.labels_attrs.items(): + self.assertTrue(table.cols.labels.attrs.contains(k)) + self.assertTrue(table.cols.labels.attrs[k] == self.labels_attrs[k]) + + def test_col_attr_03(self): + """ + Similar test as *_02 but using the .name access. 
+ """ + + class TableEntry(tb.IsDescription): + col1 = tb.Float32Col() + + table = self.h5file.create_table(self.where, self.name, TableEntry) + table.cols.col1.attrs.val = 1 + table.cols.col1.attrs.unit = "N" + + self._reopen() + + table = self.h5file.get_node(self.where, self.name) + self.assertTrue(table.cols.col1.attrs.val == 1) + self.assertTrue(table.cols.col1.attrs.unit == "N") + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # uncomment this only for testing purposes + + for n in range(niter): + theSuite.addTest(common.make_suite(BasicWriteTestCase)) + theSuite.addTest(common.make_suite(OldRecordBasicWriteTestCase)) + theSuite.addTest(common.make_suite(DictWriteTestCase)) + theSuite.addTest(common.make_suite(NumPyDTWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayOneWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayTwoWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayThreeWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayAlignedWriteTestCase)) + theSuite.addTest(common.make_suite(CompressBloscTablesTestCase)) + theSuite.addTest(common.make_suite(CompressBlosc2TablesTestCase)) + theSuite.addTest(common.make_suite(CompressBloscShuffleTablesTestCase)) + theSuite.addTest( + common.make_suite(CompressBlosc2ShuffleTablesTestCase) + ) + theSuite.addTest( + common.make_suite(CompressBloscBitShuffleTablesTestCase) + ) + theSuite.addTest( + common.make_suite(CompressBlosc2BitShuffleTablesTestCase) + ) + theSuite.addTest(common.make_suite(CompressBloscBloscLZTablesTestCase)) + theSuite.addTest( + common.make_suite(CompressBlosc2BloscLZTablesTestCase) + ) + theSuite.addTest(common.make_suite(CompressBloscLZ4TablesTestCase)) + theSuite.addTest(common.make_suite(CompressBlosc2LZ4TablesTestCase)) + theSuite.addTest(common.make_suite(CompressBloscLZ4HCTablesTestCase)) + theSuite.addTest(common.make_suite(CompressBlosc2LZ4HCTablesTestCase)) + 
theSuite.addTest(common.make_suite(CompressBloscSnappyTablesTestCase)) + theSuite.addTest(common.make_suite(CompressBloscZlibTablesTestCase)) + theSuite.addTest(common.make_suite(CompressBlosc2ZlibTablesTestCase)) + theSuite.addTest(common.make_suite(CompressBloscZstdTablesTestCase)) + theSuite.addTest(common.make_suite(CompressBlosc2ZstdTablesTestCase)) + theSuite.addTest(common.make_suite(CompressLZOTablesTestCase)) + theSuite.addTest(common.make_suite(CompressLZOShuffleTablesTestCase)) + theSuite.addTest(common.make_suite(CompressZLIBTablesTestCase)) + theSuite.addTest(common.make_suite(CompressZLIBShuffleTablesTestCase)) + theSuite.addTest(common.make_suite(Fletcher32TablesTestCase)) + theSuite.addTest(common.make_suite(AllFiltersTablesTestCase)) + theSuite.addTest(common.make_suite(CompressTwoTablesTestCase)) + theSuite.addTest(common.make_suite(SizeOnDiskInMemoryPropertyTestCase)) + theSuite.addTest(common.make_suite(NonNestedTableReadTestCase)) + theSuite.addTest(common.make_suite(TableReadByteorderTestCase)) + theSuite.addTest(common.make_suite(IterRangeTestCase)) + theSuite.addTest(common.make_suite(RecArrayRangeTestCase)) + theSuite.addTest(common.make_suite(GetColRangeTestCase)) + theSuite.addTest(common.make_suite(GetItemTestCase)) + theSuite.addTest(common.make_suite(SetItemTestCase1)) + theSuite.addTest(common.make_suite(SetItemTestCase2)) + theSuite.addTest(common.make_suite(SetItemTestCase3)) + theSuite.addTest(common.make_suite(SetItemTestCase4)) + theSuite.addTest(common.make_suite(UpdateRowTestCase1)) + theSuite.addTest(common.make_suite(UpdateRowTestCase2)) + theSuite.addTest(common.make_suite(UpdateRowTestCase3)) + theSuite.addTest(common.make_suite(UpdateRowTestCase4)) + theSuite.addTest(common.make_suite(RecArrayIO1)) + theSuite.addTest(common.make_suite(RecArrayIO2)) + theSuite.addTest(common.make_suite(OpenCopyTestCase)) + theSuite.addTest(common.make_suite(CloseCopyTestCase)) + theSuite.addTest(common.make_suite(AlignedOpenCopyTestCase)) + 
theSuite.addTest(common.make_suite(AlignedCloseCopyTestCase)) + theSuite.addTest(common.make_suite(AlignedNoPaddingOpenCopyTestCase)) + theSuite.addTest(common.make_suite(CopyIndex1TestCase)) + theSuite.addTest(common.make_suite(CopyIndex2TestCase)) + theSuite.addTest(common.make_suite(CopyIndex3TestCase)) + theSuite.addTest(common.make_suite(CopyIndex4TestCase)) + theSuite.addTest(common.make_suite(CopyIndex5TestCase)) + theSuite.addTest(common.make_suite(CopyIndex6TestCase)) + theSuite.addTest(common.make_suite(CopyIndex7TestCase)) + theSuite.addTest(common.make_suite(CopyIndex8TestCase)) + theSuite.addTest(common.make_suite(CopyIndex9TestCase)) + theSuite.addTest(common.make_suite(DefaultValues)) + theSuite.addTest(common.make_suite(OldRecordDefaultValues)) + theSuite.addTest(common.make_suite(Length1TestCase)) + theSuite.addTest(common.make_suite(Length2TestCase)) + theSuite.addTest(common.make_suite(WhereAppendTestCase)) + theSuite.addTest(common.make_suite(DerivedTableTestCase)) + theSuite.addTest(common.make_suite(ChunkshapeTestCase)) + theSuite.addTest(common.make_suite(ZeroSizedTestCase)) + theSuite.addTest(common.make_suite(IrregularStrideTestCase)) + theSuite.addTest(common.make_suite(Issue262TestCase)) + theSuite.addTest(common.make_suite(TruncateOpen1)) + theSuite.addTest(common.make_suite(TruncateOpen2)) + theSuite.addTest(common.make_suite(TruncateClose1)) + theSuite.addTest(common.make_suite(TruncateClose2)) + theSuite.addTest(common.make_suite(PointSelectionTestCase)) + theSuite.addTest(common.make_suite(MDLargeColNoReopen)) + theSuite.addTest(common.make_suite(MDLargeColReopen)) + theSuite.addTest(common.make_suite(ExhaustedIter)) + theSuite.addTest(common.make_suite(SpecialColnamesTestCase)) + theSuite.addTest(common.make_suite(RowContainsTestCase)) + theSuite.addTest(common.make_suite(AccessClosedTestCase)) + theSuite.addTest(common.make_suite(ColumnIterationTestCase)) + theSuite.addTest(common.make_suite(TestCreateTableArgs)) + 
theSuite.addTest(common.make_suite(TestCreateTableColumnAttrs)) + + if common.heavy: + theSuite.addTest(common.make_suite(CompressBzip2TablesTestCase)) + theSuite.addTest(common.make_suite(CompressBzip2ShuffleTablesTestCase)) + theSuite.addTest(common.make_suite(CopyIndex10TestCase)) + theSuite.addTest(common.make_suite(CopyIndex11TestCase)) + theSuite.addTest(common.make_suite(CopyIndex12TestCase)) + theSuite.addTest(common.make_suite(LargeRowSize)) + theSuite.addTest(common.make_suite(BigTablesTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_tablesMD.py b/venv/Lib/site-packages/tables/tests/test_tablesMD.py new file mode 100644 index 0000000..c0be718 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_tablesMD.py @@ -0,0 +1,2510 @@ +import sys + +import numpy as np + +import tables as tb +from tables.tests import common + +# It is important that columns are ordered according to their names +# to ease the comparison with structured arrays. 
+ + +# Test Record class +class Record(tb.IsDescription): + var0 = tb.StringCol(itemsize=4, dflt=b"", shape=2) # 4-char str array + var1 = tb.StringCol(itemsize=4, dflt=[b"abcd", b"efgh"], shape=(2, 2)) + var1_ = tb.IntCol(dflt=((1, 1),), shape=2) # integer array + var2 = tb.IntCol(dflt=((1, 1), (1, 1)), shape=(2, 2)) # integer array + var3 = tb.Int16Col(dflt=2) # short integer + var4 = tb.FloatCol(dflt=3.1) # double (double-precision) + var5 = tb.Float32Col(dflt=4.2) # float (single-precision) + var6 = tb.UInt16Col(dflt=5) # unsigned short integer + var7 = tb.StringCol(itemsize=1, dflt=b"e") # 1-character String + + +# Dictionary definition +RecordDescriptionDict = { + "var0": tb.StringCol(itemsize=4, dflt=b"", shape=2), # 4-char str array + "var1": tb.StringCol(itemsize=4, dflt=[b"abcd", b"efgh"], shape=(2, 2)), + # 'var0': StringCol(itemsize=4, shape=2), # 4-character String + # 'var1': StringCol(itemsize=4, shape=(2,2)), # 4-character String + "var1_": tb.IntCol(shape=2), # integer array + "var2": tb.IntCol(shape=(2, 2)), # integer array + "var3": tb.Int16Col(), # short integer + "var4": tb.FloatCol(), # double (double-precision) + "var5": tb.Float32Col(), # float (single-precision) + "var6": tb.Int16Col(), # unsigned short integer + "var7": tb.StringCol(itemsize=1), # 1-character String +} + +# Record class with numpy dtypes (mixed shapes is checked here) + + +class RecordDT(tb.IsDescription): + var0 = tb.Col.from_dtype(np.dtype("2S4"), dflt=b"") # shape in dtype + var1 = tb.Col.from_dtype( + np.dtype(("S4", (2, 2))), dflt=[b"abcd", b"efgh"] + ) # shape is a mix + var1_ = tb.Col.from_dtype( + np.dtype("2i4"), dflt=((1, 1),) + ) # shape in dtype + var2 = tb.Col.from_sctype( + "i4", shape=(2, 2), dflt=((1, 1), (1, 1)) + ) # shape is a mix + var3 = tb.Col.from_dtype(np.dtype("i2"), dflt=2) + var4 = tb.Col.from_dtype(np.dtype("2f8"), dflt=3.1) + var5 = tb.Col.from_dtype(np.dtype("f4"), dflt=4.2) + var6 = tb.Col.from_dtype(np.dtype("()u2"), dflt=5) + var7 = 
tb.Col.from_dtype(np.dtype("S1"), dflt=b"e") # no shape + + +class BasicTestCase(common.TempFileMixin, common.PyTablesTestCase): + # file = "test.h5" + open_mode = "w" + title = "This is the table title" + expectedrows = 100 + appendrows = 20 + compress = 0 + complib = "zlib" # Default compression library + record = Record + recarrayinit = 0 + maxshort = 1 << 15 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + self.h5file.close() + + def initRecArray(self): + record = self.recordtemplate + row = record[0] + buflist = [] + # Fill the recarray + for i in range(self.expectedrows): + tmplist = [] + # Both forms (list or chararray) work + var0 = ["%04d" % (self.expectedrows - i)] * 2 + tmplist.append(var0) + var1 = [["%04d" % (self.expectedrows - i)] * 2] * 2 + tmplist.append(var1) + var1_ = (i, 1) + tmplist.append(var1_) + var2 = ((i, 1), (1, 1)) # *-* + tmplist.append(var2) + var3 = i % self.maxshort + tmplist.append(var3) + if isinstance(row["var4"], np.ndarray): + tmplist.append([float(i), float(i * i)]) + else: + tmplist.append(float(i)) + if isinstance(row["var5"], np.ndarray): + tmplist.append(np.array((float(i),) * 4)) + else: + tmplist.append(float(i)) + # var6 will be like var3 but byteswaped + tmplist.append(((var3 >> 8) & 0xFF) + ((var3 << 8) & 0xFF00)) + var7 = var1[0][0][-1] + tmplist.append(var7) + buflist.append(tuple(tmplist)) + + self.record = np.rec.array( + buflist, dtype=record.dtype, shape=self.expectedrows + ) + + def populateFile(self): + group = self.rootgroup + if self.recarrayinit: + # Initialize a starting buffer, if any + self.initRecArray() + for j in range(3): + # Create a table + filters = tb.Filters(complevel=self.compress, complib=self.complib) + if j < 2: + byteorder = sys.byteorder + else: + # table2 will be byteswapped + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + table = self.h5file.create_table( + group, + "table" + 
str(j), + self.record, + title=self.title, + filters=filters, + expectedrows=self.expectedrows, + byteorder=byteorder, + ) + if not self.recarrayinit: + # Get the row object associated with the new table + row = table.row + + # Fill the table + for i in range(self.expectedrows): + s = "%04d" % (self.expectedrows - i) + row["var0"] = s.encode("ascii") + row["var1"] = s.encode("ascii") + row["var7"] = s[-1].encode("ascii") + row["var1_"] = (i, 1) + row["var2"] = ((i, 1), (1, 1)) # *-* + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + # var6 will be like var3 but byteswaped + row["var6"] = ((row["var3"] >> 8) & 0xFF) + ( + (row["var3"] << 8) & 0xFF00 + ) + row.append() + + # Flush the buffer for this table + table.flush() + # Create a new group (descendant of group) + group2 = self.h5file.create_group(group, "group" + str(j)) + # Iterate over this new group (group2) + group = group2 + + def test00_description(self): + """Checking table description and descriptive fields.""" + + self.h5file = tb.open_file(self.h5fname) + + tbl = self.h5file.get_node("/table0") + desc = tbl.description + + if isinstance(self.record, dict): + columns = self.record + elif isinstance(self.record, np.ndarray): + descr, _ = tb.description.descr_from_dtype(self.record.dtype) + columns = descr._v_colobjects + elif isinstance(self.record, np.dtype): + descr, _ = tb.description.descr_from_dtype(self.record) + columns = descr._v_colobjects + else: + # This is an ordinary description. + columns = self.record.columns + + # Check table and description attributes at the same time. + # These checks are only valid for non-nested tb. + + # Column names. 
+ expectedNames = [ + "var0", + "var1", + "var1_", + "var2", + "var3", + "var4", + "var5", + "var6", + "var7", + ] + self.assertEqual(expectedNames, list(tbl.colnames)) + self.assertEqual(expectedNames, list(desc._v_names)) + + # Column types. + expectedTypes = [columns[colname].dtype for colname in expectedNames] + self.assertEqual( + expectedTypes, [tbl.coldtypes[v] for v in expectedNames] + ) + self.assertEqual( + expectedTypes, [desc._v_dtypes[v] for v in expectedNames] + ) + + # Column string types. + expectedTypes = [columns[colname].type for colname in expectedNames] + self.assertEqual( + expectedTypes, [tbl.coltypes[v] for v in expectedNames] + ) + self.assertEqual( + expectedTypes, [desc._v_types[v] for v in expectedNames] + ) + + # Column defaults. + for v in expectedNames: + if common.verbose: + print("dflt-->", columns[v].dflt) + print("coldflts-->", tbl.coldflts[v]) + print("desc.dflts-->", desc._v_dflts[v]) + self.assertTrue( + common.areArraysEqual(tbl.coldflts[v], columns[v].dflt) + ) + self.assertTrue( + common.areArraysEqual(desc._v_dflts[v], columns[v].dflt) + ) + + # Column path names. + self.assertEqual(expectedNames, list(desc._v_pathnames)) + + # Column objects. + for colName in expectedNames: + expectedCol = columns[colName] + col = desc._v_colobjects[colName] + self.assertEqual(expectedCol.dtype, col.dtype) + self.assertEqual(expectedCol.type, col.type) + + def test01_readTable(self): + """Checking table read and cuts.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_readTable..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result = [ + r["var2"][0][0] for r in table.iterrows() if r["var2"][0][0] < 20 + ] + + if common.verbose: + print("Table:", repr(table)) + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", table[-1]) + print("Total selected records in table ==> ", len(result)) + nrows = self.expectedrows - 1 + r = [r for r in table.iterrows() if r["var2"][0][0] < 20][-1] + self.assertEqual( + ( + r["var0"][0], + r["var1"][0][0], + r["var1_"][0], + r["var2"][0][0], + r["var7"], + ), + (b"0001", b"0001", nrows, nrows, b"1"), + ) + if isinstance(r["var5"], np.ndarray): + self.assertTrue( + common.allequal(r["var5"], np.array((nrows,) * 4, np.float32)) + ) + else: + self.assertEqual(r["var5"], float(nrows)) + self.assertEqual(len(result), 20) + + def test01b_readTable(self): + """Checking table read and cuts (multidimensional columns case)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_readTable..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + # Choose a small value for buffer size + table.nrowsinbuf = 3 + # Read the records and select those with "var2" file less than 20 + result1 = [r["var5"] for r in table.iterrows() if r["var2"][0][0] < 20] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("Last record in table ==>", table[-1]) + print("Total selected records in table ==> ", len(result1)) + nrows = table.nrows + result2 = [r for r in table.iterrows() if r["var2"][0][0] < 20][-1] + if isinstance(result2["var5"], np.ndarray): + self.assertTrue( + common.allequal( + result1[0], np.array((float(0),) * 4, np.float32) + ) + ) + self.assertTrue( + common.allequal( + result1[1], np.array((float(1),) * 4, np.float32) + ) + ) + self.assertTrue( + common.allequal( + result1[2], np.array((float(2),) * 4, np.float32) + ) + ) + self.assertTrue( + common.allequal( + result1[3], np.array((float(3),) * 4, np.float32) + ) + ) + self.assertTrue( + common.allequal( + result1[10], np.array((float(10),) * 4, np.float32) + ) + ) + self.assertTrue( + common.allequal( + result2["var5"], + np.array((float(nrows - 1),) * 4, np.float32), + ) + ) + else: + self.assertEqual(result2["var5"], float(nrows - 1)) + self.assertEqual(len(result1), 20) + + # Read the records and select those with "var2" file less than 20 + result1 = [r["var1"] for r in table.iterrows() if r["var2"][0][0] < 20] + result2 = [r for r in table.iterrows() if r["var2"][0][0] < 20][-1] + + if result2["var1"].dtype.char == "S": + a = np.array([["%04d" % (self.expectedrows - 0)] * 2] * 2, "S") + self.assertTrue(common.allequal(result1[0], a)) + a = np.array([["%04d" % (self.expectedrows - 1)] * 2] * 2, "S") + self.assertTrue(common.allequal(result1[1], a)) + a = np.array([["%04d" % (self.expectedrows - 2)] * 2] * 2, "S") + self.assertTrue(common.allequal(result1[2], 
a)) + a = np.array([["%04d" % (self.expectedrows - 3)] * 2] * 2, "S") + self.assertTrue(common.allequal(result1[3], a)) + a = np.array([["%04d" % (self.expectedrows - 10)] * 2] * 2, "S") + self.assertTrue(common.allequal(result1[10], a)) + a = np.array([["%04d" % (1)] * 2] * 2, "S") + self.assertTrue(common.allequal(result2["var1"], a)) + else: + self.assertEqual(result1["var1"], "0001") + self.assertEqual(len(result1), 20) + + def test01c_readTable(self): + """Checking shape of multidimensional columns.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01c_readTable..." % self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + if common.verbose: + print("var2 col shape:", table.cols.var2.shape) + print("Should be:", table.cols.var2[:].shape) + self.assertEqual(table.cols.var2.shape, table.cols.var2[:].shape) + + def test02_AppendRows(self): + """Checking whether appending record rows works or not.""" + + # Now, open it, but in "append" mode + self.h5file = tb.open_file(self.h5fname, mode="a") + self.rootgroup = self.h5file.root + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_AppendRows..." 
% self.__class__.__name__) + + # Get a table + table = self.h5file.get_node("/group0/table1") + # Get their row object + row = table.row + if common.verbose: + print("Nrows in old", table._v_pathname, ":", table.nrows) + print("Record Format ==>", table.description._v_nested_formats) + print("Record Size ==>", table.rowsize) + # Append some rows + for i in range(self.appendrows): + s = "%04d" % (self.appendrows - i) + row["var0"] = s.encode("ascii") + row["var1"] = s.encode("ascii") + row["var7"] = s[-1].encode("ascii") + row["var1_"] = (i, 1) + row["var2"] = ((i, 1), (1, 1)) # *-* + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + row.append() + + # Flush the buffer for this table and read it + table.flush() + result = [ + r["var2"][0][0] for r in table.iterrows() if r["var2"][0][0] < 20 + ] + row = [r for r in table.iterrows() if r["var2"][0][0] < 20][-1] + + nrows = self.appendrows - 1 + self.assertEqual( + ( + row["var0"][0], + row["var1"][0][0], + row["var1_"][0], + row["var2"][0][0], + row["var7"], + ), + (b"0001", b"0001", nrows, nrows, b"1"), + ) + if isinstance(row["var5"], np.ndarray): + self.assertTrue( + common.allequal( + row["var5"], np.array((float(nrows),) * 4, np.float32) + ) + ) + else: + self.assertEqual(row["var5"], float(nrows)) + if self.appendrows <= 20: + add = self.appendrows + else: + add = 20 + self.assertEqual(len(result), 20 + add) # because we appended new rows + # del table + + # CAVEAT: The next test only works for tables with rows < 2**15 + def test03_endianess(self): + """Checking if table is endianess aware.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_endianess..." 
% self.__class__.__name__) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/group0/group1/table2") + + # Read the records and select the ones with "var3" column less than 20 + result = [r["var2"] for r in table.iterrows() if r["var3"] < 20] + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + print("On-disk byteorder ==>", table.byteorder) + print("Last record in table ==>", table[-1]) + print("Total selected records in table ==>", len(result)) + nrows = self.expectedrows - 1 + r = list(table.iterrows())[-1] + self.assertEqual((r["var1"][0][0], r["var3"]), (b"0001", nrows)) + self.assertEqual(len(result), 20) + + +class BasicWriteTestCase(BasicTestCase): + title = "BasicWrite" + + +class DictWriteTestCase(BasicTestCase): + # This checks also unidimensional arrays as columns + title = "DictWrite" + record = RecordDescriptionDict + nrows = 21 + nrowsinbuf = 3 # Choose a small value for the buffer size + start = 0 + stop = 10 + step = 3 + + +class RecordDTWriteTestCase(BasicTestCase): + title = "RecordDTWriteTestCase" + record = RecordDT + + +# Pure NumPy dtype +class NumPyDTWriteTestCase(BasicTestCase): + title = "NumPyDTWriteTestCase" + record = np.dtype("(2,)S4,(2,2)S4,(2,)i4,(2,2)i4,i2,2f8,f4,i2,S1") + record.names = "var0,var1,var1_,var2,var3,var4,var5,var6,var7".split(",") + + +class RecArrayOneWriteTestCase(BasicTestCase): + title = "RecArrayOneWrite" + record = np.rec.array( + None, + formats="(2,)S4,(2,2)S4,(2,)i4,(2,2)i4,i2,2f8,f4,i2,S1", + names="var0,var1,var1_,var2,var3,var4,var5,var6,var7", + shape=0, + ) + + +class RecArrayTwoWriteTestCase(BasicTestCase): + title = "RecArrayTwoWrite" + expectedrows = 100 + recarrayinit = 1 + recordtemplate = np.rec.array( + None, + formats="(2,)S4,(2,2)S4,(2,)i4,(2,2)i4,i2,f8,f4,i2,S1", + names="var0,var1,var1_,var2,var3,var4,var5,var6,var7", + shape=1, + ) + + +class RecArrayThreeWriteTestCase(BasicTestCase): + title = 
"RecArrayThreeWrite" + expectedrows = 100 + recarrayinit = 1 + recordtemplate = np.rec.array( + None, + formats="(2,)S4,(2,2)S4,(2,)i4,(2,2)i4,i2,2f8,4f4,i2,S1", + names="var0,var1,var1_,var2,var3,var4,var5,var6,var7", + shape=1, + ) + + +class RecArrayAlignedWriteTestCase(BasicTestCase): + title = "RecArrayThreeWrite" + expectedrows = 100 + recarrayinit = 1 + recordtemplate = np.rec.array( + None, + formats="(2,)S4,(2,2)S4,(2,)i4,(2,2)i4,i2,2f8,4f4,i2,S1", + names="var0,var1,var1_,var2,var3,var4,var5,var6,var7", + shape=1, + aligned=True, + ) + + +@common.unittest.skipIf( + not common.blosc_avail, "BLOSC compression library not available" +) +class CompressBloscTablesTestCase(BasicTestCase): + title = "CompressBloscTables" + compress = 1 + complib = "blosc" + + +@common.unittest.skipIf( + not common.lzo_avail, "LZO compression library not available" +) +class CompressLZOTablesTestCase(BasicTestCase): + title = "CompressLZOTables" + compress = 1 + complib = "lzo" + + +@common.unittest.skipIf( + not common.bzip2_avail, "BZIP2 compression library not available" +) +class CompressBzip2TablesTestCase(BasicTestCase): + title = "CompressBzip2Tables" + compress = 1 + complib = "bzip2" + + +class CompressZLIBTablesTestCase(BasicTestCase): + title = "CompressOneTables" + compress = 1 + complib = "zlib" + + +class CompressTwoTablesTestCase(BasicTestCase): + title = "CompressTwoTables" + compress = 1 + # This checks also unidimensional arrays as columns + record = RecordDescriptionDict + + +class BigTablesTestCase(BasicTestCase): + title = "BigTables" + # 10000 rows takes much more time than we can afford for tests + # reducing to 1000 would be more than enough + # F. 
Alted 2004-01-19 + + # expectedrows = 10000 + # appendrows = 1000 + expectedrows = 1000 + appendrows = 100 + + +class BasicRangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + # file = "test.h5" + open_mode = "w" + title = "This is the table title" + record = Record + maxshort = 1 << 15 + expectedrows = 100 + compress = 0 + # Default values + nrows = 20 + nrowsinbuf = 3 # Choose a small value for the buffer size + start = 1 + stop = nrows + checkrecarray = 0 + checkgetCol = 0 + + def setUp(self): + super().setUp() + + # Create an instance of an HDF5 Table + self.rootgroup = self.h5file.root + self.populateFile() + self.h5file.close() + + def populateFile(self): + group = self.rootgroup + for j in range(3): + # Create a table + table = self.h5file.create_table( + group, + "table" + str(j), + self.record, + title=self.title, + filters=tb.Filters(self.compress), + expectedrows=self.expectedrows, + ) + # Get the row object associated with the new table + row = table.row + + # Fill the table + for i in range(self.expectedrows): + row["var1"] = "%04d" % (self.expectedrows - i) + row["var7"] = row["var1"][0][0][-1] + row["var2"] = i + row["var3"] = i % self.maxshort + if isinstance(row["var4"], np.ndarray): + row["var4"] = [float(i), float(i * i)] + else: + row["var4"] = float(i) + if isinstance(row["var5"], np.ndarray): + row["var5"] = np.array((float(i),) * 4) + else: + row["var5"] = float(i) + # var6 will be like var3 but byteswaped + row["var6"] = ((row["var3"] >> 8) & 0xFF) + ( + (row["var3"] << 8) & 0xFF00 + ) + row.append() + + # Flush the buffer for this table + table.flush() + # Create a new group (descendant of group) + group2 = self.h5file.create_group(group, "group" + str(j)) + # Iterate over this new group (group2) + group = group2 + + def check_range(self): + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + table = self.h5file.get_node("/table0") + + table.nrowsinbuf = self.nrowsinbuf + resrange = 
slice(self.start, self.stop, self.step).indices(table.nrows) + reslength = len(list(range(*resrange))) + if self.checkrecarray: + recarray = table.read(self.start, self.stop, self.step) + result = [] + for nrec in range(len(recarray)): + if recarray["var2"][nrec][0][0] < self.nrows and 0 < self.step: + result.append(recarray["var2"][nrec][0][0]) + elif ( + recarray["var2"][nrec][0][0] > self.nrows and 0 > self.step + ): + result.append(recarray["var2"][nrec][0][0]) + elif self.checkgetCol: + column = table.read(self.start, self.stop, self.step, "var2") + result = [] + for nrec in range(len(column)): + if column[nrec][0][0] < self.nrows and 0 < self.step: # *-* + result.append(column[nrec][0][0]) # *-* + elif column[nrec][0][0] > self.nrows and 0 > self.step: # *-* + result.append(column[nrec][0][0]) # *-* + else: + if 0 < self.step: + result = [ + r["var2"][0][0] + for r in table.iterrows(self.start, self.stop, self.step) + if r["var2"][0][0] < self.nrows + ] + elif 0 > self.step: + result = [ + r["var2"][0][0] + for r in table.iterrows(self.start, self.stop, self.step) + if r["var2"][0][0] > self.nrows + ] + + if self.start < 0: + startr = self.expectedrows + self.start + else: + startr = self.start + + if self.stop is None: + if self.checkrecarray or self.checkgetCol: + # data read using the read method + stopr = startr + 1 + else: + # data read using the iterrows method + stopr = self.nrows + elif self.stop < 0: + stopr = self.expectedrows + self.stop + else: + stopr = self.stop + + if self.nrows < stopr: + stopr = self.nrows + + if common.verbose: + print("Nrows in", table._v_pathname, ":", table.nrows) + if reslength: + if self.checkrecarray: + print("Last record *read* in recarray ==>", recarray[-1]) + elif self.checkgetCol: + print("Last value *read* in getCol ==>", column[-1]) + else: + r = list(table.iterrows(self.start, self.stop, self.step))[ + -1 + ] + print("Last record *read* in table range ==>", r) + print("Total number of selected records ==>", 
len(result)) + print("Selected records:\n", result) + print( + "Selected records should look like:\n", + list(range(startr, stopr, self.step)), + ) + print("start, stop, step ==>", startr, stopr, self.step) + + self.assertEqual(result, list(range(startr, stopr, self.step))) + if not (self.checkrecarray or self.checkgetCol): + if startr < stopr and 0 < self.step: + r = [ + r["var2"] + for r in table.iterrows(self.start, self.stop, self.step) + if r["var2"][0][0] < self.nrows + ][-1] + if self.nrows > self.expectedrows: + self.assertEqual( + r[0][0], + list(range(self.start, self.stop, self.step))[-1], + ) + else: + self.assertEqual( + r[0][0], list(range(startr, stopr, self.step))[-1] + ) + elif startr > stopr and 0 > self.step: + r = [ + r["var2"] + for r in table.iterrows(self.start, self.stop, self.step) + if r["var2"][0][0] > self.nrows + ][0] + if self.nrows < self.expectedrows: + self.assertEqual( + r[0][0], + list(range(self.start, self.stop or -1, self.step))[0], + ) + else: + self.assertEqual( + r[0][0], list(range(startr, stopr or -1, self.step))[0] + ) + + # Close the file + self.h5file.close() + + def test01_range(self): + """Checking ranges in table iterators (case1)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_range..." % self.__class__.__name__) + + # Case where step < nrowsinbuf < 2 * step + self.nrows = 21 + self.nrowsinbuf = 3 + self.start = 0 + self.stop = self.expectedrows + self.step = 2 + + self.check_range() + + def test01a_range(self): + """Checking ranges in table iterators (case1)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_range..." 
% self.__class__.__name__) + + # Case where step < nrowsinbuf < 2 * step + self.nrows = 21 + self.nrowsinbuf = 3 + self.start = self.expectedrows - 1 + self.stop = None + self.step = -2 + + self.check_range() + + def test02_range(self): + """Checking ranges in table iterators (case2)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_range..." % self.__class__.__name__) + + # Case where step < nrowsinbuf < 10 * step + self.nrows = 21 + self.nrowsinbuf = 31 + self.start = 11 + self.stop = self.expectedrows + self.step = 3 + + self.check_range() + + def test03_range(self): + """Checking ranges in table iterators (case3)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_range..." % self.__class__.__name__) + + # Case where step < nrowsinbuf < 1.1 * step + self.nrows = self.expectedrows + self.nrowsinbuf = 11 # Choose a small value for the buffer size + self.start = 0 + self.stop = self.expectedrows + self.step = 10 + + self.check_range() + + def test04_range(self): + """Checking ranges in table iterators (case4)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_range..." % self.__class__.__name__) + + # Case where step == nrowsinbuf + self.nrows = self.expectedrows + self.nrowsinbuf = 11 # Choose a small value for the buffer size + self.start = 1 + self.stop = self.expectedrows + self.step = 11 + + self.check_range() + + def test05_range(self): + """Checking ranges in table iterators (case5)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_range..." % self.__class__.__name__) + + # Case where step > 1.1 * nrowsinbuf + self.nrows = 21 + self.nrowsinbuf = 10 # Choose a small value for the buffer size + self.start = 1 + self.stop = self.expectedrows + self.step = 11 + + self.check_range() + + def test06_range(self): + """Checking ranges in table iterators (case6)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06_range..." 
% self.__class__.__name__) + + # Case where step > 3 * nrowsinbuf + self.nrows = 3 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = 2 + self.stop = self.expectedrows + self.step = 10 + + self.check_range() + + def test07_range(self): + """Checking ranges in table iterators (case7)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test07_range..." % self.__class__.__name__) + + # Case where start == stop + self.nrows = 2 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = self.nrows + self.stop = self.nrows + self.step = 10 + + self.check_range() + + def test08_range(self): + """Checking ranges in table iterators (case8)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08_range..." % self.__class__.__name__) + + # Case where start > stop + self.nrows = 2 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = self.nrows + 1 + self.stop = self.nrows + self.step = 1 + + self.check_range() + + def test09_range(self): + """Checking ranges in table iterators (case9)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test09_range..." % self.__class__.__name__) + + # Case where stop = None + self.nrows = 100 + self.nrowsinbuf = 3 # Choose a small value for the buffer size + self.start = 1 + self.stop = 2 + self.step = 1 + + self.check_range() + + def test10_range(self): + """Checking ranges in table iterators (case10)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test10_range..." 
% self.__class__.__name__) + + # Case where start < 0 and stop = 0 + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -6 + self.startr = self.expectedrows + self.start + self.stop = 0 + self.stopr = self.expectedrows + self.stop + self.step = 2 + + self.check_range() + + def test11_range(self): + """Checking ranges in table iterators (case11)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test11_range..." % self.__class__.__name__) + + # Case where start < 0 and stop < 0 + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -6 + self.startr = self.expectedrows + self.start + self.stop = -2 + self.stopr = self.expectedrows + self.stop + self.step = 1 + + self.check_range() + + def test12_range(self): + """Checking ranges in table iterators (case12)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test12_range..." % self.__class__.__name__) + + # Case where start < 0 and stop < 0 and start > stop + self.nrows = self.expectedrows + self.nrowsinbuf = 5 # Choose a small value for the buffer size + self.start = -1 + self.startr = self.expectedrows + self.start + self.stop = -2 + self.stopr = self.expectedrows + self.stop + self.step = 1 + + self.check_range() + + def test13_range(self): + """Checking ranges in table iterators (case13)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test13_range..." 
% self.__class__.__name__) + + # Case where step < 0 + self.step = -11 + try: + self.check_range() + except ValueError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next ValueError was catched!") + self.h5file.close() + # else: + # self.fail("expected a ValueError") + + # Case where step == 0 + self.step = 0 + try: + self.check_range() + except ValueError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next ValueError was catched!") + self.h5file.close() + # else: + # self.fail("expected a ValueError") + + +class IterRangeTestCase(BasicRangeTestCase): + pass + + +class RecArrayRangeTestCase(BasicRangeTestCase): + checkrecarray = 1 + + +class GetColRangeTestCase(BasicRangeTestCase): + checkgetCol = 1 + + def test01_nonexistentField(self): + """Checking non-existing Field in getCol method""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_nonexistentField..." + % self.__class__.__name__ + ) + + # Create an instance of an HDF5 Table + self.h5file = tb.open_file(self.h5fname, "r") + self.root = self.h5file.root + table = self.h5file.get_node("/table0") + + with self.assertRaises(KeyError): + table.read(field="non-existent-column") + + +class Rec(tb.IsDescription): + col1 = tb.IntCol(pos=1, shape=(2,)) + col2 = tb.StringCol(itemsize=3, pos=2, shape=(3,)) + col3 = tb.FloatCol(pos=3, shape=(3, 2)) + + +class RecArrayIO(common.TempFileMixin, common.PyTablesTestCase): + + def test00(self): + """Checking saving a normal recarray""" + + # Create a recarray + intlist1 = [[456, 23] * 3] * 2 + intlist2 = np.array([[2, 2] * 3] * 2, dtype=int) + arrlist1 = [["dbe"] * 2] * 3 + arrlist2 = [["de"] * 2] * 3 + floatlist1 = [[1.2, 2.3] * 3] * 4 + floatlist2 = np.array([[4.5, 2.4] * 3] * 4) + b = [ + (intlist1, arrlist1, floatlist1), + (intlist2, arrlist2, floatlist2), + ] + r = np.rec.array( + b, formats="(2,6)i4,(3,2)S3,(4,6)f8", names="col1,col2,col3" + ) + + # Save it in a 
table: + self.h5file.create_table(self.h5file.root, "recarray", r) + + # Read it again + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r.tobytes(), r2.tobytes()) + + def test01(self): + """Checking saving a recarray with an offset in its buffer""" + + # Create a recarray + intlist1 = [[456, 23] * 3] * 2 + intlist2 = np.array([[2, 2] * 3] * 2, dtype=int) + arrlist1 = [["dbe"] * 2] * 3 + arrlist2 = [["de"] * 2] * 3 + floatlist1 = [[1.2, 2.3] * 3] * 4 + floatlist2 = np.array([[4.5, 2.4] * 3] * 4) + b = [ + (intlist1, arrlist1, floatlist1), + (intlist2, arrlist2, floatlist2), + ] + r = np.rec.array( + b, formats="(2,6)i4,(3,2)S3,(4,6)f8", names="col1,col2,col3" + ) + + # Get a view of the recarray + r1 = r[1:] + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r1) + + # Read it again + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r1.tobytes(), r2.tobytes()) + + def test02(self): + """Checking saving a slice of a large recarray""" + + # Create a recarray + intlist1 = [[[23, 24, 35] * 6] * 6] + intlist2 = np.array([[[2, 3, 4] * 6] * 6], dtype=int) + arrlist1 = [["dbe"] * 2] * 3 + arrlist2 = [["de"] * 2] * 3 + floatlist1 = [[1.2, 2.3] * 3] * 4 + floatlist2 = np.array([[4.5, 2.4] * 3] * 4) + b = [ + (intlist1, arrlist1, floatlist1), + (intlist2, arrlist2, floatlist2), + ] + r = np.rec.array( + b * 300, + formats="(1,6,18)i4,(3,2)S3,(4,6)f8", + names="col1,col2,col3", + ) + + # Get a slice of recarray + r1 = r[290:292] + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r1) + + # Read it again + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r1.tobytes(), r2.tobytes()) + + def test03(self): + """Checking saving a slice of a strided recarray""" + + # Create a recarray + intlist1 = [[[23, 24, 35] * 6] * 6] + intlist2 = np.array([[[2, 3, 4] * 6] * 6], dtype=int) + arrlist1 = [["dbe"] * 2] * 3 + arrlist2 = [["de"] * 2] * 3 + floatlist1 = [[1.2, 2.3] * 3] * 4 + floatlist2 = 
np.array([[4.5, 2.4] * 3] * 4) + b = [ + (intlist1, arrlist1, floatlist1), + (intlist2, arrlist2, floatlist2), + ] + r = np.rec.array( + b * 300, + formats="(1,6,18)i4,(3,2)S3,(4,6)f8", + names="col1,col2,col3", + shape=600, + ) + + # Get a strided recarray + r2 = r[::2] + + # Get a slice + r1 = r2[148:] + + # Save it in a table: + self.h5file.create_table(self.h5file.root, "recarray", r1) + + # Read it again + r2 = self.h5file.root.recarray.read() + + self.assertEqual(r1.tobytes(), r2.tobytes()) + + def test08a(self): + """Checking modifying one column (single column version, list)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08a..." % self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # Append new rows + s0, s1, s2, s3 = ["dbe"] * 3, ["ded"] * 3, ["db1"] * 3, ["de1"] * 3 + f0, f1, f2, f3 = ( + [[1.2] * 2] * 3, + [[1.3] * 2] * 3, + [[1.4] * 2] * 3, + [[1.5] * 2] * 3, + ) + r = np.rec.array( + [([456, 457], s0, f0), ([2, 3], s1, f1)], + formats="(2,)i4,(3,)S3,(3,2)f8", + ) + table.append(r) + table.append([([457, 458], s2, f2), ([5, 6], s3, f3)]) + + # Modify just one existing column + table.cols.col1[1:] = [[[2, 3], [3, 4], [4, 5]]] + + # Create the modified recarray + r1 = np.rec.array( + [ + ([456, 457], s0, f0), + ([2, 3], s1, f1), + ([3, 4], s2, f2), + ([4, 5], s3, f3), + ], + formats="(2,)i4,(3,)S3,(3,2)f8", + names="col1,col2,col3", + ) + + # Read the modified table + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08b(self): + """Checking modifying one column (single column version, recarray)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08b..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # Append new rows + s0, s1, s2, s3 = ["dbe"] * 3, ["ded"] * 3, ["db1"] * 3, ["de1"] * 3 + f0, f1, f2, f3 = ( + [[1.2] * 2] * 3, + [[1.3] * 2] * 3, + [[1.4] * 2] * 3, + [[1.5] * 2] * 3, + ) + r = np.rec.array( + [([456, 457], s0, f0), ([2, 3], s1, f1)], + formats="(2,)i4,(3,)S3,(3,2)f8", + ) + table.append(r) + table.append([([457, 458], s2, f2), ([5, 6], s3, f3)]) + + # Modify just one existing column + columns = np.rec.fromarrays( + np.array([[[2, 3], [3, 4], [4, 5]]]), formats="i4" + ) + table.modify_columns(start=1, columns=columns, names=["col1"]) + + # Create the modified recarray + r1 = np.rec.array( + [ + ([456, 457], s0, f0), + ([2, 3], s1, f1), + ([3, 4], s2, f2), + ([4, 5], s3, f3), + ], + formats="(2,)i4,(3,)S3,(3,2)f8", + names="col1,col2,col3", + ) + + # Read the modified table + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08b2(self): + """Checking modifying one column (single column version, recarray, + modify_column)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test08b2..." 
% self.__class__.__name__) + + # Create a new table: + table = self.h5file.create_table(self.h5file.root, "recarray", Rec) + + # Append new rows + s0, s1, s2, s3 = ["dbe"] * 3, ["ded"] * 3, ["db1"] * 3, ["de1"] * 3 + f0, f1, f2, f3 = ( + [[1.2] * 2] * 3, + [[1.3] * 2] * 3, + [[1.4] * 2] * 3, + [[1.5] * 2] * 3, + ) + r = np.rec.array( + [([456, 457], s0, f0), ([2, 3], s1, f1)], + formats="(2,)i4,(3,)S3,(3,2)f8", + ) + table.append(r) + table.append([([457, 458], s2, f2), ([5, 6], s3, f3)]) + + # Modify just one existing column + columns = np.rec.fromarrays( + np.array([[[2, 3], [3, 4], [4, 5]]]), formats="i4" + ) + table.modify_column(start=1, column=columns, colname="col1") + + # Create the modified recarray + r1 = np.rec.array( + [ + ([456, 457], s0, f0), + ([2, 3], s1, f1), + ([3, 4], s2, f2), + ([4, 5], s3, f3), + ], + formats="(2,)i4,(3,)S3,(3,2)f8", + names="col1,col2,col3", + ) + + # Read the modified table + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + +class DefaultValues(common.TempFileMixin, common.PyTablesTestCase): + + def test00(self): + """Checking saving a Table MD with default values""" + + # Create a table + table = self.h5file.create_table(self.h5file.root, "table", Record) + + # Take a number of records a bit large + # nrows = int(table.nrowsinbuf * 1.1) + nrows = 5 # for test + # Fill the table with nrows records + for i in range(nrows): + if i == 3 or i == 4: + table.row["var2"] = ((2, 2), (2, 2)) # *-* + # This injects the row values. + table.row.append() + + # We need to flush the buffers in table in order to get an + # accurate number of records on it. 
+ table.flush() + + # Create a recarray with the same default values + buffer = [ + ( + ["\x00"] + * 2, # just "" does not initialize the buffer properly + [["abcd", "efgh"]] * 2, + (1, 1), + ((1, 1), (1, 1)), + 2, + 3.1, + 4.2, + 5, + "e", + ) + ] + r = np.rec.array( + buffer * nrows, + formats="(2,)S4,(2,2)S4,(2,)i4,(2,2)i4,i2,f8,f4,u2,S1", + names=[ + "var0", + "var1", + "var1_", + "var2", + "var3", + "var4", + "var5", + "var6", + "var7", + ], + ) # *-* + + # Assign the value exceptions + r["var2"][3] = ((2, 2), (2, 2)) # *-* + r["var2"][4] = ((2, 2), (2, 2)) # *-* + + # Read the table in another recarray + r2 = table.read() + + # This generates too much output. Activate only when + # self.nrowsinbuf is very small (<10) + if common.verbose and 1: + print("Table values:") + print(r2) + print("Record values:") + print(r) + + # Both checks do work, however, tobytes() seems more stringent. + self.assertEqual(r.tobytes(), r2.tobytes()) + # self.assertTrue(common.areArraysEqual(r,r2)) + + +class RecordT(tb.IsDescription): + var0 = tb.IntCol(dflt=1, shape=()) # native int + var1 = tb.IntCol(dflt=[1], shape=(1,)) # 1-D int (one element) + var2_s = tb.IntCol(dflt=[1, 1], shape=2) # 1-D int (two elements) + var2 = tb.IntCol(dflt=[1, 1], shape=(2,)) # 1-D int (two elements) + var3 = tb.IntCol(dflt=[[0, 0], [1, 1]], shape=(2, 2)) # 2-D int + + +class ShapeTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + self.populateFile() + + def populateFile(self): + table = self.h5file.create_table(self.h5file.root, "table", RecordT) + row = table.row + # Fill the table with some rows with default values + for i in range(1): + row.append() + + # Flush the buffer for this table + table.flush() + + def test00(self): + """Checking scalar shapes""" + + if self.reopen: + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("The values look like:", table.cols.var0[:]) + print("They should look like:", [1]) + + # The 
real check + self.assertEqual(table.cols.var0[:].tolist(), [1]) + + def test01(self): + """Checking undimensional (one element) shapes""" + + if self.reopen: + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("The values look like:", table.cols.var1[:]) + print("They should look like:", [[1]]) + + # The real check + self.assertEqual(table.cols.var1[:].tolist(), [[1]]) + + def test02(self): + """Checking undimensional (two elements) shapes""" + + if self.reopen: + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("The values look like:", table.cols.var2[:]) + print("They should look like:", [[1, 1]]) + + # The real check + self.assertEqual(table.cols.var2[:].tolist(), [[1, 1]]) + self.assertEqual(table.cols.var2_s[:].tolist(), [[1, 1]]) + + def test03(self): + """Checking bidimensional shapes""" + + if self.reopen: + self._reopen() + table = self.h5file.root.table + + if common.verbose: + print("The values look like:", table.cols.var3[:]) + print("They should look like:", [[[0, 0], [1, 1]]]) + + # The real check + self.assertEqual(table.cols.var3[:].tolist(), [[[0, 0], [1, 1]]]) + + +class ShapeTestCase1(ShapeTestCase): + reopen = 0 + + +class ShapeTestCase2(ShapeTestCase): + reopen = 1 + + +class SetItemTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + # Create a new table: + self.table = self.h5file.create_table( + self.h5file.root, "recarray", Rec + ) + self.table.nrowsinbuf = self.buffersize # set buffer value + + def test01(self): + """Checking modifying one table row with __setitem__""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing row + table[2] = (456, "db2", 1.2) + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, 
"dbe", 1.2), + (2, "ded", 1.3), + (456, "db2", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test01b(self): + """Checking modifying one table row with __setitem__ (long index)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing row + table[2] = (456, "db2", 1.2) + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ded", 1.3), + (456, "db2", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test02(self): + """Modifying one row, with a step (__setitem__)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify two existing rows + rows = np.rec.array([(457, "db1", 1.2)], formats=formats) + table[1:3:2] = rows + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (457, "db1", 1.2), + (5, "de1", 
1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test03(self): + """Checking modifying several rows at once (__setitem__)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify two existing rows + rows = np.rec.array( + [(457, "db1", 1.2), (5, "de1", 1.3)], formats=formats + ) + + # table.modify_rows(start=1, rows=rows) + table[1:3] = rows + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (5, "de1", 1.3), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test04(self): + """Modifying several rows at once, with a step (__setitem__)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify two existing rows + rows = np.rec.array( + [(457, "db1", 1.2), (6, "de2", 1.3)], formats=formats + ) + # table[1:4:2] = rows + table[1::2] = rows + + # Create the modified recarray + r1 
= np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (457, "db1", 1.2), + (6, "de2", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test05(self): + """Checking modifying one column (single element, __setitem__)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1] = -1 + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (-1, "ded", 1.3), + (457, "db1", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06a(self): + """Checking modifying one column (several elements, __setitem__)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1:4] = [(2, 2), (3, 3), (4, 4)] + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ded", 1.3), + (3, "db1", 
1.2), + (4, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06b(self): + """Checking modifying one column (iterator, __setitem__)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + with self.assertRaises(NotImplementedError): + for row in table.iterrows(): + row["col1"] = row.nrow + 1 + row.append() + table.flush() + + def test07(self): + """Modifying one column (several elements, __setitem__, step)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (1, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1:4:2] = [(2, 2), (3, 3)] + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ded", 1.3), + (457, "db1", 1.2), + (3, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08(self): + """Modifying one column (one element, __setitem__, step)""" + + table = 
self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + table.cols.col1[1:4:3] = [(2, 2)] + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ded", 1.3), + (457, "db1", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test09(self): + """Modifying beyond the table extend (__setitem__, step)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Try to modify beyond the extent + # This will silently exclude the non-fitting rows + rows = np.rec.array( + [(457, "db1", 1.2), (6, "de2", 1.3)], formats=formats + ) + table[1::2] = rows + + # How it should look like + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (457, "db1", 1.2), + (6, "de2", 1.3), + ], + formats=formats, + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + +class SetItemTestCase1(SetItemTestCase): + reopen = 0 + buffersize = 1 + + +class 
SetItemTestCase2(SetItemTestCase): + reopen = 1 + buffersize = 2 + + +class SetItemTestCase3(SetItemTestCase): + reopen = 0 + buffersize = 1000 + + +class SetItemTestCase4(SetItemTestCase): + reopen = 1 + buffersize = 1000 + + +class UpdateRowTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + # Create a new table: + self.table = self.h5file.create_table( + self.h5file.root, "recarray", Rec + ) + self.table.nrowsinbuf = self.buffersize # set buffer value + + def test01(self): + """Checking modifying one table row with Row.update""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing row + for row in table.iterrows(2, 3): + row["col1"], row["col2"], row["col3"] = [456, "db2", 1.2] + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ded", 1.3), + (456, "db2", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test02(self): + """Modifying one row, with a step (Row.update)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify two existing rows + for row in table.iterrows(1, 3, 2): + if row.nrow == 1: + row["col1"], row["col2"], row["col3"] = (457, "db1", 1.2) + elif row.nrow 
== 3: + row["col1"], row["col2"], row["col3"] = (6, "de2", 1.3) + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (457, "db1", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test03(self): + """Checking modifying several rows at once (Row.update)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify two existing rows + for row in table.iterrows(1, 3): + if row.nrow == 1: + row["col1"], row["col2"], row["col3"] = (457, "db1", 1.2) + elif row.nrow == 2: + row["col1"], row["col2"], row["col3"] = (5, "de1", 1.3) + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (5, "de1", 1.3), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test04(self): + """Modifying several rows at once, with a step (Row.update)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], 
formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify two existing rows + for row in table.iterrows(1, stop=4, step=2): + if row.nrow == 1: + row["col1"], row["col2"], row["col3"] = (457, "db1", 1.2) + elif row.nrow == 3: + row["col1"], row["col2"], row["col3"] = (6, "de2", 1.3) + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (457, "db1", 1.2), + (457, "db1", 1.2), + (6, "de2", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test05(self): + """Checking modifying one column (single element, Row.update)""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + for row in table.iterrows(1, 2): + row["col1"] = -1 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (-1, "ded", 1.3), + (457, "db1", 1.2), + (5, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test06(self): + """Checking modifying one column (several elements, Row.update)""" + + table = 
self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (2, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just one existing column + for row in table.iterrows(1, 4): + row["col1"] = row.nrow + 1 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ded", 1.3), + (3, "db1", 1.2), + (4, "de1", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test07(self): + """Modifying values from a selection""" + + table = self.table + formats = table.description._v_nested_formats + + # append new rows + r = np.rec.array([(456, "dbe", 1.2), (1, "ded", 1.3)], formats=formats) + table.append(r) + table.append([(457, "db1", 1.2), (5, "de1", 1.3)]) + + # Modify just rows with col1 < 456 + for row in table.iterrows(): + if row["col1"][0] < 456: + row["col1"] = 2 + row["col2"] = "ada" + row.update() + + # Create the modified recarray + r1 = np.rec.array( + [ + (456, "dbe", 1.2), + (2, "ada", 1.3), + (457, "db1", 1.2), + (2, "ada", 1.3), + ], + formats=formats, + names="col1,col2,col3", + ) + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, 4) + + def test08(self): + """Modifying a large table (Row.update)""" + + table = self.table + 
formats = table.description._v_nested_formats + + nrows = 100 + + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify all the rows + for row in table.iterrows(): + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats=formats, names="col1,col2,col3" + ) + for i in range(nrows): + r1["col1"][i] = i + r1["col2"][i] = "b" + str(i) + r1["col3"][i] = 0.0 + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + def test08b(self): + """Setting values on a large table without calling Row.update""" + + table = self.table + formats = table.description._v_nested_formats + + nrows = 100 + + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify all the rows (actually don't) + for row in table.iterrows(): + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + # row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats=formats, names="col1,col2,col3" + ) + for i in range(nrows): + r1["col1"][i] = i - 1 + r1["col2"][i] = "a" + str(i - 1) + r1["col3"][i] = -1.0 + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + 
self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + def test09(self): + """Modifying selected values on a large table""" + + table = self.table + formats = table.description._v_nested_formats + + nrows = 100 + + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify selected rows + for row in table.iterrows(): + if row["col1"][0] > nrows - 3: + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats=formats, names="col1,col2,col3" + ) + for i in range(nrows): + r1["col1"][i] = i - 1 + r1["col2"][i] = "a" + str(i - 1) + r1["col3"][i] = -1.0 + + # modify just the last line + r1["col1"][i] = i + r1["col2"][i] = "b" + str(i) + r1["col3"][i] = 0.0 + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + def test09b(self): + """Modifying selected values on a large table (alternate values)""" + + table = self.table + formats = table.description._v_nested_formats + + nrows = 100 + + # append new rows + row = table.row + for i in range(nrows): + row["col1"] = i - 1 + row["col2"] = "a" + str(i - 1) + row["col3"] = -1.0 + row.append() + table.flush() + + # Modify selected rows + for row in table.iterrows(step=10): + row["col1"] = row.nrow + row["col2"] = "b" + str(row.nrow) + row["col3"] = 0.0 + row.update() + + # Create the modified recarray + r1 = np.rec.array( + None, shape=nrows, formats=formats, names="col1,col2,col3" + ) + for i in range(nrows): + if i % 10 > 0: + r1["col1"][i] = i - 1 
+ r1["col2"][i] = "a" + str(i - 1) + r1["col3"][i] = -1.0 + else: + r1["col1"][i] = i + r1["col2"][i] = "b" + str(i) + r1["col3"][i] = 0.0 + + # Read the modified table + if self.reopen: + self._reopen() + table = self.h5file.root.recarray + table.nrowsinbuf = self.buffersize # set buffer value + r2 = table.read() + if common.verbose: + print("Original table-->", repr(r2)) + print("Should look like-->", repr(r1)) + self.assertEqual(r1.tobytes(), r2.tobytes()) + self.assertEqual(table.nrows, nrows) + + +class UpdateRowTestCase1(UpdateRowTestCase): + reopen = 0 + buffersize = 1 + + +class UpdateRowTestCase2(UpdateRowTestCase): + reopen = 1 + buffersize = 2 + + +class UpdateRowTestCase3(UpdateRowTestCase): + reopen = 0 + buffersize = 1000 + + +class UpdateRowTestCase4(UpdateRowTestCase): + reopen = 1 + buffersize = 1000 + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # Uncomment this only for testing purposes + + for n in range(niter): + theSuite.addTest(common.make_suite(BasicWriteTestCase)) + theSuite.addTest(common.make_suite(DictWriteTestCase)) + theSuite.addTest(common.make_suite(RecordDTWriteTestCase)) + theSuite.addTest(common.make_suite(NumPyDTWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayOneWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayTwoWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayThreeWriteTestCase)) + theSuite.addTest(common.make_suite(RecArrayAlignedWriteTestCase)) + theSuite.addTest(common.make_suite(CompressZLIBTablesTestCase)) + theSuite.addTest(common.make_suite(CompressTwoTablesTestCase)) + theSuite.addTest(common.make_suite(IterRangeTestCase)) + theSuite.addTest(common.make_suite(RecArrayRangeTestCase)) + theSuite.addTest(common.make_suite(GetColRangeTestCase)) + theSuite.addTest(common.make_suite(DefaultValues)) + theSuite.addTest(common.make_suite(RecArrayIO)) + theSuite.addTest(common.make_suite(ShapeTestCase1)) + 
theSuite.addTest(common.make_suite(ShapeTestCase2)) + theSuite.addTest(common.make_suite(SetItemTestCase1)) + theSuite.addTest(common.make_suite(SetItemTestCase2)) + theSuite.addTest(common.make_suite(SetItemTestCase3)) + theSuite.addTest(common.make_suite(SetItemTestCase4)) + theSuite.addTest(common.make_suite(UpdateRowTestCase1)) + theSuite.addTest(common.make_suite(UpdateRowTestCase2)) + theSuite.addTest(common.make_suite(UpdateRowTestCase3)) + theSuite.addTest(common.make_suite(UpdateRowTestCase4)) + theSuite.addTest(common.make_suite(CompressBloscTablesTestCase)) + theSuite.addTest(common.make_suite(CompressLZOTablesTestCase)) + if common.heavy: + theSuite.addTest(common.make_suite(CompressBzip2TablesTestCase)) + theSuite.addTest(common.make_suite(BigTablesTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_timestamps.py b/venv/Lib/site-packages/tables/tests/test_timestamps.py new file mode 100644 index 0000000..696922b --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_timestamps.py @@ -0,0 +1,187 @@ +"""This test unit checks control of dataset timestamps with track_times.""" + +import sys +import time +import hashlib +from pathlib import Path + +import tables as tb +from tables.tests import common + +HEXDIGEST = "2aafb84ab739bb4ae61d2939dc010bfd" + + +class Record(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) # 4-character String + var2 = tb.IntCol() # integer + var3 = tb.Int16Col() # short integer + + +class TrackTimesMixin: + def _add_datasets(self, group, j, track_times): + # Create a table + table = self.h5file.create_table( + group, + f"table{j}", + Record, + title=self.title, + filters=None, + track_times=track_times, + ) + # Get the record object associated with the new table + d = table.row + # Fill the table + for i in range(self.nrows): + d["var1"] = "%04d" % 
(self.nrows - i) + d["var2"] = i + d["var3"] = i * 2 + d.append() # This injects the Record values + # Flush the buffer for this table + table.flush() + + # Create a couple of arrays in each group + var1List = [x["var1"] for x in table.iterrows()] + var3List = [x["var3"] for x in table.iterrows()] + + self.h5file.create_array( + group, f"array{j}", var1List, f"col {j}", track_times=track_times + ) + + # Create CArrays as well + self.h5file.create_carray( + group, + name=f"carray{j}", + obj=var3List, + title=f"col {j + 2}", + track_times=track_times, + ) + + # Create EArrays as well + ea = self.h5file.create_earray( + group, + f"earray{j}", + tb.StringAtom(itemsize=4), + (0,), + f"col {j + 4}", + track_times=track_times, + ) + # And fill them with some values + ea.append(var1List) + + # Finally VLArrays too + vla = self.h5file.create_vlarray( + group, + f"vlarray{j}", + tb.Int16Atom(), + f"col {j + 6}", + track_times=track_times, + ) + # And fill them with some values + vla.append(var3List) + + +class TimestampTestCase( + TrackTimesMixin, common.TempFileMixin, common.PyTablesTestCase +): + title = "A title" + nrows = 10 + + def setUp(self): + super().setUp() + self.populateFile() + + def populateFile(self): + group = self.h5file.root + for j in range(4): + track_times = bool(j % 2) + self._add_datasets(group, j, track_times) + + def test00_checkTimestamps(self): + """Checking retrieval of timestamps""" + + for pattern in ( + "/table{}", + "/array{}", + "/carray{}", + "/earray{}", + "/vlarray{}", + ): + # Verify that: + # - if track_times was False, ctime is 0 + # - if track_times was True, ctime is not 0, + # and has either stayed the same or incremented + tracked_ctimes = [] + for j in range(4): + track_times = bool(j % 2) + node = pattern.format(j) + obj = self.h5file.get_node(node) + # Test property retrieval + self.assertEqual(obj.track_times, track_times) + timestamps = obj._get_obj_timestamps() + self.assertEqual(timestamps.atime, 0) + 
self.assertEqual(timestamps.mtime, 0) + self.assertEqual(timestamps.btime, 0) + if not track_times: + self.assertEqual(timestamps.ctime, 0) + else: + self.assertNotEqual(timestamps.ctime, 0) + tracked_ctimes.append(timestamps.ctime) + self.assertGreaterEqual(tracked_ctimes[1], tracked_ctimes[0]) + + +class BitForBitTestCase( + TrackTimesMixin, common.TempFileMixin, common.PyTablesTestCase +): + title = "A title" + nrows = 10 + + def repopulateFile(self, track_times): + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, mode="w") + group = self.h5file.root + self._add_datasets(group, 1, track_times) + self.h5file.close() + + def test00_checkReproducibility(self): + """Checking bit-for-bit reproducibility with no track_times""" + + self.repopulateFile(track_times=False) + hexdigest_wo_track_1 = self._get_digest(self.h5fname) + self.repopulateFile(track_times=True) + hexdigest_w_track_1 = self._get_digest(self.h5fname) + time.sleep(1) + self.repopulateFile(track_times=True) + hexdigest_w_track_2 = self._get_digest(self.h5fname) + self.repopulateFile(track_times=False) + hexdigest_wo_track_2 = self._get_digest(self.h5fname) + self.assertEqual(HEXDIGEST, hexdigest_wo_track_1) + self.assertEqual(hexdigest_wo_track_1, hexdigest_wo_track_2) + self.assertNotEqual(hexdigest_wo_track_1, hexdigest_w_track_1) + self.assertNotEqual(hexdigest_w_track_1, hexdigest_w_track_2) + + def _get_digest(self, filename): + md5 = hashlib.md5() + for data in Path(filename).read_bytes(): + md5.update(data) + + hexdigest = md5.hexdigest() + + return hexdigest + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + # common.heavy = 1 # Uncomment this only for testing purposes! 
+ + for i in range(niter): + theSuite.addTest(common.make_suite(TimestampTestCase)) + theSuite.addTest(common.make_suite(BitForBitTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_timetype.py b/venv/Lib/site-packages/tables/tests/test_timetype.py new file mode 100644 index 0000000..a50d991 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_timetype.py @@ -0,0 +1,565 @@ +"""Unit test for the Time datatypes.""" + +import numpy as np + +import tables as tb +from tables.tests import common + + +class LeafCreationTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests creating Tables, VLArrays an EArrays with Time data.""" + + def test00_UnidimLeaves(self): + """Creating new nodes with unidimensional time elements.""" + + # Table creation. + class MyTimeRow(tb.IsDescription): + intcol = tb.IntCol() + t32col = tb.Time32Col() + t64col = tb.Time64Col() + + self.h5file.create_table("/", "table", MyTimeRow) + + # VLArray creation. + self.h5file.create_vlarray("/", "vlarray4", tb.Time32Atom()) + self.h5file.create_vlarray("/", "vlarray8", tb.Time64Atom()) + + # EArray creation. + self.h5file.create_earray("/", "earray4", tb.Time32Atom(), shape=(0,)) + self.h5file.create_earray("/", "earray8", tb.Time64Atom(), shape=(0,)) + + def test01_MultidimLeaves(self): + """Creating new nodes with multidimensional time elements.""" + + # Table creation. + class MyTimeRow(tb.IsDescription): + intcol = tb.IntCol(shape=(2, 1)) + t32col = tb.Time32Col(shape=(2, 1)) + t64col = tb.Time64Col(shape=(2, 1)) + + self.h5file.create_table("/", "table", MyTimeRow) + + # VLArray creation. + self.h5file.create_vlarray( + "/", "vlarray4", tb.Time32Atom(shape=(2, 1)) + ) + self.h5file.create_vlarray( + "/", "vlarray8", tb.Time64Atom(shape=(2, 1)) + ) + + # EArray creation. 
+ self.h5file.create_earray( + "/", "earray4", tb.Time32Atom(), shape=(0, 2, 1) + ) + self.h5file.create_earray( + "/", "earray8", tb.Time64Atom(), shape=(0, 2, 1) + ) + + +class OpenTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests opening a file with Time nodes.""" + + # The description used in the test Table. + class MyTimeRow(tb.IsDescription): + t32col = tb.Time32Col(shape=(2, 1)) + t64col = tb.Time64Col(shape=(2, 1)) + + # The atoms used in the test VLArrays. + myTime32Atom = tb.Time32Atom(shape=(2, 1)) + myTime64Atom = tb.Time64Atom(shape=(2, 1)) + + def setUp(self): + super().setUp() + + # Create test Table. + self.h5file.create_table("/", "table", self.MyTimeRow) + + # Create test VLArrays. + self.h5file.create_vlarray("/", "vlarray4", self.myTime32Atom) + self.h5file.create_vlarray("/", "vlarray8", self.myTime64Atom) + + self._reopen() + + def test00_OpenFile(self): + """Opening a file with Time nodes.""" + + # Test the Table node. + tbl = self.h5file.root.table + self.assertEqual( + tbl.coldtypes["t32col"], + self.MyTimeRow.columns["t32col"].dtype, + "Column dtypes do not match.", + ) + self.assertEqual( + tbl.coldtypes["t64col"], + self.MyTimeRow.columns["t64col"].dtype, + "Column dtypes do not match.", + ) + + # Test the VLArray nodes. + vla4 = self.h5file.root.vlarray4 + self.assertEqual( + vla4.atom.dtype, + self.myTime32Atom.dtype, + "Atom types do not match.", + ) + self.assertEqual( + vla4.atom.shape, + self.myTime32Atom.shape, + "Atom shapes do not match.", + ) + + vla8 = self.h5file.root.vlarray8 + self.assertEqual( + vla8.atom.dtype, + self.myTime64Atom.dtype, + "Atom types do not match.", + ) + self.assertEqual( + vla8.atom.shape, + self.myTime64Atom.shape, + "Atom shapes do not match.", + ) + + def test01_OpenFileStype(self): + """Opening a file with Time nodes, comparing Atom.stype.""" + + # Test the Table node. 
+ tbl = self.h5file.root.table + self.assertEqual( + tbl.coltypes["t32col"], + self.MyTimeRow.columns["t32col"].type, + "Column types do not match.", + ) + self.assertEqual( + tbl.coltypes["t64col"], + self.MyTimeRow.columns["t64col"].type, + "Column types do not match.", + ) + + # Test the VLArray nodes. + vla4 = self.h5file.root.vlarray4 + self.assertEqual( + vla4.atom.type, self.myTime32Atom.type, "Atom types do not match." + ) + + vla8 = self.h5file.root.vlarray8 + self.assertEqual( + vla8.atom.type, self.myTime64Atom.type, "Atom types do not match." + ) + + +class CompareTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests whether stored and retrieved time data is kept the same.""" + + # The description used in the test Table. + class MyTimeRow(tb.IsDescription): + t32col = tb.Time32Col(pos=0) + t64col = tb.Time64Col(shape=(2,), pos=1) + + # The atoms used in the test VLArrays. + myTime32Atom = tb.Time32Atom(shape=(2,)) + myTime64Atom = tb.Time64Atom(shape=(2,)) + + def test00_Compare32VLArray(self): + """Comparing written 32-bit time data with read data in a VLArray.""" + + wtime = np.array((1_234_567_890,) * 2, np.int32) + + # Create test VLArray with data. + vla = self.h5file.create_vlarray("/", "test", self.myTime32Atom) + vla.append(wtime) + self._reopen() + + # Check the written data. + rtime = self.h5file.root.test.read()[0][0] + self.h5file.close() + self.assertTrue( + common.allequal(rtime, wtime), + "Stored and retrieved values do not match.", + ) + + def test01_Compare64VLArray(self): + """Comparing written 64-bit time data with read data in a VLArray.""" + + wtime = np.array((1_234_567_890.123456,) * 2, np.float64) + + # Create test VLArray with data. + vla = self.h5file.create_vlarray("/", "test", self.myTime64Atom) + vla.append(wtime) + self._reopen() + + # Check the written data. 
+ rtime = self.h5file.root.test.read()[0][0] + self.h5file.close() + self.assertTrue( + common.allequal(rtime, wtime), + "Stored and retrieved values do not match.", + ) + + def test01b_Compare64VLArray(self): + """Comparing several written and read 64-bit time values in a + VLArray.""" + + # Create test VLArray with data. + vla = self.h5file.create_vlarray("/", "test", self.myTime64Atom) + + # Size of the test. + nrows = vla.nrowsinbuf + 34 # Add some more rows than buffer. + # Only for home checks; the value above should check better + # the I/O with multiple buffers. + # nrows = 10 + + for i in range(nrows): + j = i * 2 + vla.append((j + 0.012, j + 1 + 0.012)) + self._reopen() + + # Check the written data. + arr = self.h5file.root.test.read() + self.h5file.close() + + arr = np.array(arr) + orig_val = np.arange(0, nrows * 2, dtype=np.int32) + 0.012 + orig_val.shape = (nrows, 1, 2) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", arr) + self.assertTrue( + common.allequal(arr, orig_val), + "Stored and retrieved values do not match.", + ) + + def test02_CompareTable(self): + """Comparing written time data with read data in a Table.""" + + wtime = 1_234_567_890.123456 + + # Create test Table with data. + tbl = self.h5file.create_table("/", "test", self.MyTimeRow) + row = tbl.row + row["t32col"] = int(wtime) + row["t64col"] = (wtime, wtime) + row.append() + self._reopen() + + # Check the written data. + recarr = self.h5file.root.test.read(0) + self.h5file.close() + + self.assertEqual( + recarr["t32col"][0], + int(wtime), + "Stored and retrieved values do not match.", + ) + + comp = recarr["t64col"][0] == np.array((wtime, wtime)) + self.assertTrue( + np.all(comp), "Stored and retrieved values do not match." + ) + + def test02b_CompareTable(self): + """Comparing several written and read time values in a Table.""" + + # Create test Table with data. + tbl = self.h5file.create_table("/", "test", self.MyTimeRow) + + # Size of the test. 
+ nrows = tbl.nrowsinbuf + 34 # Add some more rows than buffer. + # Only for home checks; the value above should check better + # the I/O with multiple buffers. + # nrows = 10 + + row = tbl.row + for i in range(nrows): + row["t32col"] = i + j = i * 2 + row["t64col"] = (j + 0.012, j + 1 + 0.012) + row.append() + + self._reopen() + + # Check the written data. + recarr = self.h5file.root.test.read() + self.h5file.close() + + # Time32 column. + orig_val = np.arange(nrows, dtype=np.int32) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", recarr["t32col"][:]) + self.assertTrue( + np.all(recarr["t32col"][:] == orig_val), + "Stored and retrieved values do not match.", + ) + + # Time64 column. + orig_val = np.arange(0, nrows * 2, dtype=np.int32) + 0.012 + orig_val.shape = (nrows, 2) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", recarr["t64col"][:]) + self.assertTrue( + common.allequal(recarr["t64col"][:], orig_val, np.float64), + "Stored and retrieved values do not match.", + ) + + def test03_Compare64EArray(self): + """Comparing written 64-bit time data with read data in an EArray.""" + + wtime = 1_234_567_890.123456 + + # Create test EArray with data. + ea = self.h5file.create_earray( + "/", "test", tb.Time64Atom(), shape=(0,) + ) + ea.append((wtime,)) + self._reopen() + + # Check the written data. + rtime = self.h5file.root.test[0] + self.h5file.close() + self.assertTrue( + common.allequal(rtime, wtime), + "Stored and retrieved values do not match.", + ) + + def test03b_Compare64EArray(self): + """Comparing several written and read 64-bit time values in an + EArray.""" + + # Create test EArray with data. + ea = self.h5file.create_earray( + "/", "test", tb.Time64Atom(), shape=(0, 2) + ) + + # Size of the test. + nrows = ea.nrowsinbuf + 34 # Add some more rows than buffer. + # Only for home checks; the value above should check better + # the I/O with multiple buffers. 
+ # nrows = 10 + + for i in range(nrows): + j = i * 2 + ea.append(((j + 0.012, j + 1 + 0.012),)) + self._reopen() + + # Check the written data. + arr = self.h5file.root.test.read() + self.h5file.close() + + orig_val = np.arange(0, nrows * 2, dtype=np.int32) + 0.012 + orig_val.shape = (nrows, 2) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", arr) + self.assertTrue( + common.allequal(arr, orig_val), + "Stored and retrieved values do not match.", + ) + + +class UnalignedTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Tests writing and reading unaligned time values in a table.""" + + # The description used in the test Table. + # Time fields are unaligned because of 'i8col'. + class MyTimeRow(tb.IsDescription): + i8col = tb.Int8Col(pos=0) + t32col = tb.Time32Col(pos=1) + t64col = tb.Time64Col(shape=(2,), pos=2) + + def test00_CompareTable(self): + """Comparing written unaligned time data with read data in a Table.""" + + # Create test Table with data. + tbl = self.h5file.create_table("/", "test", self.MyTimeRow) + + # Size of the test. + nrows = tbl.nrowsinbuf + 34 # Add some more rows than buffer. + # Only for home checks; the value above should check better + # the I/O with multiple buffers. + # nrows = 10 + + row = tbl.row + for i in range(nrows): + row["i8col"] = np.array(i).astype("i1") + row["t32col"] = i + j = i * 2 + row["t64col"] = (j + 0.012, j + 1 + 0.012) + row.append() + + self._reopen() + + # Check the written data. + recarr = self.h5file.root.test.read() + self.h5file.close() + + # Int8 column. + orig_val = np.arange(nrows, dtype=np.int8) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", recarr["i8col"][:]) + self.assertTrue( + np.all(recarr["i8col"][:] == orig_val), + "Stored and retrieved values do not match.", + ) + + # Time32 column. 
+ orig_val = np.arange(nrows, dtype=np.int32) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", recarr["t32col"][:]) + self.assertTrue( + np.all(recarr["t32col"][:] == orig_val), + "Stored and retrieved values do not match.", + ) + + # Time64 column. + orig_val = np.arange(0, nrows * 2, dtype=np.int32) + 0.012 + orig_val.shape = (nrows, 2) + if common.verbose: + print("Original values:", orig_val) + print("Retrieved values:", recarr["t64col"][:]) + self.assertTrue( + common.allequal(recarr["t64col"][:], orig_val, np.float64), + "Stored and retrieved values do not match.", + ) + + +class BigEndianTestCase(common.PyTablesTestCase): + """Tests for reading big-endian time values in arrays and nested tables.""" + + def setUp(self): + super().setUp() + filename = common.test_filename("times-nested-be.h5") + self.h5file = tb.open_file(filename, "r") + + def tearDown(self): + self.h5file.close() + super().tearDown() + + def test00a_Read32Array(self): + """Checking Time32 type in arrays.""" + + # Check the written data. + earr = self.h5file.root.earr32[:] + + # Generate the expected Time32 array. + start = 1_178_896_298 + nrows = 10 + orig_val = np.arange(start, start + nrows, dtype=np.int32) + + if common.verbose: + print("Retrieved values:", earr) + print("Should look like:", orig_val) + self.assertTrue( + np.all(earr == orig_val), + "Retrieved values do not match the expected values.", + ) + + def test00b_Read64Array(self): + """Checking Time64 type in arrays.""" + + # Check the written data. + earr = self.h5file.root.earr64[:] + + # Generate the expected Time64 array. 
+ start = 1_178_896_298.832258 + nrows = 10 + orig_val = np.arange(start, start + nrows, dtype=np.float64) + + if common.verbose: + print("Retrieved values:", earr) + print("Should look like:", orig_val) + self.assertTrue( + np.allclose(earr, orig_val, rtol=1.0e-15), + "Retrieved values do not match the expected values.", + ) + + def test01a_ReadPlainColumn(self): + """Checking Time32 type in plain columns.""" + + # Check the written data. + tbl = self.h5file.root.tbl + t32 = tbl.cols.t32[:] + + # Generate the expected Time32 array. + start = 1_178_896_298 + nrows = 10 + orig_val = np.arange(start, start + nrows, dtype=np.int32) + + if common.verbose: + print("Retrieved values:", t32) + print("Should look like:", orig_val) + self.assertTrue( + np.all(t32 == orig_val), + "Retrieved values do not match the expected values.", + ) + + def test01b_ReadNestedColumn(self): + """Checking Time64 type in nested columns.""" + + # Check the written data. + tbl = self.h5file.root.tbl + t64 = tbl.cols.nested.t64[:] + + # Generate the expected Time64 array. + start = 1_178_896_298.832258 + nrows = 10 + orig_val = np.arange(start, start + nrows, dtype=np.float64) + + if common.verbose: + print("Retrieved values:", t64) + print("Should look like:", orig_val) + self.assertTrue( + np.allclose(t64, orig_val, rtol=1.0e-15), + "Retrieved values do not match the expected values.", + ) + + def test02_ReadNestedColumnTwice(self): + """Checking Time64 type in nested columns (read twice).""" + + # Check the written data. + tbl = self.h5file.root.tbl + dummy = tbl.cols.nested.t64[:] + self.assertIsNotNone(dummy) + t64 = tbl.cols.nested.t64[:] + + # Generate the expected Time64 array. 
+ start = 1_178_896_298.832258 + nrows = 10 + orig_val = np.arange(start, start + nrows, dtype=np.float64) + + if common.verbose: + print("Retrieved values:", t64) + print("Should look like:", orig_val) + self.assertTrue( + np.allclose(t64, orig_val, rtol=1.0e-15), + "Retrieved values do not match the expected values.", + ) + + +def suite(): + """suite() -> test suite + + Returns a test suite consisting of all the test cases in the module. + """ + + theSuite = common.unittest.TestSuite() + + theSuite.addTest(common.make_suite(LeafCreationTestCase)) + theSuite.addTest(common.make_suite(OpenTestCase)) + theSuite.addTest(common.make_suite(CompareTestCase)) + theSuite.addTest(common.make_suite(UnalignedTestCase)) + theSuite.addTest(common.make_suite(BigEndianTestCase)) + + return theSuite + + +if __name__ == "__main__": + import sys + + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_tree.py b/venv/Lib/site-packages/tables/tests/test_tree.py new file mode 100644 index 0000000..7578b82 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_tree.py @@ -0,0 +1,1241 @@ +import sys +import tempfile +import warnings +from time import perf_counter as clock +from pathlib import Path + +import tables as tb +from tables.tests import common + + +# Test Record class +class Record(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) # 4-character String + var2 = tb.IntCol() # integer + var3 = tb.Int16Col() # short integer + var4 = tb.FloatCol() # double (double-precision) + var5 = tb.Float32Col() # float (single-precision) + + +class TreeTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "w" + title = "This is the table title" + expectedrows = 10 + appendrows = 5 + + def setUp(self): + super().setUp() + + # Create an instance of HDF5 Table + self.populateFile() + self.h5file.close() + + def populateFile(self): + group = self.h5file.root + maxshort = 1 << 
15 + # maxint = 2147483647 # (2 ** 31 - 1) + for j in range(3): + # Create a table + table = self.h5file.create_table( + group, + "table" + str(j), + Record, + title=self.title, + filters=None, + expectedrows=self.expectedrows, + ) + # Get the record object associated with the new table + d = table.row + # Fill the table + for i in range(self.expectedrows): + d["var1"] = "%04d" % (self.expectedrows - i) + d["var2"] = i + d["var3"] = i % maxshort + d["var4"] = float(i) + d["var5"] = float(i) + d.append() # This injects the Record values + # Flush the buffer for this table + table.flush() + + # Create a couple of arrays in each group + var1List = [x["var1"] for x in table.iterrows()] + var4List = [x["var4"] for x in table.iterrows()] + + self.h5file.create_array(group, "var1", var1List, "1") + self.h5file.create_array(group, "var4", var4List, "4") + + # Create a new group (descendant of group) + group2 = self.h5file.create_group(group, "group" + str(j)) + # Iterate over this new group (group2) + group = group2 + + def test00_getNode(self): + """Checking the File.get_node() with string node names""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_getNode..." 
% self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + nodelist = ["/", "/table0", "/group0/var1", "/group0/group1/var4"] + nodenames = [] + for node in nodelist: + object = self.h5file.get_node(node) + nodenames.append(object._v_pathname) + + self.assertEqual(nodenames, nodelist) + if common.verbose: + print("get_node(pathname) test passed") + nodegroups = [ + "/", + "/group0", + "/group0/group1", + "/group0/group1/group2", + ] + nodenames = ["var1", "var4"] + nodepaths = [] + for group in nodegroups: + for name in nodenames: + try: + object = self.h5file.get_node(group, name) + except LookupError: + pass + else: + nodepaths.append(object._v_pathname) + + self.assertEqual( + nodepaths, + [ + "/var1", + "/var4", + "/group0/var1", + "/group0/var4", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + + if common.verbose: + print("get_node(groupname, name) test passed") + nodelist = [ + "/", + "/group0", + "/group0/group1", + "/group0/group1/group2", + "/table0", + ] + nodenames = [] + groupobjects = [] + # warnings.filterwarnings("error", category=UserWarning) + for node in nodelist: + try: + object = self.h5file.get_node(node, classname="Group") + except LookupError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next LookupError was catched!") + print(value) + else: + nodenames.append(object._v_pathname) + groupobjects.append(object) + + self.assertEqual( + nodenames, + ["/", "/group0", "/group0/group1", "/group0/group1/group2"], + ) + if common.verbose: + print("get_node(groupname, classname='Group') test passed") + + # Reset the warning + # warnings.filterwarnings("default", category=UserWarning) + + nodenames = ["var1", "var4"] + nodearrays = [] + for group in groupobjects: + for name in nodenames: + try: + object = self.h5file.get_node(group, name, "Array") + except Exception: + pass + else: + nodearrays.append(object._v_pathname) + + self.assertEqual( + nodearrays, + [ + "/var1", + 
"/var4", + "/group0/var1", + "/group0/var4", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + if common.verbose: + print("get_node(groupobject, name, classname='Array') test passed") + + def test01_getNodeClass(self): + """Checking the File.get_node() with instances""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_getNodeClass..." % self.__class__.__name__ + ) + + self.h5file = tb.open_file(self.h5fname, "r") + + # This tree ways of get_node usage should return a table instance + table = self.h5file.get_node("/group0/table1") + self.assertIsInstance(table, tb.Table) + table = self.h5file.get_node("/group0", "table1") + self.assertIsInstance(table, tb.Table) + table = self.h5file.get_node(self.h5file.root.group0, "table1") + self.assertIsInstance(table, tb.Table) + + # This should return an array instance + arr = self.h5file.get_node("/group0/var1") + self.assertIsInstance(arr, tb.Array) + self.assertIsInstance(arr, tb.Leaf) + + # And this a Group + group = self.h5file.get_node("/group0", "group1", "Group") + self.assertIsInstance(group, tb.Group) + + def test02_listNodes(self): + """Checking the File.list_nodes() method""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_listNodes..." 
% self.__class__.__name__) + + # Made the warnings to raise an error + # warnings.filterwarnings("error", category=UserWarning) + self.h5file = tb.open_file(self.h5fname, "r") + + self.assertRaises( + TypeError, self.h5file.list_nodes, "/", "NoSuchClass" + ) + + nodelist = [ + "/", + "/group0", + "/group0/table1", + "/group0/group1/group2", + "/var1", + ] + nodenames = [] + objects = [] + for node in nodelist: + try: + objectlist = self.h5file.list_nodes(node) + except Exception: + pass + else: + objects.extend(objectlist) + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, + [ + "/group0", + "/table0", + "/var1", + "/var4", + "/group0/group1", + "/group0/table1", + "/group0/var1", + "/group0/var4", + ], + ) + if common.verbose: + print("list_nodes(pathname) test passed") + + nodenames = [] + for node in objects: + try: + objectlist = self.h5file.list_nodes(node) + except Exception: + pass + else: + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, + [ + "/group0/group1", + "/group0/table1", + "/group0/var1", + "/group0/var4", + "/group0/group1/group2", + "/group0/group1/table2", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + + if common.verbose: + print("list_nodes(groupobject) test passed") + + nodenames = [] + for node in objects: + try: + objectlist = self.h5file.list_nodes(node, "Leaf") + except TypeError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next TypeError was catched!") + print(value) + else: + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, + [ + "/group0/table1", + "/group0/var1", + "/group0/var4", + "/group0/group1/table2", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + + if common.verbose: + print("list_nodes(groupobject, classname = 'Leaf') test passed") + + nodenames = [] + for node in objects: + try: + objectlist = 
self.h5file.list_nodes(node, "Table") + except TypeError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next TypeError was catched!") + print(value) + else: + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, ["/group0/table1", "/group0/group1/table2"] + ) + + if common.verbose: + print("list_nodes(groupobject, classname = 'Table') test passed") + + # Reset the warning + # warnings.filterwarnings("default", category=UserWarning) + + def test02b_iterNodes(self): + """Checking the File.iter_nodes() method""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02b_iterNodes..." % self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + + self.assertRaises( + TypeError, self.h5file.list_nodes, "/", "NoSuchClass" + ) + + nodelist = [ + "/", + "/group0", + "/group0/table1", + "/group0/group1/group2", + "/var1", + ] + nodenames = [] + objects = [] + for node in nodelist: + try: + objectlist = [o for o in self.h5file.iter_nodes(node)] + except Exception: + pass + else: + objects.extend(objectlist) + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, + [ + "/group0", + "/table0", + "/var1", + "/var4", + "/group0/group1", + "/group0/table1", + "/group0/var1", + "/group0/var4", + ], + ) + if common.verbose: + print("iter_nodes(pathname) test passed") + + nodenames = [] + for node in objects: + try: + objectlist = [o for o in self.h5file.iter_nodes(node)] + except Exception: + pass + else: + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, + [ + "/group0/group1", + "/group0/table1", + "/group0/var1", + "/group0/var4", + "/group0/group1/group2", + "/group0/group1/table2", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + + if common.verbose: + print("iter_nodes(groupobject) test passed") + + nodenames = [] + for node in objects: + try: + 
objectlist = [o for o in self.h5file.iter_nodes(node, "Leaf")] + except TypeError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next TypeError was catched!") + print(value) + else: + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, + [ + "/group0/table1", + "/group0/var1", + "/group0/var4", + "/group0/group1/table2", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + + if common.verbose: + print("iter_nodes(groupobject, classname = 'Leaf') test passed") + + nodenames = [] + for node in objects: + try: + objectlist = [o for o in self.h5file.iter_nodes(node, "Table")] + except TypeError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next TypeError was catched!") + print(value) + else: + for object in objectlist: + nodenames.append(object._v_pathname) + + self.assertEqual( + nodenames, ["/group0/table1", "/group0/group1/table2"] + ) + + if common.verbose: + print("iter_nodes(groupobject, classname = 'Table') test passed") + + # Reset the warning + # warnings.filterwarnings("default", category=UserWarning) + + def test03_TraverseTree(self): + """Checking the File.walk_groups() method""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test03_TraverseTree..." 
% self.__class__.__name__ + ) + + self.h5file = tb.open_file(self.h5fname, "r") + groups = [] + tables_ = [] + arrays = [] + for group in self.h5file.walk_groups(): + groups.append(group._v_pathname) + for table in self.h5file.list_nodes(group, "Table"): + tables_.append(table._v_pathname) + for arr in self.h5file.list_nodes(group, "Array"): + arrays.append(arr._v_pathname) + + self.assertEqual( + groups, ["/", "/group0", "/group0/group1", "/group0/group1/group2"] + ) + + self.assertEqual( + tables_, ["/table0", "/group0/table1", "/group0/group1/table2"] + ) + + self.assertEqual( + arrays, + [ + "/var1", + "/var4", + "/group0/var1", + "/group0/var4", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + if common.verbose: + print("walk_groups() test passed") + + groups = [] + tables_ = [] + arrays = [] + for group in self.h5file.walk_groups("/group0/group1"): + groups.append(group._v_pathname) + for table in self.h5file.list_nodes(group, "Table"): + tables_.append(table._v_pathname) + for arr in self.h5file.list_nodes(group, "Array"): + arrays.append(arr._v_pathname) + + self.assertEqual(groups, ["/group0/group1", "/group0/group1/group2"]) + + self.assertEqual(tables_, ["/group0/group1/table2"]) + + self.assertEqual( + arrays, ["/group0/group1/var1", "/group0/group1/var4"] + ) + + if common.verbose: + print("walk_groups(pathname) test passed") + + def test04_walkNodes(self): + """Checking File.walk_nodes""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_walkNodes..." 
% self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + + self.assertRaises( + TypeError, next, self.h5file.walk_nodes("/", "NoSuchClass") + ) + + groups = [] + tables1 = [] + tables2 = [] + arrays = [] + for group in self.h5file.walk_nodes(classname="Group"): + groups.append(group._v_pathname) + for table in group._f_iter_nodes(classname="Table"): + tables1.append(table._v_pathname) + + # Test the recursivity + for table in self.h5file.root._f_walknodes("Table"): + tables2.append(table._v_pathname) + + for arr in self.h5file.walk_nodes(classname="Array"): + arrays.append(arr._v_pathname) + + self.assertEqual( + groups, ["/", "/group0", "/group0/group1", "/group0/group1/group2"] + ) + self.assertEqual( + tables1, ["/table0", "/group0/table1", "/group0/group1/table2"] + ) + self.assertEqual( + tables2, ["/table0", "/group0/table1", "/group0/group1/table2"] + ) + self.assertEqual( + arrays, + [ + "/var1", + "/var4", + "/group0/var1", + "/group0/var4", + "/group0/group1/var1", + "/group0/group1/var4", + ], + ) + + if common.verbose: + print("File.__iter__() and Group.__iter__ test passed") + + groups = [] + tables_ = [] + arrays = [] + for group in self.h5file.walk_nodes( + "/group0/group1", classname="Group" + ): + groups.append(group._v_pathname) + for table in group._f_walknodes("Table"): + tables_.append(table._v_pathname) + for arr in self.h5file.walk_nodes(group, "Array"): + arrays.append(arr._v_pathname) + + self.assertEqual(groups, ["/group0/group1", "/group0/group1/group2"]) + + self.assertEqual(tables_, ["/group0/group1/table2"]) + + self.assertEqual( + arrays, ["/group0/group1/var1", "/group0/group1/var4"] + ) + + if common.verbose: + print("walk_nodes(pathname, classname) test passed") + + def test05_dir(self): + """Checking Group.__dir__""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_dir..." 
% self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + + """ + h5file nodes: + '/table0', '/var1', '/var4' + '/group0/table1', '/group0/var1', '/group0/var4', + '/group0/group1/table2', '/group0/group1/var1', '/group0/group1/var4' + """ + root_dir = dir(self.h5file.root) + + # Check some regular attributes. + + self.assertIn("_v_children", root_dir) + self.assertIn("_v_attrs", root_dir) + self.assertIn("_v_groups", root_dir) + self.assertIn("_g_get_child_group_class", root_dir) + self.assertIn("_g_get_child_group_class", root_dir) + self.assertIn("_f_close", root_dir) + + # Check children nodes. + + self.assertIn("group0", root_dir) + self.assertIn("table0", root_dir) + self.assertIn("var1", root_dir) + self.assertNotIn("table1", root_dir) + self.assertNotIn("table2", root_dir) + self.assertSequenceEqual( + sorted(set(root_dir)), sorted(root_dir) + ) # Check for no duplicates. + + root_group0_dir = dir(self.h5file.root.group0) + self.assertIn("group1", root_group0_dir) + self.assertIn("table1", root_group0_dir) + self.assertNotIn("table0", root_group0_dir) + self.assertNotIn("table2", root_group0_dir) + self.assertSequenceEqual( + sorted(set(root_group0_dir)), sorted(root_group0_dir) + ) + + root_group0_group1_dir = dir(self.h5file.root.group0.group1) + self.assertIn("group2", root_group0_group1_dir) + self.assertIn("table2", root_group0_group1_dir) + self.assertNotIn("table0", root_group0_group1_dir) + self.assertNotIn("table1", root_group0_group1_dir) + self.assertNotIn("group0", root_group0_group1_dir) + self.assertNotIn("group1", root_group0_group1_dir) + self.assertSequenceEqual( + sorted(set(root_group0_group1_dir)), sorted(root_group0_group1_dir) + ) + + if common.verbose: + print("Group.__dir__ test passed") + + def test06_v_groups(self): + """Checking Group._v_groups""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06_v_groups..." 
% self.__class__.__name__) + + self.h5file = tb.open_file(self.h5fname, "r") + + """ + h5file nodes: + '/table0', '/var1', '/var4' + '/group0/table1', '/group0/var1', '/group0/var4', + '/group0/group1/table2', '/group0/group1/var1', '/group0/group1/var4' + """ + self.assertIsInstance(self.h5file.root._v_groups, dict) + group_names = {"group0"} + names = {k for k, v in self.h5file.root._v_groups.iteritems()} + self.assertEqual(group_names, names) + groups = list(self.h5file.root._v_groups.itervalues()) + self.assertEqual(len(groups), len(group_names)) + + for group in groups: + with self.subTest(name=group._v_name): + self.assertIn(group._v_name, group_names) + + if common.verbose: + print("Group._v_groups test passed") + + +class DeepTreeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Checks for deep hierarchy levels in PyTables trees.""" + + def setUp(self): + super().setUp() + + # Here we put a more conservative limit to deal with more platforms + # With maxdepth = 64 this test would take less than 40 MB + # of main memory to run, which is quite reasonable nowadays. + # With maxdepth = 1024 this test will take around 300 MB. + if common.heavy: + self.maxdepth = 256 # Takes around 60 MB of memory! 
+ else: + self.maxdepth = 64 # This should be safe for most machines + if common.verbose: + print("Maximum depth tested :", self.maxdepth) + + # Open a new empty HDF5 file + group = self.h5file.root + if common.verbose: + print("Depth writing progress: ", end=" ") + + # Iterate until maxdepth + for depth in range(self.maxdepth): + # Save it on the HDF5 file + if common.verbose: + print("%3d," % (depth), end=" ") + # Create a couple of arrays here + self.h5file.create_array( + group, "array", [1, 1], "depth: %d" % depth + ) + self.h5file.create_array( + group, "array2", [1, 1], "depth: %d" % depth + ) + # And also a group + self.h5file.create_group(group, "group2_" + str(depth)) + # Finally, iterate over a new group + group = self.h5file.create_group(group, "group" + str(depth)) + + # Close the file + self.h5file.close() + + def _check_tree(self, filename): + # Open the previous HDF5 file in read-only mode + + with tb.open_file(filename, mode="r") as h5file: + group = h5file.root + if common.verbose: + print("\nDepth reading progress: ", end=" ") + + # Get the metadata on the previosly saved arrays + for depth in range(self.maxdepth): + if common.verbose: + print("%3d," % (depth), end=" ") + + # Check the contents + self.assertEqual(group.array[:], [1, 1]) + self.assertIn("array2", group) + self.assertIn("group2_" + str(depth), group) + + # Iterate over the next group + group = h5file.get_node(group, "group" + str(depth)) + + if common.verbose: + print() # This flush the stdout buffer + + def test00_deepTree(self): + """Creation of a large depth object tree.""" + + self._check_tree(self.h5fname) + + def test01a_copyDeepTree(self): + """Copy of a large depth object tree.""" + + self.h5file = tb.open_file(self.h5fname, mode="r") + h5fname2 = tempfile.mktemp(".h5") + try: + with tb.open_file(h5fname2, mode="w") as h5file2: + if common.verbose: + print("\nCopying deep tree...") + + self.h5file.copy_node( + self.h5file.root, h5file2.root, recursive=True + ) + 
self.h5file.close() + + self._check_tree(h5fname2) + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + def test01b_copyDeepTree(self): + """Copy of a large depth object tree with small node cache.""" + + self.h5file = tb.open_file(self.h5fname, mode="r", node_cache_slots=10) + h5fname2 = tempfile.mktemp(".h5") + try: + with tb.open_file( + h5fname2, mode="w", node_cache_slots=10 + ) as h5file2: + if common.verbose: + print("\nCopying deep tree...") + + self.h5file.copy_node( + self.h5file.root, h5file2.root, recursive=True + ) + self.h5file.close() + + self._check_tree(h5fname2) + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + def test01c_copyDeepTree(self): + """Copy of a large depth object tree with no node cache.""" + + self.h5file = tb.open_file(self.h5fname, mode="r", node_cache_slots=0) + h5fname2 = tempfile.mktemp(".h5") + try: + with tb.open_file( + h5fname2, mode="w", node_cache_slots=0 + ) as h5file2: + if common.verbose: + print("\nCopying deep tree...") + + self.h5file.copy_node( + self.h5file.root, h5file2.root, recursive=True + ) + self.h5file.close() + + self._check_tree(h5fname2) + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + @common.unittest.skipUnless(common.heavy, "only in heavy mode") + def test01d_copyDeepTree(self): + """Copy of a large depth object tree with static node cache.""" + + self.h5file = tb.open_file( + self.h5fname, mode="r", node_cache_slots=-256 + ) + h5fname2 = tempfile.mktemp(".h5") + try: + with tb.open_file( + h5fname2, mode="w", node_cache_slots=-256 + ) as h5file2: + if common.verbose: + print("\nCopying deep tree...") + + self.h5file.copy_node( + self.h5file.root, h5file2.root, recursive=True + ) + self.h5file.close() + + self._check_tree(h5fname2) + finally: + if Path(h5fname2).is_file(): + Path(h5fname2).unlink() + + +class WideTreeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Checks for maximum number of children for a Group.""" + + def 
test00_Leafs(self): + """Checking creation of large number of leafs (1024) per group. + + Variable 'maxchildren' controls this check. PyTables support up + to 4096 children per group, but this would take too much memory + (up to 64 MB) for testing purposes (maybe we can add a test for + big platforms). A 1024 children run takes up to 30 MB. A 512 + children test takes around 25 MB. + + """ + + if common.heavy: + maxchildren = 4096 + else: + maxchildren = 256 + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_wideTree..." % self.__class__.__name__) + print("Maximum number of children tested :", maxchildren) + + a = [1, 1] + if common.verbose: + print("Children writing progress: ", end=" ") + for child in range(maxchildren): + if common.verbose: + print("%3d," % (child), end=" ") + self.h5file.create_array( + self.h5file.root, "array" + str(child), a, "child: %d" % child + ) + if common.verbose: + print() + + t1 = clock() + a = [1, 1] + + # Open the previous HDF5 file in read-only mode + self._reopen() + if common.verbose: + print( + "\nTime spent opening a file with %d arrays: %s s" + % (maxchildren, clock() - t1) + ) + print("\nChildren reading progress: ", end=" ") + + # Get the metadata on the previosly saved arrays + for child in range(maxchildren): + if common.verbose: + print("%3d," % (child), end=" ") + + # Create an array for later comparison + # Get the actual array + array_ = getattr(self.h5file.root, "array" + str(child)) + b = array_.read() + + # Arrays a and b must be equal + self.assertEqual(a, b) + if common.verbose: + print() # This flush the stdout buffer + + def test01_wideTree(self): + """Checking creation of large number of groups (1024) per group. + + Variable 'maxchildren' controls this check. PyTables support up + to 4096 children per group, but this would take too much memory + (up to 64 MB) for testing purposes (maybe we can add a test for + big platforms). A 1024 children run takes up to 30 MB. 
A 512 + children test takes around 25 MB. + + """ + + if common.heavy: + # for big platforms! + maxchildren = 4096 + else: + # for standard platforms + maxchildren = 256 + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_wideTree..." % self.__class__.__name__) + print("Maximum number of children tested :", maxchildren) + + if common.verbose: + print("Children writing progress: ", end=" ") + for child in range(maxchildren): + if common.verbose: + print("%3d," % (child), end=" ") + self.h5file.create_group( + self.h5file.root, "group" + str(child), "child: %d" % child + ) + if common.verbose: + print() + + t1 = clock() + + # Open the previous HDF5 file in read-only mode + self._reopen() + if common.verbose: + print( + "\nTime spent opening a file with %d groups: %s s" + % (maxchildren, clock() - t1) + ) + print("\nChildren reading progress: ", end=" ") + + # Get the metadata on the previosly saved arrays + for child in range(maxchildren): + if common.verbose: + print("%3d," % (child), end=" ") + # Get the actual group + group = getattr(self.h5file.root, "group" + str(child)) + # Arrays a and b must be equal + self.assertEqual(group._v_title, "child: %d" % child) + + if common.verbose: + print() # This flush the stdout buffer + + +class HiddenTreeTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Check for hidden groups, leaves and hierarchies.""" + + def setUp(self): + super().setUp() + + self.visible = [] # list of visible object paths + self.hidden = [] # list of hidden object paths + + # Create some visible nodes: a, g, g/a1, g/a2, g/g, g/g/a. + h5f = self.h5file + h5f.create_array("/", "a", [0]) + g = h5f.create_group("/", "g") + h5f.create_array(g, "a1", [0]) + h5f.create_array(g, "a2", [0]) + g_g = h5f.create_group(g, "g") + h5f.create_array(g_g, "a", [0]) + + self.visible.extend(["/a", "/g", "/g/a1", "/g/a2", "/g/g", "/g/g/a"]) + + # Create some hidden nodes: _p_a, _p_g, _p_g/a, _p_g/_p_a, g/_p_a. 
+ h5f.create_array("/", "_p_a", [0]) + hg = h5f.create_group("/", "_p_g") + h5f.create_array(hg, "a", [0]) + h5f.create_array(hg, "_p_a", [0]) + h5f.create_array(g, "_p_a", [0]) + + self.hidden.extend( + ["/_p_a", "/_p_g", "/_p_g/a", "/_p_g/_p_a", "/g/_p_a"] + ) + + # The test behind commented out because the .objects dictionary + # has been removed (as well as .leaves and .groups) + def _test00_objects(self): + """Absence of hidden nodes in `File.objects`.""" + + objects = self.h5file.objects + + warnings.filterwarnings("ignore", category=DeprecationWarning) + + for vpath in self.visible: + self.assertTrue( + vpath in objects, + "Missing visible node ``%s`` from ``File.objects``." % vpath, + ) + for hpath in self.hidden: + self.assertTrue( + hpath not in objects, + "Found hidden node ``%s`` in ``File.objects``." % hpath, + ) + + warnings.filterwarnings("default", category=DeprecationWarning) + + # The test behind commented out because the .objects dictionary + # has been removed (as well as .leaves and .groups) + def _test00b_objects(self): + """Object dictionaries conformance with ``walk_nodes()``.""" + + def dictCheck(dictName, classname): + file_ = self.h5file + + objects = getattr(file_, dictName) + walkPaths = [ + node._v_pathname for node in file_.walk_nodes("/", classname) + ] + dictPaths = [path for path in objects] + walkPaths.sort() + dictPaths.sort() + self.assertEqual( + walkPaths, + dictPaths, + "nodes in ``%s`` do not match those from ``walk_nodes()``" + % dictName, + ) + self.assertEqual( + len(walkPaths), + len(objects), + "length of ``%s`` differs from that of ``walk_nodes()``" + % dictName, + ) + + warnings.filterwarnings("ignore", category=DeprecationWarning) + + dictCheck("objects", None) + dictCheck("groups", "Group") + dictCheck("leaves", "Leaf") + + warnings.filterwarnings("default", category=DeprecationWarning) + + def test01_getNode(self): + """Node availability via `File.get_node()`.""" + + h5f = self.h5file + + for vpath in self.visible: 
+ h5f.get_node(vpath) + for hpath in self.hidden: + h5f.get_node(hpath) + + def test02_walkGroups(self): + """Hidden group absence in `File.walk_groups()`.""" + + hidden = self.hidden + + for group in self.h5file.walk_groups("/"): + pathname = group._v_pathname + self.assertNotIn( + pathname, hidden, f"Walked across hidden group ``{pathname}``." + ) + + def test03_walkNodes(self): + """Hidden node absence in `File.walk_nodes()`.""" + + hidden = self.hidden + + for node in self.h5file.walk_nodes("/"): + pathname = node._v_pathname + self.assertNotIn( + pathname, hidden, f"Walked across hidden node ``{pathname}``." + ) + + def test04_listNodesVisible(self): + """Listing visible nodes under a visible group (list_nodes).""" + + hidden = self.hidden + + for node in self.h5file.list_nodes("/g"): + pathname = node._v_pathname + self.assertNotIn( + pathname, hidden, f"Listed hidden node ``{pathname}``." + ) + + def test04b_listNodesVisible(self): + """Listing visible nodes under a visible group (iter_nodes).""" + + hidden = self.hidden + + for node in self.h5file.iter_nodes("/g"): + pathname = node._v_pathname + self.assertNotIn( + pathname, hidden, f"Listed hidden node ``{pathname}``." + ) + + def test05_listNodesHidden(self): + """Listing visible nodes under a hidden group (list_nodes).""" + + hidden = self.hidden + + node_to_find = "/_p_g/a" + found_node = False + for node in self.h5file.list_nodes("/_p_g"): + pathname = node._v_pathname + if pathname == node_to_find: + found_node = True + self.assertIn( + pathname, hidden, f"Listed hidden node ``{pathname}``." + ) + + self.assertTrue( + found_node, "Hidden node ``%s`` was not listed." 
% node_to_find + ) + + def test05b_iterNodesHidden(self): + """Listing visible nodes under a hidden group (iter_nodes).""" + + hidden = self.hidden + + node_to_find = "/_p_g/a" + found_node = False + for node in self.h5file.iter_nodes("/_p_g"): + pathname = node._v_pathname + if pathname == node_to_find: + found_node = True + self.assertIn( + pathname, hidden, f"Listed hidden node ``{pathname}``." + ) + + self.assertTrue( + found_node, "Hidden node ``%s`` was not listed." % node_to_find + ) + + # The test behind commented out because the .objects dictionary + # has been removed (as well as .leaves and .groups) + def _test06_reopen(self): + """Reopening a file with hidden nodes.""" + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname) + self.test00_objects() + + def test07_move(self): + """Moving a node between hidden and visible groups.""" + + is_visible_node = self.h5file.is_visible_node + + self.assertFalse(is_visible_node("/_p_g/a")) + self.h5file.move_node("/_p_g/a", "/g", "a") + self.assertTrue(is_visible_node("/g/a")) + self.h5file.move_node("/g/a", "/_p_g", "a") + self.assertFalse(is_visible_node("/_p_g/a")) + + def test08_remove(self): + """Removing a visible group with hidden children.""" + + self.assertIn("/g/_p_a", self.h5file) + self.h5file.root.g._f_remove(recursive=True) + self.assertNotIn("/g/_p_a", self.h5file) + + +class CreateParentsTestCase(common.TempFileMixin, common.PyTablesTestCase): + """Test the ``createparents`` flag. + + These are mainly for the user interface. More thorough tests on the + workings of the flag can be found in the ``test_do_undo.py`` module. 
+ + """ + + filters = tb.Filters(complevel=4) # simply non-default + + def setUp(self): + super().setUp() + self.h5file.create_array("/", "array", [1]) + self.h5file.create_group("/", "group", filters=self.filters) + + def test01_inside(self): + """Placing a node inside a nonexistent child of itself.""" + self.assertRaises( + tb.NodeError, + self.h5file.move_node, + "/group", + "/group/foo/bar", + createparents=True, + ) + self.assertNotIn("/group/foo", self.h5file) + self.assertRaises( + tb.NodeError, + self.h5file.copy_node, + "/group", + "/group/foo/bar", + recursive=True, + createparents=True, + ) + self.assertNotIn("/group/foo", self.h5fname) + + def test02_filters(self): + """Propagating the filters of created parent groups.""" + + self.h5file.create_group("/group/foo/bar", "baz", createparents=True) + self.assertIn("/group/foo/bar/baz", self.h5file) + for group in self.h5file.walk_groups("/group"): + self.assertEqual(self.filters, group._v_filters) + + +def suite(): + theSuite = common.unittest.TestSuite() + # This counter is useful when detecting memory leaks + niter = 1 + + for i in range(niter): + theSuite.addTest(common.make_suite(TreeTestCase)) + theSuite.addTest(common.make_suite(DeepTreeTestCase)) + theSuite.addTest(common.make_suite(WideTreeTestCase)) + theSuite.addTest(common.make_suite(HiddenTreeTestCase)) + theSuite.addTest(common.make_suite(CreateParentsTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_types.py b/venv/Lib/site-packages/tables/tests/test_types.py new file mode 100644 index 0000000..033d4b8 --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_types.py @@ -0,0 +1,335 @@ +import sys + +import numpy as np +from packaging.version import parse as parse_version + +import tables as tb +from tables.tests import common + + +# Test Record class +class 
Record(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) # 4-character String + var2 = tb.Col.from_kind("int") # integer + var3 = tb.Col.from_kind("int", itemsize=2) # short integer + var4 = tb.Col.from_kind("float") # double (double-precision) + var5 = tb.Col.from_kind("float", itemsize=4) # float (single-precision) + var6 = tb.Col.from_kind("complex") # double-precision + var7 = tb.Col.from_kind("complex", itemsize=8) # single-precision + if hasattr(tb, "Float16Atom"): + var8 = tb.Col.from_kind("float", itemsize=2) # half-precision + if hasattr(tb, "Float96Atom"): + var9 = tb.Col.from_kind("float", itemsize=12) # extended-precision + if hasattr(tb, "Float128Atom"): + var10 = tb.Col.from_kind("float", itemsize=16) # extended-precision + if hasattr(tb, "Complex192Atom"): + var11 = tb.Col.from_kind("complex", itemsize=24) # extended-precision + if hasattr(tb, "Complex256Atom"): + var12 = tb.Col.from_kind("complex", itemsize=32) # extended-precision + + +class RangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + title = "This is the table title" + expectedrows = 100 + maxshort = 2**15 + maxint = 2_147_483_648 # (2 ** 31) + compress = 0 + + def setUp(self): + super().setUp() + self.rootgroup = self.h5file.root + + # Create a table + self.table = self.h5file.create_table( + self.rootgroup, "table", Record, self.title + ) + + def test00_range(self): + """Testing the range check.""" + + rec = self.table.row + + # Save a record + i = self.maxshort + rec["var1"] = "%04d" % (i) + rec["var2"] = i + rec["var3"] = np.array(i).astype("i2") + rec["var4"] = float(i) + rec["var5"] = float(i) + rec["var6"] = float(i) + rec["var7"] = complex(i, i) + if hasattr(tb, "Float16Atom"): + rec["var8"] = float(i) + if hasattr(tb, "Float96Atom"): + rec["var9"] = float(i) + if hasattr(tb, "Float128Atom"): + rec["var10"] = float(i) + try: + rec.append() + except ValueError: + if common.verbose: + type, value, traceback = sys.exc_info() + print("\nGreat!, the next ValueError was 
catched!") + print(value) + pass + else: + if common.verbose: + print( + "\nNow, the range overflow no longer issues a ValueError" + ) + + def test01_type(self): + """Testing the type check.""" + + rec = self.table.row + # Save a record + i = self.maxshort + rec["var1"] = "%04d" % (i) + rec["var2"] = i + rec["var3"] = np.array(i % self.maxshort).astype("i2") + rec["var5"] = float(i) + + # Numpy 1.25 -> ValueError + with self.assertRaises((TypeError, ValueError)): + rec["var4"] = "124c" + + rec["var6"] = float(i) + rec["var7"] = complex(i, i) + if hasattr(tb, "Float16Atom"): + rec["var8"] = float(i) + if hasattr(tb, "Float96Atom"): + rec["var9"] = float(i) + if hasattr(tb, "Float128Atom"): + rec["var10"] = float(i) + + +# Check the dtype read-only attribute +class DtypeTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test00a_table(self): + """Check dtype accessor for Table objects.""" + + a = self.h5file.create_table("/", "table", Record) + self.assertEqual(a.dtype, a.description._v_dtype) + + def test00b_column(self): + """Check dtype accessor for Column objects.""" + + a = self.h5file.create_table("/", "table", Record) + c = a.cols.var3 + self.assertEqual(c.dtype, a.description._v_dtype["var3"]) + + def test01_array(self): + """Check dtype accessor for Array objects.""" + + a = self.h5file.create_array("/", "array", [1, 2]) + self.assertEqual(a.dtype, a.atom.dtype) + + def test02_carray(self): + """Check dtype accessor for CArray objects.""" + + a = self.h5file.create_carray( + "/", "array", atom=tb.FloatAtom(), shape=[1, 2] + ) + self.assertEqual(a.dtype, a.atom.dtype) + + def test03_carray(self): + """Check dtype accessor for EArray objects.""" + + a = self.h5file.create_earray( + "/", "array", atom=tb.FloatAtom(), shape=[0, 2] + ) + self.assertEqual(a.dtype, a.atom.dtype) + + def test04_vlarray(self): + """Check dtype accessor for VLArray objects.""" + + a = self.h5file.create_vlarray("/", "array", tb.FloatAtom()) + self.assertEqual(a.dtype, 
a.atom.dtype) + + +class ReadFloatTestCase(common.TestFileMixin, common.PyTablesTestCase): + h5fname = common.test_filename("float.h5") + nrows = 5 + ncols = 6 + + def setUp(self): + super().setUp() + x = np.arange(self.ncols) + y = np.arange(self.nrows) + y.shape = (self.nrows, 1) + self.values = x + y + + def test01_read_float16(self): + dtype = "float16" + if hasattr(np, dtype): + ds = getattr(self.h5file.root, dtype) + self.assertNotIsInstance(ds, tb.UnImplemented) + self.assertEqual(ds.shape, (self.nrows, self.ncols)) + self.assertEqual(ds.dtype, dtype) + self.assertTrue( + common.allequal(ds.read(), self.values.astype(dtype)) + ) + else: + with self.assertWarns(UserWarning): + ds = getattr(self.h5file.root, dtype) + self.assertIsInstance(ds, tb.UnImplemented) + + def test02_read_float32(self): + dtype = "float32" + ds = getattr(self.h5file.root, dtype) + self.assertNotIsInstance(ds, tb.UnImplemented) + self.assertEqual(ds.shape, (self.nrows, self.ncols)) + self.assertEqual(ds.dtype, dtype) + self.assertTrue(common.allequal(ds.read(), self.values.astype(dtype))) + + def test03_read_float64(self): + dtype = "float64" + ds = getattr(self.h5file.root, dtype) + self.assertNotIsInstance(ds, tb.UnImplemented) + self.assertEqual(ds.shape, (self.nrows, self.ncols)) + self.assertEqual(ds.dtype, dtype) + self.assertTrue(common.allequal(ds.read(), self.values.astype(dtype))) + + def test04_read_longdouble(self): + dtype = "longdouble" + if hasattr(tb, "Float96Atom") or hasattr(tb, "Float128Atom"): + ds = getattr(self.h5file.root, dtype) + self.assertNotIsInstance(ds, tb.UnImplemented) + self.assertEqual(ds.shape, (self.nrows, self.ncols)) + self.assertEqual(ds.dtype, dtype) + self.assertTrue( + common.allequal(ds.read(), self.values.astype(dtype)) + ) + + if hasattr(tb, "Float96Atom"): + self.assertEqual(ds.dtype, "float96") + elif hasattr(tb, "Float128Atom"): + self.assertEqual(ds.dtype, "float128") + else: + # XXX: check + # the behavior depends on the HDF5 lib 
configuration + try: + with self.assertWarns(UserWarning): + ds = getattr(self.h5file.root, dtype) + self.assertIsInstance(ds, tb.UnImplemented) + except AssertionError: + ds = getattr(self.h5file.root, dtype) + self.assertEqual(ds.dtype, "float64") + + def test05_read_quadprecision_float(self): + # XXX: check + try: + with self.assertWarns(UserWarning): + ds = self.h5file.root.quadprecision + self.assertIsInstance(ds, tb.UnImplemented) + except AssertionError: + # NOTE: it would be nice to have some sort of message that warns + # against the potential precision loss: the quad-precision + # dataset actually uses 128 bits for each element, not just + # 80 bits (longdouble) + ds = self.h5file.root.quadprecision + self.assertEqual(ds.dtype, "longdouble") + + +class AtomTestCase(common.PyTablesTestCase): + def test_init_parameters_01(self): + atom1 = tb.StringAtom(itemsize=12) + atom2 = atom1.copy() + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + self.assertIsNot(atom1, atom2) + + def test_init_parameters_02(self): + atom1 = tb.StringAtom(itemsize=12) + atom2 = atom1.copy(itemsize=100, shape=(2, 2)) + self.assertEqual( + atom2, tb.StringAtom(itemsize=100, shape=(2, 2), dflt=b"") + ) + + def test_init_parameters_03(self): + atom1 = tb.StringAtom(itemsize=12) + self.assertRaises(TypeError, atom1.copy, foobar=42) + + def test_from_dtype_01(self): + atom1 = tb.Atom.from_dtype(np.dtype((np.int16, (2, 2)))) + atom2 = tb.Int16Atom(shape=(2, 2), dflt=0) + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_dtype_02(self): + atom1 = tb.Atom.from_dtype(np.dtype("S5"), dflt=b"hello") + atom2 = tb.StringAtom(itemsize=5, shape=(), dflt=b"hello") + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_dtype_03(self): + with self.assertWarns(Warning): + atom1 = tb.Atom.from_dtype(np.dtype("U5"), dflt=b"hello") + atom2 = tb.StringAtom(itemsize=5, shape=(), dflt=b"hello") + 
self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_dtype_04(self): + atom1 = tb.Atom.from_dtype(np.dtype("float64")) + atom2 = tb.Float64Atom(shape=(), dflt=0.0) + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_kind_01(self): + atom1 = tb.Atom.from_kind("int", itemsize=2, shape=(2, 2)) + atom2 = tb.Int16Atom(shape=(2, 2), dflt=0) + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_kind_02(self): + atom1 = tb.Atom.from_kind("int", shape=(2, 2)) + atom2 = tb.Int32Atom(shape=(2, 2), dflt=0) + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_kind_03(self): + atom1 = tb.Atom.from_kind("int", shape=1) + atom2 = tb.Int32Atom(shape=(1,), dflt=0) + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_kind_04(self): + atom1 = tb.Atom.from_kind("string", itemsize=5, dflt=b"hello") + atom2 = tb.StringAtom(itemsize=5, shape=(), dflt=b"hello") + self.assertEqual(atom1, atom2) + self.assertEqual(str(atom1), str(atom2)) + + def test_from_kind_05(self): + # ValueError: no default item size for kind ``string`` + self.assertRaises( + ValueError, tb.Atom.from_kind, "string", dflt=b"hello" + ) + + def test_from_kind_06(self): + # ValueError: unknown kind: 'Float' + self.assertRaises(ValueError, tb.Atom.from_kind, "Float") + + +def suite(): + import doctest + + theSuite = common.unittest.TestSuite() + + for i in range(1): + # TODO: in numpy 2 the repr of various dtypes has changed breaking the + # doctests. When only numpy 2 is supported re-enable these tests. 
+ if parse_version(np.__version__) < parse_version("2.dev0"): + theSuite.addTest(doctest.DocTestSuite(tb.atom)) + theSuite.addTest(common.make_suite(AtomTestCase)) + theSuite.addTest(common.make_suite(RangeTestCase)) + theSuite.addTest(common.make_suite(DtypeTestCase)) + theSuite.addTest(common.make_suite(ReadFloatTestCase)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/test_utils.py b/venv/Lib/site-packages/tables/tests/test_utils.py new file mode 100644 index 0000000..dc293bf --- /dev/null +++ b/venv/Lib/site-packages/tables/tests/test_utils.py @@ -0,0 +1,93 @@ +import sys +from io import StringIO +from unittest.mock import patch + +import tables.scripts.ptdump as ptdump +import tables.scripts.pttree as pttree +import tables.scripts.ptrepack as ptrepack +from tables.tests import common + + +class ptrepackTestCase(common.PyTablesTestCase): + """Test ptrepack""" + + @patch.object(ptrepack, "copy_leaf") + @patch.object(ptrepack.tb, "open_file") + def test_paths_windows(self, mock_open_file, mock_copy_leaf): + """Checking handling of windows filenames: test gh-616""" + + # this filename has a semi-colon to check for + # regression of gh-616 + src_fn = "D:\\window~1\\path\\000\\infile" + src_path = "/" + dst_fn = "another\\path\\" + dst_path = "/path/in/outfile" + + argv = ["ptrepack", src_fn + ":" + src_path, dst_fn + ":" + dst_path] + with patch.object(sys, "argv", argv): + ptrepack.main() + + args, kwargs = mock_open_file.call_args_list[0] + self.assertEqual(args, (src_fn, "r")) + + args, kwargs = mock_copy_leaf.call_args_list[0] + self.assertEqual(args, (src_fn, dst_fn, src_path, dst_path)) + + +class ptdumpTestCase(common.PyTablesTestCase): + """Test ptdump""" + + @patch.object(ptdump.tb, "open_file") + @patch("sys.stdout", new_callable=StringIO) + def test_paths_windows(self, _, mock_open_file): + """Checking 
class pttreeTestCase(common.PyTablesTestCase):
    """Test pttree"""

    @patch.object(pttree.tb, "open_file")
    @patch.object(pttree, "get_tree_str")
    @patch("sys.stdout", new_callable=StringIO)
    def test_paths_windows(self, _, mock_get_tree_str, mock_open_file):
        """Checking handling of windows filenames: test gh-616"""

        # The drive letter puts a colon inside the file name itself, in
        # addition to the one separating file name and node path
        # (regression check for gh-616, here for pttree).
        filename = "D:\\window~1\\path\\000\\pttree"
        node_path = "/"

        fake_argv = ["pttree", f"{filename}:{node_path}"]
        with patch.object(sys, "argv", fake_argv):
            pttree.main()

        # Only the positional arguments of the first open_file call matter.
        positional, _keywords = mock_open_file.call_args_list[0]
        self.assertEqual(positional, (filename, "r"))
def test01_read(self):
    """Checking vlarray read.

    Reads the first row and the empty third row of ``/vlarray1`` back
    from the file and then checks that every filter setting stored
    with the array (compression level and library, shuffle, bitshuffle,
    fletcher32) matches the class attributes used to create it.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01_read..." % self.__class__.__name__)

    # Open the file read-only and fetch the ragged array
    self.h5file = tb.open_file(self.h5fname, "r")
    vlarray = self.h5file.get_node("/vlarray1")

    # Choose a small value for buffer size
    vlarray.nrowsinbuf = 3
    # Read some rows
    row = vlarray.read(0)[0]
    row2 = vlarray.read(2)[0]
    if common.verbose:
        print("Flavor:", vlarray.flavor)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", row)

    nrows = 5
    self.assertEqual(nrows, vlarray.nrows)
    if self.flavor == "numpy":
        self.assertEqual(type(row), np.ndarray)
        self.assertTrue(
            common.allequal(
                row, np.array([1, 2], dtype="int32"), self.flavor
            )
        )
        self.assertTrue(
            common.allequal(row2, np.array([], dtype="int32"), self.flavor)
        )
    elif self.flavor == "python":
        self.assertEqual(row, [1, 2])
        self.assertEqual(row2, [])
    self.assertEqual(len(row), 2)

    # Check filters:
    filters = vlarray.filters
    if self.compress != filters.complevel and common.verbose:
        print("Error in compress. Class:", self.__class__.__name__)
        print("self, vlarray:", self.compress, filters.complevel)
    self.assertEqual(filters.complevel, self.compress)
    # The library name is only meaningful when compression is active
    # and the library is actually available in this build.
    if self.compress > 0 and tb.which_lib_version(self.complib):
        self.assertEqual(filters.complib, self.complib)
    if self.shuffle != filters.shuffle and common.verbose:
        print("Error in shuffle. Class:", self.__class__.__name__)
        print("self, vlarray:", self.shuffle, filters.shuffle)
    self.assertEqual(self.shuffle, filters.shuffle)
    # Fixed: this check used to repeat the shuffle assertion, so the
    # bitshuffle setting was never actually verified.
    if self.bitshuffle != filters.bitshuffle and common.verbose:
        print("Error in bitshuffle. Class:", self.__class__.__name__)
        print("self, vlarray:", self.bitshuffle, filters.bitshuffle)
    self.assertEqual(self.bitshuffle, filters.bitshuffle)
    if self.fletcher32 != filters.fletcher32 and common.verbose:
        print("Error in fletcher32. Class:", self.__class__.__name__)
        print("self, vlarray:", self.fletcher32, filters.fletcher32)
    self.assertEqual(self.fletcher32, filters.fletcher32)
def test03_append(self):
    """Checking vlarray append."""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test03_append..." % self.__class__.__name__)

    # Open the existing file in append mode and grab the ragged array
    self.h5file = tb.open_file(self.h5fname, "a")
    vlarray = self.h5file.get_node("/vlarray1")

    # Add one more row, then force a small I/O buffer
    vlarray.append([7, 8, 9, 10])
    vlarray.nrowsinbuf = 3

    # First row, the empty row and the row that was just appended
    fetched = [vlarray[idx] for idx in (0, 2, -1)]
    expected = [[1, 2], [], [7, 8, 9, 10]]
    if common.verbose:
        print("Flavor:", vlarray.flavor)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", fetched[0])

    self.assertEqual(6, vlarray.nrows)
    if self.flavor == "numpy":
        self.assertEqual(type(fetched[0]), np.ndarray)
        for got, want in zip(fetched, expected):
            self.assertTrue(
                common.allequal(
                    got, np.array(want, dtype="int32"), self.flavor
                )
            )
    elif self.flavor == "python":
        for got, want in zip(fetched, expected):
            self.assertEqual(got, want)
    self.assertEqual(len(fetched[-1]), 4)
def test01_StringAtom(self):
    """Checking vlarray with NumPy string atoms ('numpy' flavor)"""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01_StringAtom..." % self.__class__.__name__)

    vlarray = self.h5file.create_vlarray(
        "/",
        "stringAtom",
        atom=tb.StringAtom(itemsize=3),
        title="Ragged array of strings",
    )
    vlarray.flavor = "numpy"
    for strings in (["1", "12", "123", "1234", "12345"], ["1", "12345"]):
        vlarray.append(np.array(strings))

    if self.reopen:
        node_path = vlarray._v_pathname
        self._reopen()
        vlarray = self.h5file.get_node(node_path)

    # Fetch every row in one go
    rows = vlarray.read()
    if common.verbose:
        print("Object read:", rows)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", rows[0])

    # Items longer than the 3-byte atom are expected back truncated
    expected = (["1", "12", "123", "123", "123"], ["1", "123"])
    self.assertEqual(vlarray.nrows, 2)
    for idx, want in enumerate(expected):
        np.testing.assert_array_equal(rows[idx], np.array(want, "S"))
    for idx, want in enumerate(expected):
        self.assertEqual(len(rows[idx]), len(want))
def test01a_2_StringAtom(self):
    """Checking vlarray with NumPy string atoms (NumPy flavor, no conv)"""

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test01a_2_StringAtom..." % self.__class__.__name__
        )

    vlarray = self.h5file.create_vlarray(
        "/",
        "stringAtom",
        atom=tb.StringAtom(itemsize=3),
        title="Ragged array of strings",
    )
    vlarray.flavor = "numpy"

    # Every item already fits in the 3-byte atom, so the rows read back
    # are expected to equal the rows written.
    contents = (["1", "12", "123", "123"], ["1", "2", "321"])
    for strings in contents:
        vlarray.append(np.array(strings))

    if self.reopen:
        node_path = vlarray._v_pathname
        self._reopen()
        vlarray = self.h5file.get_node(node_path)

    # Fetch every row in one go
    rows = vlarray.read()
    if common.verbose:
        print("Object read:", rows)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", rows[0])

    self.assertEqual(vlarray.nrows, 2)
    for idx, want in enumerate(contents):
        np.testing.assert_array_equal(rows[idx], np.array(want, "S"))
    for idx, want in enumerate(contents):
        self.assertEqual(len(rows[idx]), len(want))
def test01c_StringAtom(self):
    """Checking updating vlarray with NumPy string atoms
    ('numpy' flavor)"""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01c_StringAtom..." % self.__class__.__name__)

    vlarray = self.h5file.create_vlarray(
        "/",
        "stringAtom",
        atom=tb.StringAtom(itemsize=3),
        title="Ragged array of strings",
    )
    vlarray.flavor = "numpy"
    for initial in (["1", "12", "123", "1234", "12345"], ["1", "12345"]):
        vlarray.append(np.array(initial))

    # Overwrite both rows in place; the replacement rows keep the
    # original row lengths (5 and 2 items).
    replacements = (["1", "123", "12", "", "12345"], ["44", "4"])
    for idx, strings in enumerate(replacements):
        vlarray[idx] = np.array(strings)

    if self.reopen:
        node_path = vlarray._v_pathname
        self._reopen()
        vlarray = self.h5file.get_node(node_path)

    # Fetch every row in one go
    rows = vlarray.read()
    if common.verbose:
        print("Object read:", rows)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", rows[0])

    expected = (
        np.array([b"1", b"123", b"12", b"", b"123"]),
        np.array(["44", "4"], dtype="S3"),
    )
    self.assertEqual(vlarray.nrows, 2)
    for idx, want in enumerate(expected):
        self.assertTrue(common.allequal(rows[idx], want))
    for idx, want in enumerate(expected):
        self.assertEqual(len(rows[idx]), len(want))
def test02_BoolAtom(self):
    """Checking vlarray with boolean atoms."""

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test02_BoolAtom..." % self.__class__.__name__)

    vlarray = self.h5file.create_vlarray(
        "/",
        "BoolAtom",
        atom=tb.BoolAtom(),
        title="Ragged array of Booleans",
    )
    # Any non-zero input (the 3 below) is expected back as True
    for values in ([1, 0, 3], [1, 0]):
        vlarray.append(values)

    if self.reopen:
        node_path = vlarray._v_pathname
        self._reopen()
        vlarray = self.h5file.get_node(node_path)

    # Fetch every row in one go
    rows = vlarray.read()
    if common.verbose:
        print("Object read:", rows)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", rows[0])

    expected = ([1, 0, 1], [1, 0])
    self.assertEqual(vlarray.nrows, 2)
    for idx, want in enumerate(expected):
        self.assertTrue(
            common.allequal(rows[idx], np.array(want, dtype="bool"))
        )
    for idx, want in enumerate(expected):
        self.assertEqual(len(rows[idx]), len(want))
# Unavailable in some platforms + ] + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_IntAtom..." % self.__class__.__name__) + + for atype in ttypes: + vlarray = self.h5file.create_vlarray( + "/", atype, atom=tb.Atom.from_sctype(atype) + ) + vlarray.append([1, 2, 3]) + vlarray.append([1, 0]) + + if self.reopen: + name = vlarray._v_pathname + self._reopen(mode="a") + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Testing type:", atype) + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal(row[0], np.array([1, 2, 3], dtype=atype)) + ) + self.assertTrue( + common.allequal(row[1], np.array([1, 0], dtype=atype)) + ) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 2) + + def test03a_IntAtom(self): + """Checking vlarray with integer atoms (byteorder swapped)""" + + ttypes = { + "int8": np.int8, + "uint8": np.uint8, + "int16": np.int16, + "uint16": np.uint16, + "int32": np.int32, + "uint32": np.uint32, + "int64": np.int64, + # "uint64": np.int64, # Unavailable in some platforms + } + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03a_IntAtom..." 
def test03b_IntAtom(self):
    """Checking updating vlarray with integer atoms.

    For each integer type a two-row array is created, both rows are
    overwritten in place, and the rows read back (optionally after
    reopening the file) must hold the new values in the same type.
    """

    ttypes = [
        "int8",
        "uint8",
        "int16",
        "uint16",
        "int32",
        "uint32",
        "int64",
        # uint64 is left out: unavailable in some platforms
    ]
    if common.verbose:
        print("\n", "-=" * 30)
        # Fixed: the banner used to announce "test03_IntAtom"
        print("Running %s.test03b_IntAtom..." % self.__class__.__name__)

    for atype in ttypes:
        vlarray = self.h5file.create_vlarray(
            "/", atype, atom=tb.Atom.from_sctype(atype)
        )
        vlarray.append([1, 2, 3])
        vlarray.append([1, 0])

        # Modify rows (the replacements keep the original row lengths)
        vlarray[0] = (3, 2, 1)
        vlarray[1] = (0, 1)

        if self.reopen:
            name = vlarray._v_pathname
            # Append mode: the next iteration creates another node
            self._reopen(mode="a")
            vlarray = self.h5file.get_node(name)

        # Read all the rows:
        row = vlarray.read()
        if common.verbose:
            print("Testing type:", atype)
            print("Object read:", row)
            print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
            print("First row in vlarray ==>", row[0])

        self.assertEqual(vlarray.nrows, 2)
        self.assertTrue(
            common.allequal(row[0], np.array([3, 2, 1], dtype=atype))
        )
        self.assertTrue(
            common.allequal(row[1], np.array([0, 1], dtype=atype))
        )
        self.assertEqual(len(row[0]), 3)
        self.assertEqual(len(row[1]), 2)
def test03d_IntAtom(self):
    """Checking updating vlarray with integer atoms (another byteorder)"""

    sctypes = {
        "int8": np.int8,
        "uint8": np.uint8,
        "int16": np.int16,
        "uint16": np.uint16,
        "int32": np.int32,
        "uint32": np.uint32,
        "int64": np.int64,
        # uint64 is left out: unavailable in some platforms
    }
    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test03d_IntAtom..." % self.__class__.__name__)

    def byteswapped(values, dtype):
        # Same logical values, stored with the opposite byte order
        arr = np.array(values, dtype=dtype).byteswap()
        return arr.view(arr.dtype.newbyteorder())

    # Create the arrays with the byte order opposite to the native one
    byteorder = "big" if sys.byteorder == "little" else "little"
    for type_name, sctype in sctypes.items():
        vlarray = self.h5file.create_vlarray(
            "/",
            type_name,
            atom=tb.Atom.from_sctype(sctype),
            byteorder=byteorder,
        )
        vlarray.append(np.array([1, 2, 3], dtype=type_name))
        vlarray.append(np.array([1, 0], dtype=type_name))

        # Overwrite both rows with byte-swapped input
        vlarray[0] = byteswapped([3, 2, 1], type_name)
        vlarray[1] = byteswapped([0, 1], type_name)

        if self.reopen:
            node_path = vlarray._v_pathname
            self._reopen(mode="a")
            vlarray = self.h5file.get_node(node_path)

        # Fetch every row in one go
        rows = vlarray.read()
        if common.verbose:
            print("Testing type:", type_name)
            print("Object read:", rows)
            print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
            print("First row in vlarray ==>", rows[0])

        # Single-byte types report no byte order at all; skip them
        row_order = tb.utils.byteorders[rows[0].dtype.byteorder]
        if row_order != "irrelevant":
            self.assertEqual(row_order, sys.byteorder)
            self.assertEqual(vlarray.byteorder, byteorder)
        self.assertEqual(vlarray.nrows, 2)
        self.assertTrue(
            common.allequal(rows[0], np.array([3, 2, 1], dtype=sctype))
        )
        self.assertTrue(
            common.allequal(rows[1], np.array([0, 1], dtype=sctype))
        )
        self.assertEqual(len(rows[0]), 3)
        self.assertEqual(len(rows[1]), 2)
def test04a_FloatAtom(self):
    """Checking vlarray with float atoms (byteorder swapped)"""

    sctypes = {
        "float32": np.float32,
        "float64": np.float64,
    }
    # Half/extended precision only when PyTables exposes the atom class
    for bits in (16, 96, 128):
        if hasattr(tb, "Float%dAtom" % bits):
            sctypes["float%d" % bits] = getattr(np, "float%d" % bits)

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test04a_FloatAtom..." % self.__class__.__name__)

    def byteswapped(values, dtype):
        # Same logical values, stored with the opposite byte order
        arr = np.array(values, dtype=dtype).byteswap()
        return arr.view(arr.dtype.newbyteorder())

    contents = ([1.3, 2.2, 3.3], [5.96, 0.597])
    for type_name, sctype in sctypes.items():
        vlarray = self.h5file.create_vlarray(
            "/", type_name, atom=tb.Atom.from_sctype(sctype)
        )
        for values in contents:
            vlarray.append(byteswapped(values, type_name))

        if self.reopen:
            node_path = vlarray._v_pathname
            self._reopen(mode="a")
            vlarray = self.h5file.get_node(node_path)

        # Fetch every row in one go
        rows = vlarray.read()
        if common.verbose:
            print("Testing type:", type_name)
            print("Object read:", rows)
            print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
            print("First row in vlarray ==>", rows[0])

        self.assertEqual(vlarray.nrows, 2)
        for idx, values in enumerate(contents):
            self.assertTrue(
                common.allequal(rows[idx], np.array(values, dtype=sctype))
            )
        for idx, values in enumerate(contents):
            self.assertEqual(len(rows[idx]), len(values))
def test04c_FloatAtom(self):
    """Checking updating vlarray with float atoms (byteorder swapped)"""

    sctypes = {
        "float32": np.float32,
        "float64": np.float64,
    }
    # Half/extended precision only when PyTables exposes the atom class
    for bits in (16, 96, 128):
        if hasattr(tb, "Float%dAtom" % bits):
            sctypes["float%d" % bits] = getattr(np, "float%d" % bits)

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test04c_FloatAtom..." % self.__class__.__name__)

    def byteswapped(values, dtype):
        # Same logical values, stored with the opposite byte order
        arr = np.array(values, dtype=dtype).byteswap()
        return arr.view(arr.dtype.newbyteorder())

    updated = ([4.3, 2.2, 4.3], [1.123, 1.1e-3])
    for type_name, sctype in sctypes.items():
        vlarray = self.h5file.create_vlarray(
            "/", type_name, atom=tb.Atom.from_sctype(sctype)
        )
        # Initial content, written in native byte order
        vlarray.append(np.array([1.3, 2.2, 3.3], dtype=type_name))
        vlarray.append(np.array([1, 0], dtype=type_name))

        # Overwrite both rows with byte-swapped input
        for idx, values in enumerate(updated):
            vlarray[idx] = byteswapped(values, type_name)

        if self.reopen:
            node_path = vlarray._v_pathname
            self._reopen(mode="a")
            vlarray = self.h5file.get_node(node_path)

        # Fetch every row in one go
        rows = vlarray.read()
        if common.verbose:
            print("Testing type:", type_name)
            print("Object read:", rows)
            print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
            print("First row in vlarray ==>", rows[0])

        self.assertEqual(vlarray.nrows, 2)
        for idx, values in enumerate(updated):
            self.assertTrue(
                common.allequal(rows[idx], np.array(values, dtype=sctype))
            )
        for idx, values in enumerate(updated):
            self.assertEqual(len(rows[idx]), len(values))
% self.__class__.__name__) + + byteorder = {"little": "big", "big": "little"}[sys.byteorder] + for atype in ttypes: + vlarray = self.h5file.create_vlarray( + "/", + atype, + atom=tb.Atom.from_sctype(ttypes[atype]), + byteorder=byteorder, + ) + a0 = np.array([1.3, 2.2, 3.3], dtype=atype) + vlarray.append(a0) + a1 = np.array([1, 0], dtype=atype) + vlarray.append(a1) + + # Modify rows + a0 = np.array([4.3, 2.2, 4.3], dtype=atype) + a0 = a0.byteswap() + a0 = a0.view(a0.dtype.newbyteorder()) + vlarray[0] = a0 + a1 = np.array([1.123, 1.1e-3], dtype=atype) + a1 = a1.byteswap() + a1 = a1.view(a1.dtype.newbyteorder()) + vlarray[1] = a1 + + if self.reopen: + name = vlarray._v_pathname + self._reopen(mode="a") + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Testing type:", atype) + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.byteorder, byteorder) + self.assertEqual( + tb.utils.byteorders[row[0].dtype.byteorder], sys.byteorder + ) + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], np.array([4.3, 2.2, 4.3], dtype=ttypes[atype]) + ) + ) + self.assertTrue( + common.allequal( + row[1], np.array([1.123, 1.1e-3], dtype=ttypes[atype]) + ) + ) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 2) + + def test04_ComplexAtom(self): + """Checking vlarray with numerical complex atoms.""" + + ttypes = [ + "complex64", + "complex128", + ] + + if hasattr(tb, "Complex192Atom"): + ttypes.append("complex192") + if hasattr(tb, "Complex256Atom"): + ttypes.append("complex256") + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_ComplexAtom..." 
% self.__class__.__name__) + + for atype in ttypes: + vlarray = self.h5file.create_vlarray( + "/", atype, atom=tb.Atom.from_sctype(atype) + ) + vlarray.append([(1.3 + 0j), (0 + 2.2j), (3.3 + 3.3j)]) + vlarray.append([(0 - 5.96j), (0.597 + 0j)]) + + if self.reopen: + name = vlarray._v_pathname + self._reopen(mode="a") + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Testing type:", atype) + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], + np.array([(1.3 + 0j), (0 + 2.2j), (3.3 + 3.3j)], atype), + ) + ) + self.assertTrue( + common.allequal( + row[1], np.array([(0 - 5.96j), (0.597 + 0j)], atype) + ) + ) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 2) + + def test04b_ComplexAtom(self): + """Checking modifying vlarray with numerical complex atoms.""" + + ttypes = [ + "complex64", + "complex128", + ] + + if hasattr(tb, "Complex192Atom"): + ttypes.append("complex192") + if hasattr(tb, "Complex256Atom"): + ttypes.append("complex256") + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test04b_ComplexAtom..." 
% self.__class__.__name__ + ) + + for atype in ttypes: + vlarray = self.h5file.create_vlarray( + "/", atype, atom=tb.Atom.from_sctype(atype) + ) + vlarray.append([(1.3 + 0j), (0 + 2.2j), (3.3 + 3.3j)]) + vlarray.append([(0 - 5.96j), (0.597 + 0j)]) + + # Modify the rows + vlarray[0] = ((1.4 + 0j), (0 + 4.2j), (3.3 + 4.3j)) + vlarray[1] = ((4 - 5.96j), (0.597 + 4j)) + + if self.reopen: + name = vlarray._v_pathname + self._reopen(mode="a") + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Testing type:", atype) + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], + np.array([(1.4 + 0j), (0 + 4.2j), (3.3 + 4.3j)], atype), + ) + ) + self.assertTrue( + common.allequal( + row[1], np.array([(4 - 5.96j), (0.597 + 4j)], atype) + ) + ) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 2) + + def test05_VLStringAtom(self): + """Checking vlarray with variable length strings.""" + + # Skip the test if the default encoding has been mangled. + if sys.getdefaultencoding() != "ascii": + return + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test05_VLStringAtom..." % self.__class__.__name__ + ) + + vlarray = self.h5file.create_vlarray( + "/", "VLStringAtom", atom=tb.VLStringAtom() + ) + vlarray.append(b"asd") + vlarray.append(b"asd\xe4") + vlarray.append(b"aaana") + vlarray.append(b"") + # Check for ticket #62. + self.assertRaises(TypeError, vlarray.append, [b"foo", b"bar"]) + # `VLStringAtom` makes no encoding assumptions. See ticket #51. 
+ self.assertRaises(UnicodeEncodeError, vlarray.append, "asd\xe4") + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 4) + self.assertEqual(row[0], b"asd") + self.assertEqual(row[1], b"asd\xe4") + self.assertEqual(row[2], b"aaana") + self.assertEqual(row[3], b"") + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 4) + self.assertEqual(len(row[2]), 5) + self.assertEqual(len(row[3]), 0) + + def test05b_VLStringAtom(self): + """Checking updating vlarray with variable length strings.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test05b_VLStringAtom..." % self.__class__.__name__ + ) + + vlarray = self.h5file.create_vlarray( + "/", "VLStringAtom", atom=tb.VLStringAtom() + ) + vlarray.append(b"asd") + vlarray.append(b"aaana") + + # Modify values + vlarray[0] = b"as4" + vlarray[1] = b"aaanc" + self.assertRaises(ValueError, vlarray.__setitem__, 1, b"shrt") + self.assertRaises(ValueError, vlarray.__setitem__, 1, b"toolong") + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", repr(row[0])) + print("Second row in vlarray ==>", repr(row[1])) + + self.assertEqual(vlarray.nrows, 2) + self.assertEqual(row[0], b"as4") + self.assertEqual(row[1], b"aaanc") + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 5) + + def test06a_Object(self): + """Checking vlarray with object atoms.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06a_Object..." 
% self.__class__.__name__) + + vlarray = self.h5file.create_vlarray( + "/", "Object", atom=tb.ObjectAtom() + ) + vlarray.append( + [[1, 2, 3], "aaa", "aaa\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd"] + ) + vlarray.append([3, 4, C()]) + vlarray.append(42) + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 3) + self.assertEqual( + row[0], + [[1, 2, 3], "aaa", "aaa\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd"], + ) + list1 = list(row[1]) + obj = list1.pop() + self.assertEqual(list1, [3, 4]) + self.assertEqual(obj.c, C().c) + self.assertEqual(row[2], 42) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 3) + self.assertRaises(TypeError, len, row[2]) + + def test06b_Object(self): + """Checking updating vlarray with object atoms.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06b_Object..." 
% self.__class__.__name__) + + vlarray = self.h5file.create_vlarray( + "/", "Object", atom=tb.ObjectAtom() + ) + # When updating an object, this seems to change the number + # of bytes that pickle.dumps generates + # vlarray.append( + # ([1,2,3], "aaa", "aaa\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd")) + vlarray.append(([1, 2, 3], "aaa", "\xef\xbf\xbd\xef\xbf\xbd4")) + # vlarray.append([3,4, C()]) + vlarray.append([3, 4, [24]]) + + # Modify the rows + # vlarray[0] = ([1,2,4], "aa4", "aaa\xef\xbf\xbd\xef\xbf\xbd4") + vlarray[0] = ([1, 2, 4], "aa4", "\xef\xbf\xbd\xef\xbf\xbd5") + # vlarray[1] = (3,4, C()) + vlarray[1] = [4, 4, [24]] + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 2) + self.assertEqual( + row[0], ([1, 2, 4], "aa4", "\xef\xbf\xbd\xef\xbf\xbd5") + ) + list1 = list(row[1]) + obj = list1.pop() + self.assertEqual(list1, [4, 4]) + + # self.assertEqual(obj.c, C().c) + self.assertEqual(obj, [24]) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 3) + + def test06c_Object(self): + """Checking vlarray with object atoms (numpy arrays as values)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06c_Object..." 
% self.__class__.__name__) + + vlarray = self.h5file.create_vlarray( + "/", "Object", atom=tb.ObjectAtom() + ) + vlarray.append(np.array([[1, 2], [0, 4]], "i4")) + vlarray.append(np.array([0, 1, 2, 3], "i8")) + vlarray.append(np.array(42, "i1")) + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 3) + self.assertTrue( + common.allequal(row[0], np.array([[1, 2], [0, 4]], "i4")) + ) + self.assertTrue(common.allequal(row[1], np.array([0, 1, 2, 3], "i8"))) + self.assertTrue(common.allequal(row[2], np.array(42, "i1"))) + + def test06d_Object(self): + """Checking updating vlarray with object atoms (numpy arrays)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test06d_Object..." % self.__class__.__name__) + + vlarray = self.h5file.create_vlarray( + "/", "Object", atom=tb.ObjectAtom() + ) + vlarray.append(np.array([[1, 2], [0, 4]], "i4")) + vlarray.append(np.array([0, 1, 2, 3], "i8")) + vlarray.append(np.array(42, "i1")) + + # Modify the rows. Since PyTables 2.2.1 we use a binary + # pickle for arrays and ObjectAtoms, so the next should take + # the same space than the above. 
+ vlarray[0] = np.array([[1, 0], [0, 4]], "i4") + vlarray[1] = np.array([0, 1, 0, 3], "i8") + vlarray[2] = np.array(22, "i1") + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 3) + self.assertTrue( + common.allequal(row[0], np.array([[1, 0], [0, 4]], "i4")) + ) + self.assertTrue(common.allequal(row[1], np.array([0, 1, 0, 3], "i8"))) + self.assertTrue(common.allequal(row[2], np.array(22, "i1"))) + + def test07_VLUnicodeAtom(self): + """Checking vlarray with variable length Unicode strings.""" + + # Skip the test if the default encoding has been mangled. + if sys.getdefaultencoding() != "ascii": + return + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test07_VLUnicodeAtom..." % self.__class__.__name__ + ) + + vlarray = self.h5file.create_vlarray( + "/", "VLUnicodeAtom", atom=tb.VLUnicodeAtom() + ) + vlarray.append("asd") + vlarray.append("asd\u0140") + vlarray.append("aaana") + vlarray.append("") + # Check for ticket #62. + self.assertRaises(TypeError, vlarray.append, ["foo", "bar"]) + # `VLUnicodeAtom` makes no encoding assumptions. 
+ self.assertRaises(UnicodeDecodeError, vlarray.append, "asd\xe4") + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 4) + self.assertEqual(row[0], "asd") + self.assertEqual(row[1], "asd\u0140") + self.assertEqual(row[2], "aaana") + self.assertEqual(row[3], "") + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 4) + self.assertEqual(len(row[2]), 5) + self.assertEqual(len(row[3]), 0) + + def test07b_VLUnicodeAtom(self): + """Checking updating vlarray with variable length Unicode strings.""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test07b_VLUnicodeAtom..." % self.__class__.__name__ + ) + + vlarray = self.h5file.create_vlarray( + "/", "VLUnicodeAtom", atom=tb.VLUnicodeAtom() + ) + vlarray.append("asd") + vlarray.append("aaan\xe4") + + # Modify values + vlarray[0] = "as\xe4" + vlarray[1] = "aaan\u0140" + self.assertRaises(ValueError, vlarray.__setitem__, 1, "shrt") + self.assertRaises(ValueError, vlarray.__setitem__, 1, "toolong") + + if self.reopen: + name = vlarray._v_pathname + self._reopen() + vlarray = self.h5file.get_node(name) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", repr(row[0])) + print("Second row in vlarray ==>", repr(row[1])) + + self.assertEqual(vlarray.nrows, 2) + self.assertEqual(row[0], "as\xe4") + self.assertEqual(row[1], "aaan\u0140") + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 5) + + +class TypesReopenTestCase(TypesTestCase): + title = "Reopen" + reopen = True + + +class TypesNoReopenTestCase(TypesTestCase): + title = "No reopen" + reopen = 
False + + +class MDTypesTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "w" + compress = 0 + complib = "zlib" # Default compression library + + def setUp(self): + super().setUp() + self.rootgroup = self.h5file.root + + def test01_StringAtom(self): + """Checking vlarray with MD NumPy string atoms.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_StringAtom..." % self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "stringAtom", + tb.StringAtom(itemsize=3, shape=(2,)), + "Ragged array of strings", + ) + vlarray.append([["123", "45"], ["45", "123"]]) + vlarray.append([["s", "abc"], ["abc", "f"], ["s", "ab"], ["ab", "f"]]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + np.testing.assert_array_equal( + row[0], np.array([["123", "45"], ["45", "123"]], "S") + ) + np.testing.assert_array_equal( + row[1], + np.array( + [["s", "abc"], ["abc", "f"], ["s", "ab"], ["ab", "f"]], "S" + ), + ) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 4) + + def test01b_StringAtom(self): + """Checking vlarray with MD NumPy string atoms ('python' flavor)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_StringAtom..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "stringAtom", + tb.StringAtom(itemsize=3, shape=(2,)), + "Ragged array of strings", + ) + vlarray.flavor = "python" + vlarray.append([["123", "45"], ["45", "123"]]) + vlarray.append([["s", "abc"], ["abc", "f"], ["s", "ab"], ["ab", "f"]]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertEqual(row[0], [[b"123", b"45"], [b"45", b"123"]]) + self.assertEqual( + row[1], + [[b"s", b"abc"], [b"abc", b"f"], [b"s", b"ab"], [b"ab", b"f"]], + ) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 4) + + def test01c_StringAtom(self): + """Checking vlarray with MD NumPy string atoms (with offset)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01c_StringAtom..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "stringAtom", + tb.StringAtom(itemsize=3, shape=(2,)), + "Ragged array of strings", + ) + vlarray.flavor = "python" + a = np.array([["a", "b"], ["123", "45"], ["45", "123"]], dtype="S3") + vlarray.append(a[1:]) + a = np.array( + [ + ["s", "a"], + ["ab", "f"], + ["s", "abc"], + ["abc", "f"], + ["s", "ab"], + ["ab", "f"], + ] + ) + vlarray.append(a[2:]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertEqual(row[0], [[b"123", b"45"], [b"45", b"123"]]) + self.assertEqual( + row[1], + [[b"s", b"abc"], [b"abc", b"f"], [b"s", b"ab"], [b"ab", b"f"]], + ) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 4) + + def test01d_StringAtom(self): + """Checking vlarray with MD NumPy string atoms (with stride)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01d_StringAtom..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "stringAtom", + tb.StringAtom(itemsize=3, shape=(2,)), + "Ragged array of strings", + ) + vlarray.flavor = "python" + a = np.array([["a", "b"], ["123", "45"], ["45", "123"]], dtype="S3") + vlarray.append(a[1::2]) + a = np.array( + [ + ["s", "a"], + ["ab", "f"], + ["s", "abc"], + ["abc", "f"], + ["s", "ab"], + ["ab", "f"], + ] + ) + vlarray.append(a[::3]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertEqual(row[0], [[b"123", b"45"]]) + self.assertEqual(row[1], [[b"s", b"a"], [b"abc", b"f"]]) + self.assertEqual(len(row[0]), 1) + self.assertEqual(len(row[1]), 2) + + def test02_BoolAtom(self): + """Checking vlarray with MD boolean atoms.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_BoolAtom..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "BoolAtom", + tb.BoolAtom(shape=(3,)), + "Ragged array of Booleans", + ) + vlarray.append([(1, 0, 3), (1, 1, 1), (0, 0, 0)]) + vlarray.append([(1, 0, 0)]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], + np.array([[1, 0, 1], [1, 1, 1], [0, 0, 0]], dtype="bool"), + ) + ) + self.assertTrue( + common.allequal(row[1], np.array([[1, 0, 0]], dtype="bool")) + ) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 1) + + def test02b_BoolAtom(self): + """Checking vlarray with MD boolean atoms (with offset)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02b_BoolAtom..." % self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "BoolAtom", + tb.BoolAtom(shape=(3,)), + "Ragged array of Booleans", + ) + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (0, 0, 0)], dtype="bool" + ) + vlarray.append(a[1:]) # Create an offset + a = np.array([(1, 1, 1), (1, 0, 0)], dtype="bool") + vlarray.append(a[1:]) # Create an offset + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], + np.array([[1, 0, 1], [1, 1, 1], [0, 0, 0]], dtype="bool"), + ) + ) + self.assertTrue( + common.allequal(row[1], np.array([[1, 0, 0]], dtype="bool")) + ) + self.assertEqual(len(row[0]), 3) + self.assertEqual(len(row[1]), 1) + + def test02c_BoolAtom(self): + """Checking vlarray with MD boolean atoms (with strides)""" + + root 
= self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02c_BoolAtom..." % self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "BoolAtom", + tb.BoolAtom(shape=(3,)), + "Ragged array of Booleans", + ) + a = np.array( + [(0, 0, 0), (1, 0, 3), (1, 1, 1), (0, 0, 0)], dtype="bool" + ) + vlarray.append(a[1::2]) # Create an strided array + a = np.array([(1, 1, 1), (1, 0, 0), (0, 0, 0)], dtype="bool") + vlarray.append(a[::2]) # Create an strided array + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], np.array([[1, 0, 1], [0, 0, 0]], dtype="bool") + ) + ) + self.assertTrue( + common.allequal( + row[1], np.array([[1, 1, 1], [0, 0, 0]], dtype="bool") + ) + ) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 2) + + def test03_IntAtom(self): + """Checking vlarray with MD integer atoms.""" + + ttypes = [ + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + # "UInt64", # Unavailable in some platforms + ] + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_IntAtom..." 
% self.__class__.__name__) + + # Create an string atom + for atype in ttypes: + vlarray = self.h5file.create_vlarray( + root, atype, atom=tb.Atom.from_sctype(atype, (2, 3)) + ) + vlarray.append([np.ones((2, 3), atype), np.zeros((2, 3), atype)]) + vlarray.append([np.ones((2, 3), atype) * 100]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Testing type:", atype) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", repr(row[1])) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], + np.array([np.ones((2, 3)), np.zeros((2, 3))], atype), + ) + ) + self.assertTrue( + common.allequal( + row[1], np.array([np.ones((2, 3)) * 100], atype) + ) + ) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 1) + + def test04_FloatAtom(self): + """Checking vlarray with MD floating point atoms.""" + + ttypes = [ + "float32", + "float64", + "complex64", + "complex128", + ] + + for name in ("float16", "float96", "float128"): + atomname = name.capitalize() + "Atom" + if hasattr(tb, atomname): + ttypes.append(name) + for itemsize in (192, 256): + atomname = "Complex%dAtom" % itemsize + if hasattr(tb, atomname): + ttypes.append("complex%d" % (itemsize)) + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_FloatAtom..." 
% self.__class__.__name__) + + # Create an string atom + for atype in ttypes: + vlarray = self.h5file.create_vlarray( + root, atype, atom=tb.Atom.from_sctype(atype, (5, 2, 6)) + ) + vlarray.append( + [np.ones((5, 2, 6), atype) * 1.3, np.zeros((5, 2, 6), atype)] + ) + vlarray.append([np.ones((5, 2, 6), atype) * 2.0e4]) + + # Read all the rows: + row = vlarray.read() + if common.verbose: + print("Testing type:", atype) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, 2) + self.assertTrue( + common.allequal( + row[0], + np.array( + [np.ones((5, 2, 6)) * 1.3, np.zeros((5, 2, 6))], atype + ), + ) + ) + self.assertTrue( + common.allequal( + row[1], np.array([np.ones((5, 2, 6)) * 2.0e4], atype) + ) + ) + self.assertEqual(len(row[0]), 2) + self.assertEqual(len(row[1]), 1) + + +class MDTypesNumPyTestCase(MDTypesTestCase): + title = "MDTypes" + + +class AppendShapeTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "w" + + def setUp(self): + super().setUp() + self.rootgroup = self.h5file.root + + def test00_difinputs(self): + """Checking vlarray.append() with different inputs.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test00_difinputs..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, "vlarray", tb.Int32Atom(), "Ragged array of ints" + ) + vlarray.flavor = "python" + + # Check different ways to input + # All of the next should lead to the same rows + vlarray.append((1, 2, 3)) # a tuple + vlarray.append([1, 2, 3]) # a unique list + vlarray.append(np.array([1, 2, 3], dtype="int32")) # and array + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + vlarray = self.h5file.root.vlarray + + # Read all the vlarray + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", row[0]) + + self.assertEqual(vlarray.nrows, 3) + self.assertEqual(row[0], [1, 2, 3]) + self.assertEqual(row[1], [1, 2, 3]) + self.assertEqual(row[2], [1, 2, 3]) + + def test01_toomanydims(self): + """Checking vlarray.append() with too many dimensions.""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_toomanydims..." % self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, + "vlarray", + tb.StringAtom(itemsize=3), + "Ragged array of strings", + ) + # Adding an array with one dimensionality more than allowed + with self.assertRaises(ValueError): + vlarray.append([["123", "456", "3"]]) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + vlarray = self.h5file.root.vlarray + + # Read all the rows (there should be none) + row = vlarray.read() + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + + self.assertEqual(vlarray.nrows, 0) + + def test02_zerodims(self): + """Checking vlarray.append() with a zero-dimensional array""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_zerodims..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, "vlarray", tb.Int32Atom(), "Ragged array of ints" + ) + vlarray.append(np.zeros(dtype="int32", shape=(6, 0))) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + vlarray = self.h5file.root.vlarray + + # Read the only row in vlarray + row = vlarray.read(0)[0] + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", repr(row)) + + self.assertEqual(vlarray.nrows, 1) + self.assertTrue( + common.allequal(row, np.zeros(dtype="int32", shape=(0,))) + ) + self.assertEqual(len(row), 0) + + def test03a_cast(self): + """Checking vlarray.append() with a casted array (upgrading case)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03a_cast..." % self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, "vlarray", tb.Int32Atom(), "Ragged array of ints" + ) + # This type has to be upgraded + vlarray.append(np.array([1, 2], dtype="int16")) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + vlarray = self.h5file.root.vlarray + + # Read the only row in vlarray + row = vlarray.read(0)[0] + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", repr(row)) + + self.assertEqual(vlarray.nrows, 1) + self.assertTrue(common.allequal(row, np.array([1, 2], dtype="int32"))) + self.assertEqual(len(row), 2) + + def test03b_cast(self): + """Checking vlarray.append() with a casted array (downgrading case)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03b_cast..." 
% self.__class__.__name__) + + # Create an string atom + vlarray = self.h5file.create_vlarray( + root, "vlarray", tb.Int32Atom(), "Ragged array of ints" + ) + # This type has to be downcasted + vlarray.append(np.array([1, 2], dtype="float64")) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + vlarray = self.h5file.root.vlarray + + # Read the only row in vlarray + row = vlarray.read(0)[0] + if common.verbose: + print("Object read:", row) + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("First row in vlarray ==>", repr(row)) + + self.assertEqual(vlarray.nrows, 1) + self.assertTrue(common.allequal(row, np.array([1, 2], dtype="int32"))) + self.assertEqual(len(row), 2) + + +class OpenAppendShapeTestCase(AppendShapeTestCase): + close = 0 + + +class CloseAppendShapeTestCase(AppendShapeTestCase): + close = 1 + + +class FlavorTestCase(common.TempFileMixin, common.PyTablesTestCase): + open_mode = "w" + compress = 0 + complib = "zlib" # Default compression library + + def setUp(self): + super().setUp() + self.rootgroup = self.h5file.root + + def test01a_EmptyVLArray(self): + """Checking empty vlarrays with different flavors (closing the file)""" + + root = self.rootgroup + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test01_EmptyVLArray..." 
def test02_BooleanAtom(self):
    """Checking vlarray with different flavors (boolean versions).

    Three rows are appended to a ``BoolAtom`` vlarray (three items, an
    empty row and two items), read back with a single ``read()`` call and
    compared, row by row, with the values expected for ``self.flavor``.
    The expected rows encode that non-zero integers read back as true and
    zero as false.
    """

    root = self.rootgroup
    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test02_BooleanAtom..." % self.__class__.__name__
        )

    # Create a vlarray of booleans
    vlarray = self.h5file.create_vlarray(root, "Bool", tb.BoolAtom())
    vlarray.flavor = self.flavor
    vlarray.append([1, 2, 3])
    vlarray.append(())  # Empty row
    vlarray.append([100, 0])

    # Read all the rows:
    row = vlarray.read()
    if common.verbose:
        print("Testing flavor:", self.flavor)
        print("Object read:", row)
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("First row in vlarray ==>", row[0])

    self.assertEqual(vlarray.nrows, 3)
    self.assertEqual(len(row[0]), 3)
    self.assertEqual(len(row[1]), 0)
    self.assertEqual(len(row[2]), 2)
    if self.flavor == "python":
        arr1 = [1, 1, 1]
        arr2 = []
        arr3 = [1, 0]
    elif self.flavor == "numpy":
        arr1 = np.array([1, 1, 1], dtype="bool")
        arr2 = np.array([], dtype="bool")
        arr3 = np.array([1, 0], dtype="bool")

    if self.flavor == "numpy":
        self.assertTrue(common.allequal(row[0], arr1, self.flavor))
        self.assertTrue(common.allequal(row[1], arr2, self.flavor))
        # The third row must be compared with its own expected value;
        # checking row[1] a second time would leave it unverified.
        self.assertTrue(common.allequal(row[2], arr3, self.flavor))
    else:
        # 'python' flavor
        self.assertEqual(row[0], arr1)
        self.assertEqual(row[1], arr2)
        self.assertEqual(row[2], arr3)
def test03b_IntAtom(self):
    """Checking vlarray flavors (integer versions and closed file).

    For every integer type in ``ttypes`` a vlarray named after the type is
    created and filled with three rows (three items, an empty row and two
    items).  The file is then reopened in append mode, the node is looked
    up again, and the rows read back are compared with the values expected
    for ``self.flavor``.
    """

    ttypes = [
        "int8",
        "uint8",
        "int16",
        "uint16",
        "int32",
        "uint32",
        "int64",
        # Not checked because some platforms does not support it
        # "UInt64",
    ]

    root = self.rootgroup
    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test03b_IntAtom..." % self.__class__.__name__)

    for atype in ttypes:
        # Create an integer vlarray of the current type and fill it
        vlarray = self.h5file.create_vlarray(
            root, atype, tb.Atom.from_sctype(atype)
        )
        vlarray.flavor = self.flavor
        vlarray.append([1, 2, 3])
        vlarray.append(())  # Empty row
        vlarray.append([100, 0])

        self._reopen(mode="a")  # open in "a"ppend mode
        # Very important: take the root group from the reopened file; it
        # is used both for the lookup below and for the next creation.
        root = self.h5file.root
        vlarray = self.h5file.get_node(root, str(atype))

        # Read all the rows:
        row = vlarray.read()
        if common.verbose:
            print("Testing flavor:", self.flavor)
            print("Object read:", row)
            print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
            print("First row in vlarray ==>", row[0])

        self.assertEqual(vlarray.nrows, 3)
        self.assertEqual(len(row[0]), 3)
        self.assertEqual(len(row[1]), 0)
        self.assertEqual(len(row[2]), 2)
        if self.flavor == "python":
            arr1 = [1, 2, 3]
            arr2 = []
            arr3 = [100, 0]
        elif self.flavor == "numpy":
            arr1 = np.array([1, 2, 3], dtype=atype)
            arr2 = np.array([], dtype=atype)
            arr3 = np.array([100, 0], dtype=atype)

        if self.flavor == "numpy":
            self.assertTrue(common.allequal(row[0], arr1, self.flavor))
            self.assertTrue(common.allequal(row[1], arr2, self.flavor))
            self.assertTrue(common.allequal(row[2], arr3, self.flavor))
        else:
            # "python" flavor
            self.assertEqual(row[0], arr1)
            self.assertEqual(row[1], arr2)
            self.assertEqual(row[2], arr3)
def populateFile(self):
    """Create the ``vlarray`` node under ``self.rootgroup`` and fill it.

    The node holds ``Int32Atom`` items and uses the compression level and
    library configured on the test case.  ``self.nrows`` rows are
    appended; row number ``n`` is the list ``[0, 1, ..., n - 1]``.
    """
    compression = tb.Filters(complevel=self.compress, complib=self.complib)
    ragged = self.h5file.create_vlarray(
        self.rootgroup,
        "vlarray",
        tb.Int32Atom(),
        "ragged array if ints",
        filters=compression,
        expectedrows=1000,
    )

    # Each row is as long as its own row number, so the first one is empty
    for length in range(self.nrows):
        ragged.append(list(range(length)))
def test01np_start(self):
    """Check single-row lookups addressed with NumPy integer scalars.

    Rows 0, 10 and 99 are fetched with ``int8``, ``int32`` and ``int64``
    indexes respectively.  Each row is expected to hold as many items as
    its row number and to equal ``np.arange`` of that length.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01np_start..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # One NumPy scalar type per lookup, paired with the expected row length
    lookups = ((np.int8(0), 0), (np.int32(10), 10), (np.int64(99), 99))
    fetched = [node[index] for index, _ in lookups]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", fetched[1])

    self.assertEqual(node.nrows, self.nrows)
    for got, (_, length) in zip(fetched, lookups):
        self.assertEqual(len(got), length)
    for got, (_, length) in zip(fetched, lookups):
        self.assertTrue(
            common.allequal(got, np.arange(length, dtype="int32"))
        )
def test02b_stop(self):
    """Check slice reads that give only a stop value.

    ``vlarray[:1]``, ``vlarray[:10]`` and ``vlarray[:99]`` are read with a
    deliberately small row buffer.  Every slice must contain ``stop`` rows
    and the row at position ``p`` must equal ``np.arange(p)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test02b_stop..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # Choose a small value for buffer size
    node._nrowsinbuf = 3

    stops = (1, 10, 99)
    chunks = [node[:stop] for stop in stops]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", chunks[1])

    self.assertEqual(node.nrows, self.nrows)
    for chunk, stop in zip(chunks, stops):
        self.assertEqual(len(chunk), stop)
    for chunk, stop in zip(chunks, stops):
        for pos in range(stop):
            self.assertTrue(
                common.allequal(chunk[pos], np.arange(pos, dtype="int32"))
            )
def test03b_startstop(self):
    """Check slice reads that give a start and a stop value.

    The slices ``[0:10]``, ``[5:15]`` and ``[:]`` are read with a
    deliberately small row buffer.  They must contain 10, 10 and 100 rows,
    and the row that sits at absolute position ``n`` in the array must
    equal ``np.arange(n)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test03b_startstop..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # Choose a small value for buffer size
    node._nrowsinbuf = 3

    # The last selection reads all the array
    selections = (slice(0, 10), slice(5, 15), slice(None))
    chunks = [node[selection] for selection in selections]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", chunks[1])

    self.assertEqual(node.nrows, self.nrows)
    for chunk, size in zip(chunks, (10, 10, 100)):
        self.assertEqual(len(chunk), size)
    # (first, last) absolute row numbers covered by each selection
    for chunk, (first, last) in zip(chunks, ((0, 10), (5, 15), (0, 100))):
        for rownum in range(first, last):
            self.assertTrue(
                common.allequal(
                    chunk[rownum - first], np.arange(rownum, dtype="int32")
                )
            )
def test04np_startstopstep(self):
    """Check ``read()`` with start, stop and step given as NumPy scalars.

    The ranges (0, 10, 2), (5, 15, 3) and (0, 100, 20) are passed to
    ``vlarray.read`` as ``np.int8`` values, using a deliberately small row
    buffer.  They must yield 5, 4 and 5 rows, and the row selected for
    absolute position ``n`` must equal ``np.arange(n)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test04np_startstopstep..."
            % self.__class__.__name__
        )

    node = self.h5file.root.vlarray

    # Choose a small value for buffer size
    node._nrowsinbuf = 3

    ranges = ((0, 10, 2), (5, 15, 3), (0, 100, 20))
    chunks = [
        node.read(np.int8(start), np.int8(stop), np.int8(step))
        for start, stop, step in ranges
    ]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", chunks[1])

    self.assertEqual(node.nrows, self.nrows)
    for chunk, size in zip(chunks, (5, 4, 5)):
        self.assertEqual(len(chunk), size)
    for chunk, (start, stop, step) in zip(chunks, ranges):
        # The position inside the chunk is (rownum - start) // step
        for pos, rownum in enumerate(range(start, stop, step)):
            self.assertTrue(
                common.allequal(chunk[pos], np.arange(rownum, dtype="int32"))
            )
def test04bnp_slices(self):
    """Check extended slices whose bounds are NumPy integer scalars.

    The slices 0:10:2, 5:15:3 and 0:100:20 are built from a mix of NumPy
    integer types and read with a deliberately small row buffer.  They
    must yield 5, 4 and 5 rows, and the row selected for absolute position
    ``n`` must equal ``np.arange(n)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test04bnp_slices..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # Choose a small value for buffer size
    node._nrowsinbuf = 3

    # Each bound deliberately uses a different NumPy integer type
    selections = (
        slice(np.int16(0), np.int16(10), np.int32(2)),
        slice(np.int16(5), np.int16(15), np.int64(3)),
        slice(np.uint16(0), np.int32(100), np.int8(20)),
    )
    chunks = [node[selection] for selection in selections]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", chunks[1])

    self.assertEqual(node.nrows, self.nrows)
    for chunk, size in zip(chunks, (5, 4, 5)):
        self.assertEqual(len(chunk), size)
    plain_ranges = ((0, 10, 2), (5, 15, 3), (0, 100, 20))
    for chunk, (start, stop, step) in zip(chunks, plain_ranges):
        for pos, rownum in enumerate(range(start, stop, step)):
            self.assertTrue(
                common.allequal(chunk[pos], np.arange(rownum, dtype="int32"))
            )
% self.__class__.__name__ + ) + + vlarray = self.h5file.root.vlarray + + if common.verbose: + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + + with self.assertRaises(IndexError): + row = vlarray.read(1000)[0] + print("row-->", row) + + +class GetItemRangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + nrows = 100 + open_mode = "w" + compress = 0 + complib = "zlib" # Default compression library + + def setUp(self): + super().setUp() + + self.rootgroup = self.h5file.root + self.populateFile() + self._reopen() + + def populateFile(self): + group = self.rootgroup + filters = tb.Filters(complevel=self.compress, complib=self.complib) + vlarray = self.h5file.create_vlarray( + group, + "vlarray", + tb.Int32Atom(), + "ragged array if ints", + filters=filters, + expectedrows=1000, + ) + + # Fill it with 100 rows with variable length + for i in range(self.nrows): + vlarray.append(list(range(i))) + + def test01_start(self): + """Checking reads with only a start value""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_start..." 
def test01b_start(self):
    """Check single-row lookups with plain integer indexes.

    Rows 0, 10 and 99 are fetched through ``vlarray[index]``.  Each row is
    expected to hold as many items as its row number and to equal
    ``np.arange`` of that length.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01b_start..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # For these rows the row number is also the expected row length
    rownums = (0, 10, 99)
    fetched = [node[rownum] for rownum in rownums]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", fetched[1])

    self.assertEqual(node.nrows, self.nrows)
    for got, rownum in zip(fetched, rownums):
        self.assertEqual(len(got), rownum)
    for got, rownum in zip(fetched, rownums):
        self.assertTrue(
            common.allequal(got, np.arange(rownum, dtype="int32"))
        )
def test02b_stop(self):
    """Check ``vlarray[:stop]`` reads for stop values 1, 10 and 99.

    A deliberately small row buffer is used.  Every slice must contain
    ``stop`` rows and the row at position ``p`` must equal
    ``np.arange(p)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test02b_stop..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # Choose a small value for buffer size
    node._nrowsinbuf = 3

    head_1 = node[:1]
    head_10 = node[:10]
    head_99 = node[:99]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", head_10)

    # (expected number of rows, rows actually read)
    cases = ((1, head_1), (10, head_10), (99, head_99))

    self.assertEqual(node.nrows, self.nrows)
    for size, chunk in cases:
        self.assertEqual(len(chunk), size)
    for size, chunk in cases:
        for pos in range(size):
            expected = np.arange(pos, dtype="int32")
            self.assertTrue(common.allequal(chunk[pos], expected))
def test03b_startstop(self):
    """Check ``vlarray[start:stop]`` reads, including the full ``[:]``.

    A deliberately small row buffer is used.  The slices ``[0:10]``,
    ``[5:15]`` and ``[:]`` must contain 10, 10 and 100 rows, and the row
    at absolute position ``n`` must equal ``np.arange(n)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test03b_startstop..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # Choose a small value for buffer size
    node._nrowsinbuf = 3

    first_ten = node[0:10]
    shifted_ten = node[5:15]
    everything = node[:]  # read all the array
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", shifted_ten)

    # (rows read, absolute number of the first row, expected row count)
    cases = (
        (first_ten, 0, 10),
        (shifted_ten, 5, 10),
        (everything, 0, 100),
    )

    self.assertEqual(node.nrows, self.nrows)
    for chunk, _, size in cases:
        self.assertEqual(len(chunk), size)
    for chunk, first, size in cases:
        for pos in range(size):
            expected = np.arange(first + pos, dtype="int32")
            self.assertTrue(common.allequal(chunk[pos], expected))
def test04bnp_slices(self):
    """Checking reads with start, stop & step values (numpy indices).

    The extended slices 0:10:2, 5:15:3 and 0:100:20 are built from
    ``np.int8`` scalars and read with a deliberately small row buffer.
    They must yield 5, 4 and 5 rows, and the row selected for absolute
    position ``n`` must equal ``np.arange(n)``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        # Report this method's own name so verbose logs point at the
        # right test.
        print("Running %s.test04bnp_slices..." % self.__class__.__name__)

    vlarray = self.h5file.root.vlarray

    # Choose a small value for buffer size
    vlarray._nrowsinbuf = 3

    # Read some rows:
    row = []
    row.append(vlarray[np.int8(0) : np.int8(10) : np.int8(2)])
    row.append(vlarray[np.int8(5) : np.int8(15) : np.int8(3)])
    row.append(vlarray[np.int8(0) : np.int8(100) : np.int8(20)])
    if common.verbose:
        print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows)
        print("Second row in vlarray ==>", row[1])

    self.assertEqual(vlarray.nrows, self.nrows)
    self.assertEqual(len(row[0]), 5)
    self.assertEqual(len(row[1]), 4)
    self.assertEqual(len(row[2]), 5)
    for x in range(0, 10, 2):
        self.assertTrue(
            common.allequal(row[0][x // 2], np.arange(x, dtype="int32"))
        )
    for x in range(5, 15, 3):
        self.assertTrue(
            common.allequal(
                row[1][(x - 5) // 3], np.arange(x, dtype="int32")
            )
        )
    for x in range(0, 100, 20):
        self.assertTrue(
            common.allequal(row[2][x // 20], np.arange(x, dtype="int32"))
        )
% self.__class__.__name__ + ) + + vlarray = self.h5file.root.vlarray + + if common.verbose: + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + + with self.assertRaises(IndexError): + row = vlarray[np.int32(1000)] + print("row-->", row) + + +class SetRangeTestCase(common.TempFileMixin, common.PyTablesTestCase): + nrows = 100 + open_mode = "w" + compress = 0 + complib = "zlib" # Default compression library + + def setUp(self): + super().setUp() + self.rootgroup = self.h5file.root + self.populateFile() + self._reopen(mode="a") + + def populateFile(self): + group = self.rootgroup + filters = tb.Filters(complevel=self.compress, complib=self.complib) + vlarray = self.h5file.create_vlarray( + group, + "vlarray", + tb.Int32Atom(), + "ragged array if ints", + filters=filters, + expectedrows=1000, + ) + + # Fill it with 100 rows with variable length + for i in range(self.nrows): + vlarray.append(list(range(i))) + + def test01_start(self): + """Checking updates that modifies a complete row""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_start..." 
def test01np_start(self):
    """Check whole-row updates addressed with NumPy integer scalars.

    Rows 0, 10 and 99 are replaced by ``row * 2 + 3`` using NumPy scalar
    indexes of several integer types, then read back with ``np.int8``
    indexes.  The row lengths must be unchanged and the contents must
    equal ``np.arange(n) * 2 + 3``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print("Running %s.test01np_start..." % self.__class__.__name__)

    node = self.h5file.root.vlarray

    # (index used for writing, index used for reading the old value)
    updates = (
        (np.int8(0), np.int16(0)),
        (np.int8(10), np.int8(10)),
        (np.int32(99), np.int64(99)),
    )
    for target, source in updates:
        node[target] = node[source] * 2 + 3

    # Read the modified rows back
    rownums = (0, 10, 99)
    fetched = [node.read(np.int8(rownum))[0] for rownum in rownums]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", fetched[1])

    self.assertEqual(node.nrows, self.nrows)
    for got, rownum in zip(fetched, rownums):
        self.assertEqual(len(got), rownum)
    for got, rownum in zip(fetched, rownums):
        expected = np.arange(rownum, dtype="int32") * 2 + 3
        self.assertTrue(common.allequal(got, expected))
def test03a_several_rows(self):
    """Check updating several rows with one slice assignment.

    Rows 3, 4 and 5 are replaced in a single ``vlarray[3:6] = ...``
    statement by a tuple holding ``row * 2 + 3`` for each of them.  Read
    back one by one, the rows must keep their lengths and equal
    ``np.arange(n) * 2 + 3``.
    """

    if common.verbose:
        print("\n", "-=" * 30)
        print(
            "Running %s.test03a_several_rows..." % self.__class__.__name__
        )

    node = self.h5file.root.vlarray

    # For these rows the row number is also the row length
    rownums = (3, 4, 5)

    # Modify the three rows at once
    node[3:6] = tuple(node[rownum] * 2 + 3 for rownum in rownums)

    # Read them back individually
    fetched = [node.read(rownum)[0] for rownum in rownums]
    if common.verbose:
        print("Nrows in", node._v_pathname, ":", node.nrows)
        print("Second row in vlarray ==>", fetched[1])

    self.assertEqual(node.nrows, self.nrows)
    for got, rownum in zip(fetched, rownums):
        self.assertEqual(len(got), rownum)
    for got, rownum in zip(fetched, rownums):
        expected = np.arange(rownum, dtype="int32") * 2 + 3
        self.assertTrue(common.allequal(got, expected))
% self.__class__.__name__ + ) + + vlarray = self.h5file.root.vlarray + + # Modify some rows: + vlarray[[0, 10, 96]] = ( + vlarray[0] * 2 + 3, + vlarray[10] * 2 + 3, + vlarray[96] * 2 + 3, + ) + + # Read some rows: + row = [] + row.append(vlarray.read(0)[0]) + row.append(vlarray.read(10)[0]) + row.append(vlarray.read(96)[0]) + if common.verbose: + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, self.nrows) + self.assertEqual(len(row[0]), 0) + self.assertEqual(len(row[1]), 10) + self.assertEqual(len(row[2]), 96) + self.assertTrue( + common.allequal(row[0], np.arange(0, dtype="int32") * 2 + 3) + ) + self.assertTrue( + common.allequal(row[1], np.arange(10, dtype="int32") * 2 + 3) + ) + self.assertTrue( + common.allequal(row[2], np.arange(96, dtype="int32") * 2 + 3) + ) + + def test03c_several_rows(self): + """Checking updating several rows at once (NumPy's where style)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test03c_several_rows..." 
% self.__class__.__name__ + ) + + vlarray = self.h5file.root.vlarray + + # Modify some rows: + vlarray[(np.array([0, 10, 96]),)] = ( + vlarray[0] * 2 + 3, + vlarray[10] * 2 + 3, + vlarray[96] * 2 + 3, + ) + + # Read some rows: + row = [] + row.append(vlarray.read(0)[0]) + row.append(vlarray.read(10)[0]) + row.append(vlarray.read(96)[0]) + if common.verbose: + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + print("Second row in vlarray ==>", row[1]) + + self.assertEqual(vlarray.nrows, self.nrows) + self.assertEqual(len(row[0]), 0) + self.assertEqual(len(row[1]), 10) + self.assertEqual(len(row[2]), 96) + self.assertTrue( + common.allequal(row[0], np.arange(0, dtype="int32") * 2 + 3) + ) + self.assertTrue( + common.allequal(row[1], np.arange(10, dtype="int32") * 2 + 3) + ) + self.assertTrue( + common.allequal(row[2], np.arange(96, dtype="int32") * 2 + 3) + ) + + def test04_out_of_range(self): + """Checking out of range updates (first index)""" + + if common.verbose: + print("\n", "-=" * 30) + print( + "Running %s.test04_out_of_range..." % self.__class__.__name__ + ) + + vlarray = self.h5file.root.vlarray + + if common.verbose: + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + + with self.assertRaises(IndexError): + vlarray[1000] = [1] + + def test05_value_error(self): + """Checking out value errors""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_value_error..." % self.__class__.__name__) + + vlarray = self.h5file.root.vlarray + + if common.verbose: + print("Nrows in", vlarray._v_pathname, ":", vlarray.nrows) + + with self.assertRaises(ValueError): + vlarray[10] = [1] * 100 + + +class CopyTestCase(common.TempFileMixin, common.PyTablesTestCase): + close = True + + def test01a_copy(self): + """Checking VLArray.copy() method.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01a_copy..." 
% self.__class__.__name__) + + # Create an Vlarray + arr = tb.Int16Atom(shape=2) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", arr, "title array1" + ) + array1.flavor = "python" + array1.append([[2, 3]]) + array1.append(()) # an empty row + array1.append([[3, 457], [2, 4]]) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", repr(array1)) + print("array2-->", repr(array2)) + print("array1[:]-->", repr(array1.read())) + print("array2[:]-->", repr(array2.read())) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertEqual(array1.read(), array2.read()) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(repr(array1.atom), repr(array2.atom)) + + self.assertEqual(array1.title, array2.title) + + def test01b_copy(self): + """Checking VLArray.copy() method (Pseudo-atom case)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01b_copy..." 
% self.__class__.__name__) + + # Create an Vlarray + arr = tb.VLStringAtom() + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", arr, "title array1" + ) + array1.flavor = "python" + array1.append(b"a string") + array1.append(b"") # an empty row + array1.append(b"another string") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("array1-->", repr(array1)) + print("array2-->", repr(array2)) + print("array1[:]-->", repr(array1.read())) + print("array2[:]-->", repr(array2.read())) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertEqual(array1.read(), array2.read()) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.type, array2.atom.type) + self.assertEqual(repr(array1.atom), repr(array2.atom)) + + self.assertEqual(array1.title, array2.title) + + def test02_copy(self): + """Checking VLArray.copy() method (where specified)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test02_copy..." 
% self.__class__.__name__) + + # Create an VLArray + arr = tb.Int16Atom(shape=2) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", arr, "title array1" + ) + array1.flavor = "python" + array1.append([[2, 3]]) + array1.append(()) # an empty row + array1.append([[3, 457], [2, 4]]) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + group1 = self.h5file.create_group("/", "group1") + array2 = array1.copy(group1, "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.group1.array2 + + if common.verbose: + print("array1-->", repr(array1)) + print("array2-->", repr(array2)) + print("array1-->", array1.read()) + print("array2-->", array2.read()) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Check that all the elements are equal + self.assertEqual(array1.read(), array2.read()) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.flavor, array2.flavor) + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(repr(array1.atom), repr(array1.atom)) + self.assertEqual(array1.title, array2.title) + + def test03_copy(self): + """Checking VLArray.copy() method ('python' flavor)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test03_copy..." 
% self.__class__.__name__) + + # Create an VLArray + atom = tb.Int16Atom(shape=2) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", atom, title="title array1" + ) + array1.flavor = "python" + array1.append(((2, 3),)) + array1.append(()) # an empty row + array1.append(((3, 457), (2, 4))) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another location + array2 = array1.copy("/", "array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert other properties in array + self.assertEqual(array1.nrows, array2.nrows) + self.assertEqual(array1.shape, array2.shape) + self.assertEqual(array1.flavor, array2.flavor) # Very important here + self.assertEqual(array1.atom.dtype, array2.atom.dtype) + self.assertEqual(repr(array1.atom), repr(array1.atom)) + self.assertEqual(array1.title, array2.title) + + def test04_copy(self): + """Checking VLArray.copy() method (checking title copying)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test04_copy..." 
% self.__class__.__name__) + + # Create an VLArray + atom = tb.Int16Atom(shape=2) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", atom=atom, title="title array1" + ) + array1.append(((2, 3),)) + array1.append(()) # an empty row + array1.append(((3, 457), (2, 4))) + + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", title="title array2") + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + # Assert user attributes + if common.verbose: + print("title of destination array-->", array2.title) + self.assertEqual(array2.title, "title array2") + + def test05_copy(self): + """Checking VLArray.copy() method (user attributes copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05_copy..." 
% self.__class__.__name__) + + # Create an Array + atom = tb.Int16Atom(shape=2) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", atom=atom, title="title array1" + ) + array1.append(((2, 3),)) + array1.append(()) # an empty row + array1.append(((3, 457), (2, 4))) + + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=1) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(array2.attrs.attr1, "attr1") + self.assertEqual(array2.attrs.attr2, 2) + + def notest05b_copy(self): + """Checking VLArray.copy() method (user attributes not copied)""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test05b_copy..." 
% self.__class__.__name__) + + # Create an VLArray + atom = tb.Int16Atom(shape=2) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", atom=atom, title="title array1" + ) + array1.append(((2, 3),)) + array1.append(()) # an empty row + array1.append(((3, 457), (2, 4))) + + # Append some user attrs + array1.attrs.attr1 = "attr1" + array1.attrs.attr2 = 2 + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy it to another Array + array2 = array1.copy("/", "array2", copyuserattrs=0) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + array2 = self.h5file.root.array2 + + if common.verbose: + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + + # Assert user attributes + self.assertEqual(array2.attrs.attr1, None) + self.assertEqual(array2.attrs.attr2, None) + + +class CloseCopyTestCase(CopyTestCase): + close = 1 + + +class OpenCopyTestCase(CopyTestCase): + close = 0 + + +class CopyIndexTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def test01_index(self): + """Checking VLArray.copy() method with indexes.""" + + if common.verbose: + print("\n", "-=" * 30) + print("Running %s.test01_index..." 
% self.__class__.__name__) + + # Create an VLArray + atom = tb.Int32Atom(shape=(2,)) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", atom, "t array1" + ) + array1.flavor = "python" + + # The next creates 20 rows of variable length + r = [] + for row in range(20): + r.append([[row, row + 1]]) + array1.append([row, row + 1]) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen(mode="a") + array1 = self.h5file.root.array1 + + # Copy to another array + array2 = array1.copy( + "/", "array2", start=self.start, stop=self.stop, step=self.step + ) + + r2 = r[self.start : self.stop : self.step] + if common.verbose: + print("r2-->", r2) + print("array2-->", array2[:]) + print("attrs array1-->", repr(array1.attrs)) + print("attrs array2-->", repr(array2.attrs)) + print("nrows in array2-->", array2.nrows) + print("and it should be-->", len(r2)) + + # Check that all the elements are equal + self.assertEqual(r2, array2[:]) + + # Assert the number of rows in array + self.assertEqual(len(r2), array2.nrows) + + +class CopyIndex1TestCase(CopyIndexTestCase): + close = 0 + start = 0 + stop = 7 + step = 1 + + +class CopyIndex2TestCase(CopyIndexTestCase): + close = 1 + start = 0 + stop = -1 + step = 1 + + +class CopyIndex3TestCase(CopyIndexTestCase): + close = 0 + start = 1 + stop = 7 + step = 1 + + +class CopyIndex4TestCase(CopyIndexTestCase): + close = 1 + start = 0 + stop = 6 + step = 1 + + +class CopyIndex5TestCase(CopyIndexTestCase): + close = 0 + start = 3 + stop = 7 + step = 1 + + +class CopyIndex6TestCase(CopyIndexTestCase): + close = 1 + start = 3 + stop = 6 + step = 2 + + +class CopyIndex7TestCase(CopyIndexTestCase): + close = 0 + start = 0 + stop = 7 + step = 10 + + +class CopyIndex8TestCase(CopyIndexTestCase): + close = 1 + start = 6 + stop = -1 # Negative values means starting from the end + step = 1 + + +class CopyIndex9TestCase(CopyIndexTestCase): + close = 0 + start = 3 + stop = 4 + step = 1 + + +class 
CopyIndex10TestCase(CopyIndexTestCase): + close = 1 + start = 3 + stop = 4 + step = 2 + + +class CopyIndex11TestCase(CopyIndexTestCase): + close = 0 + start = -3 + stop = -1 + step = 2 + + +class CopyIndex12TestCase(CopyIndexTestCase): + close = 1 + start = -1 # Should point to the last element + stop = None # None should mean the last element (including it) + step = 1 + + +class ChunkshapeTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + atom = tb.Int32Atom(shape=(2,)) + self.h5file.create_vlarray( + "/", "vlarray", atom=atom, title="t array1", chunkshape=13 + ) + + def test00(self): + """Test setting the chunkshape in a table (no reopen).""" + + vla = self.h5file.root.vlarray + if common.verbose: + print("chunkshape-->", vla.chunkshape) + self.assertEqual(vla.chunkshape, (13,)) + + def test01(self): + """Test setting the chunkshape in a table (reopen).""" + + self.h5file.close() + self.h5file = tb.open_file(self.h5fname, "r") + vla = self.h5file.root.vlarray + if common.verbose: + print("chunkshape-->", vla.chunkshape) + self.assertEqual(vla.chunkshape, (13,)) + + +class VLUEndianTestCase(common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.h5fname = common.test_filename("vlunicode_endian.h5") + self.h5file = tb.open_file(self.h5fname) + + def tearDown(self): + self.h5file.close() + super().tearDown() + + def test(self): + """Accessing ``vlunicode`` data of a different endianness.""" + + bedata = self.h5file.root.vlunicode_big[0] + ledata = self.h5file.root.vlunicode_little[0] + self.assertEqual(bedata, "para\u0140lel") + self.assertEqual(ledata, "para\u0140lel") + + +class TruncateTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + # Create an VLArray + arr = tb.Int16Atom(dflt=3) + array1 = self.h5file.create_vlarray( + self.h5file.root, "array1", arr, "title array1" + ) + + # Add a couple of rows + array1.append(np.array([456, 2], dtype="int16")) + 
array1.append(np.array([3], dtype="int16")) + + def test00_truncate(self): + """Checking VLArray.truncate() method (truncating to 0 rows)""" + + array1 = self.h5file.root.array1 + # Truncate to 0 elements + array1.truncate(0) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertEqual(array1.nrows, 0) + self.assertEqual(array1[:], []) + + def test01_truncate(self): + """Checking VLArray.truncate() method (truncating to 1 rows)""" + + array1 = self.h5file.root.array1 + # Truncate to 1 element + array1.truncate(1) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertEqual(array1.nrows, 1) + self.assertTrue( + common.allequal(array1[0], np.array([456, 2], dtype="int16")) + ) + + def test02_truncate(self): + """Checking VLArray.truncate() method (truncating to == self.nrows)""" + + array1 = self.h5file.root.array1 + # Truncate to 2 elements + array1.truncate(2) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertEqual(array1.nrows, 2) + self.assertTrue( + common.allequal(array1[0], np.array([456, 2], dtype="int16")) + ) + self.assertTrue( + common.allequal(array1[1], np.array([3], dtype="int16")) + ) + + def test03_truncate(self): + """Checking VLArray.truncate() method (truncating to > self.nrows)""" + + array1 = self.h5file.root.array1 + # Truncate to 4 elements + array1.truncate(4) + + if self.close: + if common.verbose: + print("(closing file version)") + self._reopen() + array1 = self.h5file.root.array1 + + if common.verbose: + print("array1-->", array1.read()) + + self.assertEqual(array1.nrows, 4) + + # Check the original values + 
self.assertTrue( + common.allequal(array1[0], np.array([456, 2], dtype="int16")) + ) + self.assertTrue( + common.allequal(array1[1], np.array([3], dtype="int16")) + ) + + # Check that the added rows are empty + self.assertTrue( + common.allequal(array1[2], np.array([], dtype="int16")) + ) + self.assertTrue( + common.allequal(array1[3], np.array([], dtype="int16")) + ) + + +class TruncateOpenTestCase(TruncateTestCase): + close = 0 + + +class TruncateCloseTestCase(TruncateTestCase): + close = 1 + + +class PointSelectionTestCase(common.TempFileMixin, common.PyTablesTestCase): + + def setUp(self): + super().setUp() + + # The next are valid selections for both NumPy and PyTables + self.working_keyset = [ + [], # empty list + [2], # single-entry list + [0, 2], # list + [0, -2], # negative values + ([0, 2],), # tuple of list + np.array([], dtype="i4"), # empty array + np.array([1], dtype="i4"), # single-entry array + np.array([True, False, True]), # array of bools + ] + + # The next are invalid selections for VLArrays + self.not_working_keyset = [ + [1, 2, 100], # coordinate 100 > len(vlarray) + ([True, False, True],), # tuple of bools + ] + + # Create a sample array + arr1 = np.array([5, 6], dtype="i4") + arr2 = np.array([5, 6, 7], dtype="i4") + arr3 = np.array([5, 6, 9, 8], dtype="i4") + self.nparr = np.array([arr1, arr2, arr3], dtype="object") + + # Create the VLArray + self.vlarr = self.h5file.create_vlarray( + self.h5file.root, "vlarray", tb.Int32Atom() + ) + self.vlarr.append(arr1) + self.vlarr.append(arr2) + self.vlarr.append(arr3) + + def test01a_read(self): + """Test for point-selections (read, boolean keys).""" + + nparr = self.nparr + vlarr = self.vlarr + for key in self.working_keyset: + if common.verbose: + print("Selection to test:", repr(key)) + a = nparr[key].tolist() + b = vlarr[key] + # if common.verbose: + # print "NumPy selection:", a, type(a) + # print "PyTables selection:", b, type(b) + self.assertEqual( + repr(a), + repr(b), + "NumPy array and 
PyTables selections does not match.", + ) + + def test01b_read(self): + """Test for point-selections (not working selections, read).""" + + vlarr = self.vlarr + for key in self.not_working_keyset: + if common.verbose: + print("Selection to test:", key) + self.assertRaises(IndexError, vlarr.__getitem__, key) + + +class SizeInMemoryPropertyTestCase( + common.TempFileMixin, common.PyTablesTestCase +): + def create_array(self, atom, complevel): + filters = tb.Filters(complevel=complevel, complib="blosc") + self.array = self.h5file.create_vlarray( + "/", "vlarray", atom=atom, filters=filters + ) + + def test_zero_length(self): + atom = tb.Int32Atom() + complevel = 0 + self.create_array(atom, complevel) + self.assertEqual(self.array.size_in_memory, 0) + + def int_tests(self, complevel, flavor): + atom = tb.Int32Atom() + self.create_array(atom, complevel) + self.array.flavor = flavor + expected_size = 0 + for i in range(10): + row = np.arange((i + 1) * 10, dtype="i4") + self.array.append(row) + expected_size += row.nbytes + return expected_size + + def test_numpy_int_numpy_flavor(self): + complevel = 0 + flavor = "numpy" + expected_size = self.int_tests(complevel, flavor) + self.assertEqual(self.array.size_in_memory, expected_size) + + # compression will have no effect, since this is uncompressed size + def test_numpy_int_numpy_flavor_compressed(self): + complevel = 1 + flavor = "numpy" + expected_size = self.int_tests(complevel, flavor) + self.assertEqual(self.array.size_in_memory, expected_size) + + # flavor will have no effect on what's stored in HDF5 file + def test_numpy_int_python_flavor(self): + complevel = 0 + flavor = "python" + expected_size = self.int_tests(complevel, flavor) + self.assertEqual(self.array.size_in_memory, expected_size) + + # this relies on knowledge of the implementation, so it's not + # a great test + def test_object_atom(self): + atom = tb.ObjectAtom() + complevel = 0 + self.create_array(atom, complevel) + obj = [1, 2, 3] + for i in 
range(10): + self.array.append(obj) + pickle_array = atom.toarray(obj) + expected_size = 10 * pickle_array.nbytes + self.assertEqual(self.array.size_in_memory, expected_size) + + +class SizeOnDiskPropertyTestCase( + common.TempFileMixin, common.PyTablesTestCase +): + def create_array(self, atom, complevel): + filters = tb.Filters(complevel=complevel, complib="blosc") + self.h5file.create_vlarray("/", "vlarray", atom, filters=filters) + self.array = self.h5file.get_node("/", "vlarray") + + def test_not_implemented(self): + atom = tb.IntAtom() + complevel = 0 + self.create_array(atom, complevel) + self.assertRaises( + NotImplementedError, getattr, self.array, "size_on_disk" + ) + + +class AccessClosedTestCase(common.TempFileMixin, common.PyTablesTestCase): + def setUp(self): + super().setUp() + self.array = self.h5file.create_vlarray( + self.h5file.root, "array", atom=tb.StringAtom(8) + ) + self.array.append([str(i) for i in range(5, 5005, 100)]) + + def test_read(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.read) + + def test_getitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__getitem__, 0) + + def test_setitem(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.__setitem__, 0, "0") + + def test_append(self): + self.h5file.close() + self.assertRaises(tb.ClosedNodeError, self.array.append, "xxxxxxxxx") + + +class TestCreateVLArrayArgs(common.TempFileMixin, common.PyTablesTestCase): + obj = np.array([1, 2, 3]) + where = "/" + name = "vlarray" + atom = tb.Atom.from_dtype(obj.dtype) + title = "title" + filters = None + expectedrows = None + chunkshape = None + byteorder = None + createparents = False + + def test_positional_args_01(self): + self.h5file.create_vlarray( + self.where, + self.name, + self.atom, + self.title, + self.filters, + self.expectedrows, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, 
self.name) + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (0,)) + self.assertEqual(ptarr.nrows, 0) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + + def test_positional_args_02(self): + ptarr = self.h5file.create_vlarray( + self.where, + self.name, + self.atom, + self.title, + self.filters, + self.expectedrows, + ) + ptarr.append(self.obj) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read()[0] + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (1,)) + self.assertEqual(ptarr[0].shape, self.obj.shape) + self.assertEqual(ptarr.nrows, 1) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_positional_args_obj(self): + self.h5file.create_vlarray( + self.where, + self.name, + None, + self.title, + self.filters, + self.expectedrows, + self.chunkshape, + self.byteorder, + self.createparents, + self.obj, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read()[0] + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (1,)) + self.assertEqual(ptarr[0].shape, self.obj.shape) + self.assertEqual(ptarr.nrows, 1) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj(self): + self.h5file.create_vlarray( + self.where, self.name, title=self.title, obj=self.obj + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read()[0] + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (1,)) + self.assertEqual(ptarr[0].shape, self.obj.shape) + 
self.assertEqual(ptarr.nrows, 1) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_01(self): + ptarr = self.h5file.create_vlarray( + self.where, self.name, title=self.title, atom=self.atom + ) + ptarr.append(self.obj) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read()[0] + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (1,)) + self.assertEqual(ptarr[0].shape, self.obj.shape) + self.assertEqual(ptarr.nrows, 1) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_atom_02(self): + ptarr = self.h5file.create_vlarray( + self.where, self.name, title=self.title, atom=self.atom + ) + # ptarr.append(self.obj) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (0,)) + self.assertEqual(ptarr.nrows, 0) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + + def test_kwargs_obj_atom(self): + ptarr = self.h5file.create_vlarray( + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=self.atom, + ) + self.h5file.close() + + self.h5file = tb.open_file(self.h5fname) + ptarr = self.h5file.get_node(self.where, self.name) + nparr = ptarr.read()[0] + + self.assertEqual(ptarr.title, self.title) + self.assertEqual(ptarr.shape, (1,)) + self.assertEqual(ptarr[0].shape, self.obj.shape) + self.assertEqual(ptarr.nrows, 1) + self.assertEqual(ptarr.atom, self.atom) + self.assertEqual(ptarr.atom.dtype, self.atom.dtype) + self.assertTrue(common.allequal(self.obj, nparr)) + + def test_kwargs_obj_atom_error(self): + atom = 
tb.Atom.from_dtype(np.dtype("complex")) + # shape = self.shape + self.shape + self.assertRaises( + TypeError, + self.h5file.create_vlarray, + self.where, + self.name, + title=self.title, + obj=self.obj, + atom=atom, + ) + + +def suite(): + theSuite = common.unittest.TestSuite() + niter = 1 + + for n in range(niter): + theSuite.addTest(common.make_suite(BasicNumPyTestCase)) + theSuite.addTest(common.make_suite(BasicPythonTestCase)) + theSuite.addTest(common.make_suite(ZlibComprTestCase)) + theSuite.addTest(common.make_suite(BloscComprTestCase)) + theSuite.addTest(common.make_suite(BloscShuffleComprTestCase)) + theSuite.addTest(common.make_suite(BloscBitShuffleComprTestCase)) + theSuite.addTest(common.make_suite(BloscBloscLZComprTestCase)) + theSuite.addTest(common.make_suite(BloscLZ4ComprTestCase)) + theSuite.addTest(common.make_suite(BloscLZ4HCComprTestCase)) + theSuite.addTest(common.make_suite(BloscSnappyComprTestCase)) + theSuite.addTest(common.make_suite(BloscZlibComprTestCase)) + theSuite.addTest(common.make_suite(BloscZstdComprTestCase)) + theSuite.addTest(common.make_suite(LZOComprTestCase)) + theSuite.addTest(common.make_suite(Bzip2ComprTestCase)) + theSuite.addTest(common.make_suite(TypesReopenTestCase)) + theSuite.addTest(common.make_suite(TypesNoReopenTestCase)) + theSuite.addTest(common.make_suite(MDTypesNumPyTestCase)) + theSuite.addTest(common.make_suite(OpenAppendShapeTestCase)) + theSuite.addTest(common.make_suite(CloseAppendShapeTestCase)) + theSuite.addTest(common.make_suite(PythonFlavorTestCase)) + theSuite.addTest(common.make_suite(NumPyFlavorTestCase)) + theSuite.addTest(common.make_suite(ReadRangeTestCase)) + theSuite.addTest(common.make_suite(GetItemRangeTestCase)) + theSuite.addTest(common.make_suite(SetRangeTestCase)) + theSuite.addTest(common.make_suite(ShuffleComprTestCase)) + theSuite.addTest(common.make_suite(CloseCopyTestCase)) + theSuite.addTest(common.make_suite(OpenCopyTestCase)) + 
theSuite.addTest(common.make_suite(CopyIndex1TestCase)) + theSuite.addTest(common.make_suite(CopyIndex2TestCase)) + theSuite.addTest(common.make_suite(CopyIndex3TestCase)) + theSuite.addTest(common.make_suite(CopyIndex4TestCase)) + theSuite.addTest(common.make_suite(CopyIndex5TestCase)) + theSuite.addTest(common.make_suite(CopyIndex6TestCase)) + theSuite.addTest(common.make_suite(CopyIndex7TestCase)) + theSuite.addTest(common.make_suite(CopyIndex8TestCase)) + theSuite.addTest(common.make_suite(CopyIndex9TestCase)) + theSuite.addTest(common.make_suite(CopyIndex10TestCase)) + theSuite.addTest(common.make_suite(CopyIndex11TestCase)) + theSuite.addTest(common.make_suite(CopyIndex12TestCase)) + theSuite.addTest(common.make_suite(ChunkshapeTestCase)) + theSuite.addTest(common.make_suite(VLUEndianTestCase)) + theSuite.addTest(common.make_suite(TruncateOpenTestCase)) + theSuite.addTest(common.make_suite(TruncateCloseTestCase)) + theSuite.addTest(common.make_suite(PointSelectionTestCase)) + theSuite.addTest(common.make_suite(SizeInMemoryPropertyTestCase)) + theSuite.addTest(common.make_suite(SizeOnDiskPropertyTestCase)) + theSuite.addTest(common.make_suite(AccessClosedTestCase)) + theSuite.addTest(common.make_suite(TestCreateVLArrayArgs)) + + return theSuite + + +if __name__ == "__main__": + common.parse_argv(sys.argv) + common.print_versions() + common.unittest.main(defaultTest="suite") diff --git a/venv/Lib/site-packages/tables/tests/time-table-vlarray-1_x.h5 b/venv/Lib/site-packages/tables/tests/time-table-vlarray-1_x.h5 new file mode 100644 index 0000000..8e96bc8 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/time-table-vlarray-1_x.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/times-nested-be.h5 b/venv/Lib/site-packages/tables/tests/times-nested-be.h5 new file mode 100644 index 0000000..7ff7f5a Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/times-nested-be.h5 differ diff --git 
a/venv/Lib/site-packages/tables/tests/vlstr_attr.h5 b/venv/Lib/site-packages/tables/tests/vlstr_attr.h5 new file mode 100644 index 0000000..c2f436c Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/vlstr_attr.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/vlunicode_endian.h5 b/venv/Lib/site-packages/tables/tests/vlunicode_endian.h5 new file mode 100644 index 0000000..2996021 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/vlunicode_endian.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/zerodim-attrs-1.3.h5 b/venv/Lib/site-packages/tables/tests/zerodim-attrs-1.3.h5 new file mode 100644 index 0000000..b4065d1 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/zerodim-attrs-1.3.h5 differ diff --git a/venv/Lib/site-packages/tables/tests/zerodim-attrs-1.4.h5 b/venv/Lib/site-packages/tables/tests/zerodim-attrs-1.4.h5 new file mode 100644 index 0000000..4b1e865 Binary files /dev/null and b/venv/Lib/site-packages/tables/tests/zerodim-attrs-1.4.h5 differ diff --git a/venv/Lib/site-packages/tables/undoredo.py b/venv/Lib/site-packages/tables/undoredo.py new file mode 100644 index 0000000..03d827f --- /dev/null +++ b/venv/Lib/site-packages/tables/undoredo.py @@ -0,0 +1,181 @@ +"""Support for undoing and redoing actions. + +Functions: + +* undo(file, operation, *args) +* redo(file, operation, *args) +* move_to_shadow(file, path) +* move_from_shadow(file, path) +* attr_to_shadow(file, path, name) +* attr_from_shadow(file, path, name) + +Misc variables: + +`__docformat__` + The format of documentation strings in this module. 
def undo(
    file_: "File",
    operation: Literal["ADDATTR", "CREATE", "DELATTR", "MOVE", "REMOVE"],
    *args: str,
) -> None:
    """Undo a previously logged action.

    `operation` selects the handler; the handler receives `file_` followed
    by the first one or two items of `args` (one path for CREATE/REMOVE,
    two values for MOVE/ADDATTR/DELATTR).

    Raises ``NotImplementedError`` when `operation` is not one of the
    known operation names.
    """
    # (operation name, handler, number of positional arguments consumed)
    handlers = (
        ("CREATE", undo_create, 1),
        ("REMOVE", undo_remove, 1),
        ("MOVE", undo_move, 2),
        ("ADDATTR", undo_add_attr, 2),
        ("DELATTR", undo_del_attr, 2),
    )
    for opname, handler, nargs in handlers:
        if operation == opname:
            # Index explicitly so that a too-short `args` fails with
            # IndexError before the handler is invoked.
            handler(file_, *[args[pos] for pos in range(nargs)])
            return

    raise NotImplementedError(
        "the requested unknown operation %r can "
        "not be undone; please report this to the "
        "authors" % operation
    )
def attr_to_shadow(file_: "File", path: str, name: str) -> None:
    """Move attribute `name` of the node at `path` into the shadow set.

    The attribute value is read from the node, stored under the current
    shadow name on the shadow parent's attribute set (unless an entry with
    that name is already there) and finally deleted from the node.
    """
    visible_attrs = file_._get_node(path)._v_attrs
    current_value = getattr(visible_attrs, name)

    shadow_parent, shadow_key = file_._shadow_name()
    shadow_attrs = shadow_parent._v_attrs

    # Store the value only when the shadow does not already keep it.
    # This avoids re-pickling complex attributes on REDO.
    already_shadowed = shadow_key in shadow_attrs
    if not already_shadowed:
        shadow_attrs._g__setattr(shadow_key, current_value)

    visible_attrs._g__delattr(name)
+ # shattrs._g__delattr(shname) + + +def undo_add_attr(file_: "File", path: str, name: str) -> None: + """Undo add attribute.""" + attr_to_shadow(file_, path, name) + + +def redo_add_attr(file_: "File", path: str, name: str) -> None: + """Re-do add attribute.""" + attr_from_shadow(file_, path, name) + + +def undo_del_attr(file_: "File", path: str, name: str) -> None: + """Undo delete attribute.""" + attr_from_shadow(file_, path, name) + + +def redo_del_attr(file_: "File", path: str, name: str) -> None: + """Re-do delete attribute.""" + attr_to_shadow(file_, path, name) diff --git a/venv/Lib/site-packages/tables/unimplemented.py b/venv/Lib/site-packages/tables/unimplemented.py new file mode 100644 index 0000000..30f831a --- /dev/null +++ b/venv/Lib/site-packages/tables/unimplemented.py @@ -0,0 +1,175 @@ +"""Here is defined the UnImplemented class.""" + +from __future__ import annotations + +import warnings +from typing import TYPE_CHECKING + +from . import hdf5extension +from .leaf import Leaf +from .node import Node +from .utils import SizeType + +if TYPE_CHECKING: + from .group import Group + + +class UnImplemented(hdf5extension.UnImplemented, Leaf): + """Class represents datasets not supported by PyTables in an HDF5 file. + + When reading a generic HDF5 file (i.e. one that has not been created with + PyTables, but with some other HDF5 library based tool), chances are that + the specific combination of datatypes or dataspaces in some dataset might + not be supported by PyTables yet. In such a case, this dataset will be + mapped into an UnImplemented instance and the user will still be able to + access the complete object tree of the generic HDF5 file. The user will + also be able to *read and write the attributes* of the dataset, *access + some of its metadata*, and perform *certain hierarchy manipulation + operations* like deleting or moving (but not copying) the node. Of course, + the user will not be able to read the actual data on it. 
+ + This is an elegant way to allow users to work with generic HDF5 files + despite the fact that some of its datasets are not supported by + PyTables. However, if you are really interested in having full access to an + unimplemented dataset, please get in contact with the developer team. + + This class does not have any public instance variables or methods, except + those inherited from the Leaf class (see :ref:`LeafClassDescr`). + + """ + + # Class identifier. + _c_classid = "UNIMPLEMENTED" + + def __init__(self, parentnode: Group, name: str) -> None: + """Create the `UnImplemented` instance.""" + # UnImplemented objects always come from opening an existing node + # (they can not be created). + self._v_new = False + """Is this the first time the node has been created?""" + self.nrows = SizeType(0) + """The length of the first dimension of the data.""" + self.shape = (SizeType(0),) + """The shape of the stored data.""" + self.byteorder: str | None = None + """The endianness of data in memory ('big', 'little' or + 'irrelevant').""" + + super().__init__(parentnode, name) + + def _g_open(self) -> int: + self.shape, self.byteorder, object_id = self._open_unimplemented() + try: + self.nrows = SizeType(self.shape[0]) + except IndexError: + self.nrows = SizeType(0) + return object_id + + def _g_copy( + self, + newparent: Group, + newname: str, + recursive: bool, + _log: bool = True, + **kwargs, + ) -> None: + """Do nothing. + + This method does nothing, but a ``UserWarning`` is issued. + Please note that this method *does not return a new node*, but + ``None``. + + """ + warnings.warn( + f"UnImplemented node {self._v_pathname!r} does not know how " + f"to copy itself; skipping" + ) + return None # Can you see it? + + def _f_copy( + self, + newparent: Group | None = None, + newname: str | None = None, + overwrite: bool = False, + recursive: bool = False, + createparents: bool = False, + **kwargs, + ) -> None: + """Do nothing. 
+ + This method does nothing, since `UnImplemented` nodes can not + be copied. However, a ``UserWarning`` is issued. Please note + that this method *does not return a new node*, but ``None``. + + """ + # This also does nothing but warn. + self._g_copy(newparent, newname, recursive, **kwargs) + return None # Can you see it? + + def __repr__(self) -> str: + return f"""{str(self)} + NOTE: +""" + + +# Classes reported as H5G_UNKNOWN by HDF5 +class Unknown(Node): + """Class representing nodes reported as *unknown* by the HDF5 library. + + This class does not have any public instance variables or methods, except + those inherited from the Node class. + + """ + + # Class identifier + _c_classid = "UNKNOWN" + + def __init__(self, parentnode: Group, name: str) -> None: + """Create the `Unknown` instance.""" + self._v_new = False + super().__init__(parentnode, name) + + def _g_new(self, parentnode: Group, name: str, init: bool = False) -> None: + pass + + def _g_open(self) -> int: + return 0 + + def _g_copy( + self, + newparent: Group, + newname: str, + recursive: bool, + _log: bool = True, + **kwargs, + ) -> None: + # Silently avoid doing copies of unknown nodes + return None + + def _g_delete(self, parent: Group) -> None: + pass + + def __str__(self) -> str: + pathname = self._v_pathname + classname = self.__class__.__name__ + return f"{pathname} ({classname})" + + def __repr__(self) -> str: + return f"""{self!s} + NOTE: +""" + + +# These are listed here for backward compatibility with PyTables 0.9.x indexes +class OldIndexArray(UnImplemented): + """Old IndexArray. + + Provided for compatibility with PyTables 0.9. 
def correct_byteorder(ptype: str, byteorder: str) -> str:
    """Return the byteorder that applies to the PyTables type `ptype`.

    For the types "string", "bool", "int8", "uint8" and "object" the
    result is always ``"irrelevant"``; for any other type `byteorder` is
    returned unchanged.
    """
    order_free_types = ("string", "bool", "int8", "uint8", "object")
    return "irrelevant" if ptype in order_free_types else byteorder
def idx2long(index: int | float | np.ndarray) -> int:
    """Convert a possible index into a plain Python number.

    Objects exposing an ``item()`` method are converted through it;
    anything else goes through ``int()``.  Any failure during the
    conversion is reported as ``TypeError``.
    """
    try:
        return index.item() if hasattr(index, "item") else int(index)
    except Exception:
        raise TypeError("not an integer type.")
def check_file_access(
    filename: str, mode: Literal["r", "w", "a", "r+"] = "r"
) -> None:
    """Check that `filename` can be accessed in the given `mode`.

    `mode` is one of the modes supported by `File` objects.  The function
    returns ``None`` when the access is possible and raises otherwise:

    * ``FileNotFoundError`` if the file (or, for a file to be created,
      its parent directory) does not exist;
    * ``IsADirectoryError`` if the path exists but is not a regular file;
    * ``NotADirectoryError`` if the parent of a file to be created is not
      a directory;
    * ``PermissionError`` if the required read or write access is missing;
    * ``ValueError`` if `mode` is not a supported mode.

    These checks exist to avoid the lengthy HDF5 messages produced when a
    file cannot be opened.  Nothing is changed on the file system.
    """
    path = Path(filename).resolve()

    if mode not in ("r", "w", "a", "r+"):
        raise ValueError(f"invalid mode: {mode!r}")

    file_exists = os.access(path, os.F_OK)

    if mode in ("w", "a") and not file_exists:
        # A new file is going to be created, so only the parent directory
        # matters: it has to exist, be a directory and be writable.
        folder = path.parent
        if not os.access(folder, os.F_OK):
            raise FileNotFoundError(f"``{folder}`` does not exist")
        if not folder.is_dir():
            raise NotADirectoryError(f"``{folder}`` is not a directory")
        if not os.access(folder, os.W_OK):
            raise PermissionError(
                f"directory ``{folder}`` exists but it can not be "
                f"written"
            )
        return

    # From here on an existing file is required: it must be a readable
    # regular file for every mode...
    if not file_exists:
        raise FileNotFoundError(f"``{path}`` does not exist")
    if not path.is_file():
        raise IsADirectoryError(f"``{path}`` is not a regular file")
    if not os.access(path, os.R_OK):
        raise PermissionError(
            f"file ``{path}`` exists but it can not be read"
        )

    # ...and additionally writable for every mode but plain reading (an
    # existing file opened with "w" or "a" is replaced/updated in place).
    if mode != "r" and not os.access(path, os.W_OK):
        raise PermissionError(
            f"file ``{path}`` exists but it can not be written"
        )
# truncate data before calling __setitem__, to improve compression ratio
# this function is adapted from netcdf4-python
def quantize(data: npt.ArrayLike, least_significant_digit: int):
    """Quantize data to improve compression.

    Data is quantized using around(scale*data)/scale, where scale is
    2**bits, and bits is determined from the least_significant_digit.

    For example, if least_significant_digit=1, bits will be 4.

    `data` may be any array-like object.  It is coerced to a NumPy array
    first, so that plain Python sequences are scaled element-wise.

    """
    exp = -least_significant_digit
    exp = math.floor(exp) if exp < 0 else math.ceil(exp)
    bits = math.ceil(math.log2(10**-exp))
    scale = 2**bits
    # Without the coercion ``scale * data`` would *repeat* a list or tuple
    # ``scale`` times instead of multiplying its items.  ``asanyarray``
    # passes ndarrays (and ndarray subclasses) through untouched.
    values = np.asanyarray(data)
    datout = np.around(scale * values) / scale

    return datout
#
# A class useful for cache usage
#
class CacheDict(dict):
    """A dictionary that prevents itself from growing too much.

    Once the dictionary holds more than `maxentries` items, setting an
    item first evicts about 10% of `maxentries` (at least one) of the
    oldest inserted entries.
    """

    def __init__(self, maxentries: int) -> None:
        """Create an empty cache limited to roughly `maxentries` items."""
        self.maxentries = maxentries
        super().__init__()

    def __setitem__(self, key: str, value: Any) -> None:
        """Store `value` under `key`, evicting entries if the cache is full."""
        # Protection against growing the cache too much
        if len(self) > self.maxentries:
            # Remove a 10% of (arbitrary) elements from the cache.  Integer
            # division is required because the count is used as a slice
            # bound, and at least one entry has to go for small caches.
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(self)[:entries_to_remove]:
                super().__delitem__(k)
        super().__setitem__(key, value)
def detect_number_of_cores() -> int:
    """Detect the number of cores on a system.

    The following sources are tried in order and the first one yielding a
    positive integer wins:

    1. ``os.sysconf("SC_NPROCESSORS_ONLN")`` (Linux, Unix, macOS);
    2. the ``NUMBER_OF_PROCESSORS`` environment variable (Windows);
    3. ``os.cpu_count()``.

    If none of them gives a usable answer, 1 is returned.
    """
    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf") and "SC_NPROCESSORS_ONLN" in os.sysconf_names:
        ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
        if isinstance(ncpus, int) and ncpus > 0:
            return ncpus

    # Windows:
    try:
        ncpus = int(os.environ.get("NUMBER_OF_PROCESSORS", ""))
    except ValueError:
        # Unset or malformed variable: fall through to the next source.
        ncpus = 0
    if ncpus > 0:
        return ncpus

    # Portable fallback.  It replaces the former ``sysctl -n hw.ncpu``
    # query, which went through ``os.popen2`` (not available in Python 3).
    return os.cpu_count() or 1  # Default
+######################################################################## + +""" +These are declarations for functions in utilsextension.pyx that have to +be shared with other extensions. +""" + +from numpy cimport ndarray + +from .definitions cimport hsize_t, hid_t, hobj_ref_t + + +cdef hsize_t *malloc_dims(object) +cdef hid_t get_native_type(hid_t) nogil +cdef str cstr_to_pystr(const char*) +cdef int load_reference(hid_t dataset_id, hobj_ref_t *refbuf, size_t item_size, ndarray nparr) except -1 diff --git a/venv/Lib/site-packages/tables/utilsextension.pyd b/venv/Lib/site-packages/tables/utilsextension.pyd new file mode 100644 index 0000000..8561e7a Binary files /dev/null and b/venv/Lib/site-packages/tables/utilsextension.pyd differ diff --git a/venv/Lib/site-packages/tables/utilsextension.pyx b/venv/Lib/site-packages/tables/utilsextension.pyx new file mode 100644 index 0000000..c06b206 --- /dev/null +++ b/venv/Lib/site-packages/tables/utilsextension.pyx @@ -0,0 +1,1668 @@ +######################################################################## +# +# License: BSD +# Created: May 20, 2005 +# Author: Francesc Alted - faltet@pytables.com +# +# $Id$ +# +######################################################################## + +"""Cython utilities for PyTables and HDF5 library.""" + +import os +import sys +import warnings + +try: + import zlib + zlib_imported = True +except ImportError: + zlib_imported = False + +import numpy as np + +from .atom import Atom, EnumAtom, ReferenceAtom +from .utils import check_file_access +from .misc.enum import Enum +from .exceptions import HDF5ExtError +from .description import Description, Col + +from libc.stdio cimport stderr +from libc.stdlib cimport malloc, free +from libc.string cimport strchr, strcmp, strncmp, strlen +from cpython.bytes cimport PyBytes_Check, PyBytes_FromStringAndSize +from cpython.unicode cimport PyUnicode_DecodeUTF8, PyUnicode_Check + + +# Functions from Blosc +cdef extern from "blosc.h" nogil: + void 
blosc_init() + int blosc_set_nthreads(int nthreads) + const char* blosc_list_compressors() + int blosc_compcode_to_compname(int compcode, char **compname) + int blosc_get_complib_info(char *compname, char **complib, char **version) + +# Functions from Blosc2 +cdef extern from "blosc2.h" nogil: + void blosc2_init() + int blosc2_set_nthreads(int nthreads) + const char* blosc2_list_compressors() + int blosc2_compcode_to_compname(int compcode, char **compname) + int blosc2_get_complib_info(char *compname, char **complib, char **version) + +from numpy cimport ( + import_array, + ndarray, + dtype, + npy_int64, + PyArray_DATA, + PyArray_GETPTR1, + PyArray_DescrFromType, + npy_intp, + NPY_BOOL, + NPY_STRING, + NPY_INT8, + NPY_INT16, + NPY_INT32, + NPY_INT64, + NPY_UINT8, + NPY_UINT16, + NPY_UINT32, + NPY_UINT64, + NPY_FLOAT16, + NPY_FLOAT32, + NPY_FLOAT64, + NPY_COMPLEX64, + NPY_COMPLEX128, +) + +from .definitions cimport ( + H5ARRAYget_info, + H5ARRAYget_ndims, + H5ATTRfind_attribute, + H5ATTRget_attribute_string, + H5D_CHUNKED, + H5D_layout_t, + H5Dclose, + H5Dget_type, + H5Dopen, + H5E_DEFAULT, + H5E_WALK_DOWNWARD, + H5E_auto_t, + H5E_error_t, + H5E_walk_t, + H5Eget_msg, + H5Eprint, + H5Eset_auto, + H5Ewalk, + H5F_ACC_RDONLY, + H5Fclose, + H5Fis_hdf5, + H5Fopen, + H5Gclose, + H5Gopen, + H5P_DEFAULT, + H5T_ARRAY, + H5T_BITFIELD, + H5T_COMPOUND, + H5T_CSET_ASCII, + H5T_CSET_UTF8, + H5T_C_S1, + H5T_DIR_DEFAULT, + H5T_ENUM, + H5T_FLOAT, + H5T_IEEE_F32BE, + H5T_IEEE_F32LE, + H5T_IEEE_F64BE, + H5T_IEEE_F64LE, + H5T_INTEGER, + H5T_NATIVE_DOUBLE, + H5T_NATIVE_LDOUBLE, + H5T_NO_CLASS, + H5T_OPAQUE, + H5T_ORDER_BE, + H5T_ORDER_LE, + H5T_REFERENCE, + H5T_STD_B8BE, + H5T_STD_B8LE, + H5T_STD_I16BE, + H5T_STD_I16LE, + H5T_STD_I32BE, + H5T_STD_I32LE, + H5T_STD_I64BE, + H5T_STD_I64LE, + H5T_STD_I8BE, + H5T_STD_I8LE, + H5T_STD_U16BE, + H5T_STD_U16LE, + H5T_STD_U32BE, + H5T_STD_U32LE, + H5T_STD_U64BE, + H5T_STD_U64LE, + H5T_STD_U8BE, + H5T_STD_U8LE, + H5T_STRING, + H5T_TIME, + 
H5T_UNIX_D32BE, + H5T_UNIX_D32LE, + H5T_UNIX_D64BE, + H5T_UNIX_D64LE, + H5T_VLEN, + H5T_class_t, + H5T_sign_t, + H5Tarray_create, + H5Tclose, + H5Tequal, + H5Tcopy, + H5Tcreate, + H5Tenum_create, + H5Tenum_insert, + H5Tget_array_dims, + H5Tget_array_ndims, + H5Tget_class, + H5Tget_member_name, + H5Tget_member_type, + H5Tget_member_value, + H5Tget_native_type, + H5Tget_nmembers, + H5Tget_offset, + H5Tget_order, + H5Tget_member_offset, + H5Tget_precision, + H5Tget_sign, + H5Tget_size, + H5Tget_super, + H5Tinsert, + H5Tis_variable_str, + H5Tpack, + H5Tset_precision, + H5Tset_size, + H5Tvlen_create, + H5Zunregister, + FILTER_BLOSC, + FILTER_BLOSC2, + PyArray_Scalar, + create_ieee_complex128, + create_ieee_complex64, + create_ieee_float16, + create_ieee_complex192, + create_ieee_complex256, + get_len_of_range, + get_order, + herr_t, + hid_t, + hsize_t, + hssize_t, + htri_t, + is_complex, + register_blosc, + register_blosc2, + set_order, + H5free_memory, + H5T_STD_REF_OBJ, + H5Rdereference, + H5R_OBJECT, + H5I_DATASET, + H5I_REFERENCE, + H5Iget_type, + hobj_ref_t, + H5Oclose, +) + +# Platform-dependent types +if sys.byteorder == "little": + platform_byteorder = H5T_ORDER_LE + # Standard types, independent of the byteorder + H5T_STD_B8 = H5T_STD_B8LE + H5T_STD_I8 = H5T_STD_I8LE + H5T_STD_I16 = H5T_STD_I16LE + H5T_STD_I32 = H5T_STD_I32LE + H5T_STD_I64 = H5T_STD_I64LE + H5T_STD_U8 = H5T_STD_U8LE + H5T_STD_U16 = H5T_STD_U16LE + H5T_STD_U32 = H5T_STD_U32LE + H5T_STD_U64 = H5T_STD_U64LE + H5T_IEEE_F32 = H5T_IEEE_F32LE + H5T_IEEE_F64 = H5T_IEEE_F64LE + H5T_UNIX_D32 = H5T_UNIX_D32LE + H5T_UNIX_D64 = H5T_UNIX_D64LE +else: # sys.byteorder == "big" + platform_byteorder = H5T_ORDER_BE + # Standard types, independent of the byteorder + H5T_STD_B8 = H5T_STD_B8BE + H5T_STD_I8 = H5T_STD_I8BE + H5T_STD_I16 = H5T_STD_I16BE + H5T_STD_I32 = H5T_STD_I32BE + H5T_STD_I64 = H5T_STD_I64BE + H5T_STD_U8 = H5T_STD_U8BE + H5T_STD_U16 = H5T_STD_U16BE + H5T_STD_U32 = H5T_STD_U32BE + H5T_STD_U64 = 
H5T_STD_U64BE + H5T_IEEE_F32 = H5T_IEEE_F32BE + H5T_IEEE_F64 = H5T_IEEE_F64BE + H5T_UNIX_D32 = H5T_UNIX_D32BE + H5T_UNIX_D64 = H5T_UNIX_D64BE + + +#---------------------------------------------------------------------------- + +# Conversion from PyTables string types to HDF5 native types +# List only types that are susceptible of changing byteorder +# (complex & enumerated types are special and should not be listed here) +pttype_to_hdf5 = { + 'int8' : H5T_STD_I8, 'uint8' : H5T_STD_U8, + 'int16' : H5T_STD_I16, 'uint16' : H5T_STD_U16, + 'int32' : H5T_STD_I32, 'uint32' : H5T_STD_U32, + 'int64' : H5T_STD_I64, 'uint64' : H5T_STD_U64, + 'float32': H5T_IEEE_F32, 'float64': H5T_IEEE_F64, + 'float96': H5T_NATIVE_LDOUBLE, 'float128': H5T_NATIVE_LDOUBLE, + 'time32' : H5T_UNIX_D32, 'time64' : H5T_UNIX_D64, +} + +# Special cases whose byteorder cannot be directly changed +pt_special_kinds = ['complex', 'string', 'enum', 'bool'] + +# Conversion table from NumPy extended codes prefixes to PyTables kinds +npext_prefixes_to_ptkinds = { + "S": "string", + "b": "bool", + "i": "int", + "u": "uint", + "f": "float", + "c": "complex", + "t": "time", + "e": "enum", +} + +# Names of HDF5 classes +hdf5_class_to_string = { + H5T_NO_CLASS : 'H5T_NO_CLASS', + H5T_INTEGER : 'H5T_INTEGER', + H5T_FLOAT : 'H5T_FLOAT', + H5T_TIME : 'H5T_TIME', + H5T_STRING : 'H5T_STRING', + H5T_BITFIELD : 'H5T_BITFIELD', + H5T_OPAQUE : 'H5T_OPAQUE', + H5T_COMPOUND : 'H5T_COMPOUND', + H5T_REFERENCE : 'H5T_REFERENCE', + H5T_ENUM : 'H5T_ENUM', + H5T_VLEN : 'H5T_VLEN', + H5T_ARRAY : 'H5T_ARRAY', +} + + +# Depprecated API +PTTypeToHDF5 = pttype_to_hdf5 +PTSpecialKinds = pt_special_kinds +NPExtPrefixesToPTKinds = npext_prefixes_to_ptkinds +HDF5ClassToString = hdf5_class_to_string + + +from numpy import sctypeDict + + +cdef int have_float16 = ("float16" in sctypeDict) + + +#---------------------------------------------------------------------- + +# External declarations + + +# PyTables helper routines. 
+cdef extern from "utils.h": + + #object getZLIBVersionInfo() + object getHDF5VersionInfo() + object get_filter_names( hid_t loc_id, char *dset_name) + + H5T_class_t getHDF5ClassID(hid_t loc_id, char *name, H5D_layout_t *layout, + hid_t *type_id, hid_t *dataset_id) nogil + + +cdef extern from "H5ARRAY.h" nogil: + herr_t H5ARRAYread(hid_t dataset_id, hid_t type_id, + hsize_t start, hsize_t nrows, hsize_t step, + int extdim, void *data) + +# @TODO: use the c_string_type and c_string_encoding global directives +# (new in cython 0.19) +# TODO: drop +cdef str cstr_to_pystr(const char* cstring): + return cstring.decode('utf-8') + + +#---------------------------------------------------------------------- +# Initialization code + +# The NumPy API requires this function to be called before +# using any NumPy facilities in an extension module. +import_array() + +# NaN-aware sorting with NaN as the greatest element +# numpy.isnan only takes floats, this should work for strings too +cpdef nan_aware_lt(a, b): return a < b or (b != b and a == a) +cpdef nan_aware_le(a, b): return a <= b or b != b +cpdef nan_aware_gt(a, b): return a > b or (a != a and b == b) +cpdef nan_aware_ge(a, b): return a >= b or a != a + +def bisect_left(a, x, int lo=0): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e < x, and all e in + a[i:] have e >= x. So if x already appears in the list, i points just + before the leftmost x already there. + + """ + + cdef int mid, hi = len(a) + + lo = 0 + while lo < hi: + mid = (lo+hi)//2 + if nan_aware_lt(a[mid], x): lo = mid+1 + else: hi = mid + return lo + +def bisect_right(a, x, int lo=0): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e <= x, and all e in + a[i:] have e > x. So if x already appears in the list, i points just + beyond the rightmost x already there. 
+ + """ + + cdef int mid, hi = len(a) + + lo = 0 + while lo < hi: + mid = (lo+hi)//2 + if nan_aware_lt(x, a[mid]): hi = mid + else: lo = mid+1 + return lo + +cdef register_blosc_(): + cdef char *version + cdef char *date + + register_blosc(&version, &date) + compinfo = (version, date) + free(version) + free(date) + return compinfo[0].decode('ascii'), compinfo[1].decode('ascii') + +blosc_version = register_blosc_() + +cdef register_blosc2_(): + cdef char *version + cdef char *date + + register_blosc2(&version, &date) + compinfo = (version, date) + free(version) + free(date) + return compinfo[0].decode('ascii'), compinfo[1].decode('ascii') + +blosc2_version = register_blosc2_() + +blosc_init() # from 1.2 on, Blosc library must be initialized +blosc2_init() + +# Important: Blosc calls that modifies global variables in Blosc must be +# called from the same extension where Blosc is registered in HDF5. +def set_blosc_max_threads(nthreads): + """set_blosc_max_threads(nthreads) + + Set the maximum number of threads that Blosc can use. + + This actually overrides the :data:`tables.parameters.MAX_BLOSC_THREADS` + setting in :mod:`tables.parameters`, so the new value will be effective until + this function is called again or a new file with a different + :data:`tables.parameters.MAX_BLOSC_THREADS` value is specified. + + Returns the previous setting for maximum threads. + """ + return blosc_set_nthreads(nthreads) + + +# Important: Blosc2 calls that modifies global variables in Blosc2 must be +# called from the same extension where Blosc2 is registered in HDF5. +def set_blosc2_max_threads(nthreads): + """set_blosc2_max_threads(nthreads) + + Set the maximum number of threads that Blosc2 can use. + + This actually overrides the :data:`tables.parameters.MAX_BLOSC_THREADS` + setting in :mod:`tables.parameters`, so the new value will be effective until + this function is called again or a new file with a different + :data:`tables.parameters.MAX_BLOSC_THREADS` value is specified. 
+ + Returns the previous setting for maximum threads. + """ + return blosc2_set_nthreads(nthreads) + + +# Initialize & register lzo +try: + import tables._comp_lzo + lzo_version = tables._comp_lzo.register_() + lzo_version = lzo_version if lzo_version else None +except ImportError: + lzo_version = None + +# Initialize & register bzip2 +try: + import tables._comp_bzip2 + bzip2_version = tables._comp_bzip2.register_() + bzip2_version = bzip2_version if bzip2_version else None +except ImportError: + bzip2_version = None + + +# End of initialization code +#--------------------------------------------------------------------- + +# Error handling helpers +cdef herr_t e_walk_cb(unsigned n, const H5E_error_t *err, void *data) noexcept with gil: + cdef object bt = data # list + #cdef char major_msg[256] + #cdef char minor_msg[256] + #cdef ssize_t msg_len + + if err == NULL: + return -1 + + #msg_len = H5Eget_msg(err.maj_num, NULL, major_msg, 256) + #if msg_len < 0: + # major_msg[0] = '\0' + + #msg_len = H5Eget_msg(err.min_num, NULL, minor_msg, 256) + #if msg_len < 0: + # minor_msg[0] = '\0' + + #msg = "%s (MAJOR: %s, MINOR: %s)" % ( + # bytes(err.desc).decode('utf-8'), + # bytes(major_msg).decode('utf-8'), + # bytes(minor_msg).decode('utf-8')) + + msg = bytes(err.desc).decode('utf-8') + + bt.append(( + bytes(err.file_name).decode('utf-8'), + err.line, + bytes(err.func_name).decode('utf-8'), + msg, + )) + + return 0 + + +def _dump_h5_backtrace(): + cdef object bt = [] + + if H5Ewalk(H5E_DEFAULT, H5E_WALK_DOWNWARD, e_walk_cb, bt) < 0: + return None + + return bt + + +# Initialization of the _dump_h5_backtrace method of HDF5ExtError. +# The unusual machinery is needed in order to avoid cirdular dependencies +# between modules. +HDF5ExtError._dump_h5_backtrace = staticmethod(_dump_h5_backtrace) + + +def silence_hdf5_messages(silence=True): + """silence_hdf5_messages(silence=True) + + Silence (or re-enable) messages from the HDF5 C library. 
+ + The *silence* parameter can be used control the behaviour and reset + the standard HDF5 logging. + + .. versionadded:: 2.4 + + """ + cdef herr_t err + if silence: + err = H5Eset_auto(H5E_DEFAULT, NULL, NULL) + else: + err = H5Eset_auto(H5E_DEFAULT, H5Eprint, stderr) + if err < 0: + raise HDF5ExtError("unable to configure HDF5 internal error handling") + + + + +# Disable automatic HDF5 error logging +silence_hdf5_messages() + + +# Helper functions +cdef hsize_t *malloc_dims(object pdims): + """Return a malloced hsize_t dims from a python pdims.""" + + cdef int i, rank + cdef hsize_t *dims + + dims = NULL + rank = len(pdims) + if rank > 0: + dims = malloc(rank * sizeof(hsize_t)) + for i in range(rank): + dims[i] = pdims[i] + return dims + + +cdef hid_t get_native_float_type(hid_t type_id) nogil: + """Get a native type of an HDF5 float type. + + This function also handles half precision (float16) data type. + + """ + + cdef hid_t native_type_id + cdef size_t precision + + precision = H5Tget_precision(type_id) + + if precision == 16 and have_float16: + native_type_id = create_ieee_float16(NULL) + else: + native_type_id = H5Tget_native_type(type_id, H5T_DIR_DEFAULT) + + return native_type_id + + +# TODO: simplify this routine (now PyTables requires HDF5 >= 1.10.5) +# This routine is more complex than required because HDF5 1.6.x does +# not implement support for H5Tget_native_type with some types, like +# H5T_BITFIELD and probably others. When 1.8.x would be a requisite, +# this can be simplified. 
+cdef hid_t get_native_type(hid_t type_id) noexcept nogil: + """Get the native type of a HDF5 type.""" + + cdef H5T_class_t class_id, super_class_id + cdef hid_t native_type_id = 0, super_type_id, native_super_type_id + cdef int rank + cdef hsize_t *dims + + class_id = H5Tget_class(type_id) + if class_id == H5T_COMPOUND: + return H5Tget_native_type(type_id, H5T_DIR_DEFAULT) + + elif class_id in (H5T_ARRAY, H5T_VLEN): + # Get the array base component + super_type_id = H5Tget_super(type_id) + # Get the class + super_class_id = H5Tget_class(super_type_id) + if super_class_id == H5T_FLOAT: + # replicate the logic of H5Tget_native_type for H5T_ARRAY and + # H5T_VLEN taking into account extended floating point types + # XXX: HDF5 error check + native_super_type_id = get_native_float_type(super_type_id) + H5Tclose(super_type_id) + if class_id == H5T_ARRAY: + rank = H5Tget_array_ndims(type_id) + dims = malloc(rank * sizeof(hsize_t)) + H5Tget_array_dims(type_id, dims) + native_type_id = H5Tarray_create(native_super_type_id, rank, dims) + free(dims) + H5Tclose(native_super_type_id) + return native_type_id + elif class_id == H5T_VLEN: + native_type_id = H5Tvlen_create(native_super_type_id) + H5Tclose(native_super_type_id) + return native_type_id + class_id = super_class_id + H5Tclose(super_type_id) + + if class_id == H5T_FLOAT: + native_type_id = get_native_float_type(type_id) + elif class_id in (H5T_INTEGER, H5T_ENUM): + native_type_id = H5Tget_native_type(type_id, H5T_DIR_DEFAULT) + else: + # Fixing the byteorder for other types shouldn't be needed. + # More in particular, H5T_TIME is not managed yet by HDF5 and so this + # has to be managed explicitely inside the PyTables extensions. + # Regarding H5T_BITFIELD, well, I'm not sure if changing the byteorder + # of this is a good idea at all. 
+ native_type_id = H5Tcopy(type_id) + + return native_type_id + + +def encode_filename(object filename): + """Return the encoded filename in the filesystem encoding.""" + + cdef bytes encname + + if hasattr(os, 'fspath'): + filename = os.fspath(filename) + + if isinstance(filename, (unicode, np.str_)): +# if type(filename) is unicode: + encoding = sys.getfilesystemencoding() + encname = filename.encode(encoding, 'replace') + else: + encname = filename + + return encname + + +# Main functions +def is_hdf5_file(object filename): + """is_hdf5_file(filename) + + Determine whether a file is in the HDF5 format. + + When successful, it returns a true value if the file is an HDF5 + file, false otherwise. If there were problems identifying the file, + an HDF5ExtError is raised. + + """ + + # Check that the file exists and is readable. + check_file_access(filename) + + # Encode the filename in case it is unicode + encname = encode_filename(filename) + + ret = H5Fis_hdf5(encname) + if ret < 0: + raise HDF5ExtError("problems identifying file ``%s``" % (filename,)) + return ret > 0 + + + + +def is_pytables_file(object filename): + """is_pytables_file(filename) + + Determine whether a file is in the PyTables format. + + When successful, it returns the format version string if the file is a + PyTables file, None otherwise. If there were problems identifying the + file, an HDF5ExtError is raised. 
+ + """ + + cdef hid_t file_id + cdef object isptf = None # A PYTABLES_FORMAT_VERSION attribute was not found + + if is_hdf5_file(filename): + # Encode the filename in case it is unicode + encname = encode_filename(filename) + # The file exists and is HDF5, that's ok + # Open it in read-only mode + file_id = H5Fopen(encname, H5F_ACC_RDONLY, H5P_DEFAULT) + isptf = read_f_attr(file_id, 'PYTABLES_FORMAT_VERSION') + # Close the file + H5Fclose(file_id) + + # system attributes should always be str + if PyBytes_Check(isptf): + isptf = isptf.decode('utf-8') + + return isptf + + + + +def get_hdf5_version(): + """Get the underlying HDF5 library version""" + + return getHDF5VersionInfo()[1] + + +def which_lib_version(str name): + """which_lib_version(name) + + Get version information about a C library. + + If the library indicated by name is available, this function returns a + 3-tuple containing the major library version as an integer, its full version + as a string, and the version date as a string. If the library is not + available, None is returned. + + The currently supported library names are hdf5, zlib, lzo, bzip2, and blosc. If + another name is given, a ValueError is raised. 
+ + """ + + cdef char *cname = NULL + cdef bytes encoded_name + + encoded_name = name.encode('utf-8') + # get the C pointer + cname = encoded_name + + libnames = ('hdf5', 'zlib', 'lzo', 'bzip2', 'blosc', 'blosc2') + + if strcmp(cname, "hdf5") == 0: + binver, strver = getHDF5VersionInfo() + return (binver, strver, None) # Should be always available + elif strcmp(cname, "zlib") == 0: + if zlib_imported: + return (1, zlib.ZLIB_VERSION, None) + elif strcmp(cname, "lzo") == 0: + if lzo_version: + (lzo_version_string, lzo_version_date) = lzo_version + return (lzo_version, lzo_version_string, lzo_version_date) + elif strcmp(cname, "bzip2") == 0: + if bzip2_version: + (bzip2_version_string, bzip2_version_date) = bzip2_version + return (bzip2_version, bzip2_version_string, bzip2_version_date) + elif strncmp(cname, "blosc2", 6) == 0: + if blosc2_version: + (blosc2_version_string, blosc2_version_date) = blosc2_version + return (blosc2_version, blosc2_version_string, blosc2_version_date) + elif strncmp(cname, "blosc", 5) == 0: + if blosc_version: + (blosc_version_string, blosc_version_date) = blosc_version + return (blosc_version, blosc_version_string, blosc_version_date) + else: + raise ValueError("asked version of unsupported library ``%s``; " + "supported library names are ``%s``" % (name, libnames)) + + # A supported library was specified, but no version is available. + return None + + + + +# A function returning all the compressors supported by Blosc +def blosc_compressor_list(): + """ + Returns a list of compressors available in the Blosc build. + + Parameters + ---------- + None + + Returns + ------- + out : list + The list of names. + """ + list_compr = blosc_list_compressors().decode() + clist = [str(cname) for cname in list_compr.split(',')] + return clist + + +# A function returning all the compressors supported by Blosc2 +def blosc2_compressor_list(): + """ + Returns a list of compressors available in the Blosc build. 
+ + Parameters + ---------- + None + + Returns + ------- + out : list + The list of names. + """ + list_compr = blosc2_list_compressors().decode() + clist = [str(cname) for cname in list_compr.split(',')] + return clist + + +# Convert compressor code to compressor name +def blosc_compcode_to_compname_(compcode): + """ + Returns the compressor name associated with compressor code. + + Parameters + ---------- + None + + Returns + ------- + out : string + The name of the compressor. + """ + cdef const char *cname + cdef object compname + + compname = b"unknown (report this to developers)" + if blosc_compcode_to_compname(compcode, &cname) >= 0: + compname = cname + return compname.decode() + + +# Convert compressor code to compressor name +def blosc2_compcode_to_compname_(compcode): + """ + Returns the compressor name associated with compressor code. + + Parameters + ---------- + None + + Returns + ------- + out : string + The name of the compressor. + """ + cdef const char *cname + cdef object compname + + compname = b"unknown (report this to developers)" + if blosc2_compcode_to_compname(compcode, &cname) >= 0: + compname = cname + return compname.decode() + + +def blosc_get_complib_info_(): + """Get info from compression libraries included in Blosc. + + Returns a mapping containing the compressor names as keys and the + tuple (complib, version) as values. + + """ + + cdef char *complib + cdef char *version + + cinfo = {} + for name in blosc_list_compressors().split(b','): + ret = blosc_get_complib_info(name, &complib, &version) + if ret < 0: + continue + if isinstance(name, str): + cinfo[name] = (complib, version) + else: + cinfo[name.decode()] = (complib.decode(), version.decode()) + free(complib) + free(version) + + return cinfo + +def blosc2_get_complib_info_(): + """Get info from compression libraries included in Blosc2. + + Returns a mapping containing the compressor names as keys and the + tuple (complib, version) as values. 
+ + """ + + cdef char *complib + cdef char *version + + cinfo = {} + for name in blosc2_list_compressors().split(b','): + ret = blosc2_get_complib_info(name, &complib, &version) + if ret < 0: + continue + if isinstance(name, str): + cinfo[name] = (complib, version) + else: + cinfo[name.decode()] = (complib.decode(), version.decode()) + free(complib) + free(version) + + return cinfo + + +def which_class(hid_t loc_id, object name): + """Detects a class ID using heuristics.""" + + cdef H5T_class_t class_id + cdef H5D_layout_t layout + cdef hsize_t nfields + cdef char *field_name1 + cdef char *field_name2 + cdef int i + cdef hid_t type_id, dataset_id + cdef object classId + cdef int rank + cdef hsize_t *dims + cdef hsize_t *maxdims + cdef char byteorder[11] # "irrelevant" fits easily here + cdef bytes encoded_name + + if isinstance(name, unicode): + encoded_name = name.encode('utf-8') + else: + encoded_name = name + + classId = "UNSUPPORTED" # default value + # Get The HDF5 class for the datatype in this dataset + class_id = getHDF5ClassID(loc_id, encoded_name, &layout, &type_id, + &dataset_id) + # Check if this a dataset of supported classtype for ARRAY + if ((class_id == H5T_INTEGER) or + (class_id == H5T_FLOAT) or + (class_id == H5T_BITFIELD) or + (class_id == H5T_TIME) or + (class_id == H5T_ENUM) or + (class_id == H5T_STRING) or + (class_id == H5T_ARRAY) or + (class_id == H5T_REFERENCE)): + if layout == H5D_CHUNKED: + if H5ARRAYget_ndims(dataset_id, &rank) < 0: + raise HDF5ExtError("Problems getting ndims.") + dims = malloc(rank * sizeof(hsize_t)) + maxdims = malloc(rank * sizeof(hsize_t)) + if H5ARRAYget_info(dataset_id, type_id, dims, maxdims, + &class_id, byteorder) < 0: + raise HDF5ExtError("Unable to get array info.") + classId = "CARRAY" + # Check whether some dimension is enlargeable + for i in range(rank): + if maxdims[i] == -1: + classId = "EARRAY" + break + free(dims) + free(maxdims) + else: + classId = "ARRAY" + + elif class_id == H5T_COMPOUND: + # check 
whether the type is complex or not + iscomplex = False + nfields = H5Tget_nmembers(type_id) + if nfields == 2: + field_name1 = H5Tget_member_name(type_id, 0) + field_name2 = H5Tget_member_name(type_id, 1) + # The pair ("r", "i") is for PyTables. ("real", "imag") for Octave. + if ( (strcmp(field_name1, "real") == 0 and + strcmp(field_name2, "imag") == 0) or + (strcmp(field_name1, "r") == 0 and + strcmp(field_name2, "i") == 0) ): + iscomplex = True + H5free_memory(field_name1) + H5free_memory(field_name2) + if layout == H5D_CHUNKED: + if iscomplex: + classId = "CARRAY" + else: + classId = "TABLE" + else: # Not chunked case + # Octave saves complex arrays as non-chunked tables + # with two fields: "real" and "imag" + # Francesc Alted 2005-04-29 + # Get number of records + if iscomplex: + classId = "ARRAY" # It is probably an Octave complex array + else: + # Added to support non-chunked tables + classId = "TABLE" # A test for supporting non-growable tables + + elif class_id == H5T_VLEN: + if layout == H5D_CHUNKED: + classId = "VLARRAY" + + # Release the datatype. + H5Tclose(type_id) + + # Close the dataset. + H5Dclose(dataset_id) + + # Fallback + return classId + + + + +def get_nested_field(recarray, fieldname): + """Get the maybe nested field named `fieldname` from the `recarray`. + + The `fieldname` may be a simple field name or a nested field name + with slash-separated components. + + """ + + if not isinstance(fieldname, str): + raise TypeError + + cdef bytes name = fieldname.encode('utf-8') + try: + if strchr(name, 47) != NULL: # ord('/') == 47 + # It may be convenient to implement this way of descending nested + # fields into the ``__getitem__()`` method of a subclass of + # ``numpy.ndarray``. 
-- ivb + field = recarray + for nfieldname in fieldname.split('/'): + field = field[nfieldname] + else: + # Faster method for non-nested columns + field = recarray[fieldname] + except KeyError: + raise KeyError("no such column: %s" % (fieldname,)) + return field + + + + +def read_f_attr(hid_t file_id, str attr_name): + """Read PyTables file attributes (i.e. in root group). + + Returns the value of the `attr_name` attribute in root group, or `None` + if it does not exist. This call cannot fail. + + """ + + cdef size_t size + cdef char *attr_value + cdef int cset = H5T_CSET_ASCII + cdef object retvalue + cdef bytes encoded_attr_name + cdef char *c_attr_name = NULL + + encoded_attr_name = attr_name.encode('utf-8') + # Get the C pointer + c_attr_name = encoded_attr_name + + attr_value = NULL + retvalue = None + # Check if attribute exists + if H5ATTRfind_attribute(file_id, c_attr_name): + # Read the attr_name attribute + size = H5ATTRget_attribute_string(file_id, c_attr_name, &attr_value, &cset) + if size == 0: + if cset == H5T_CSET_UTF8: + retvalue = np.str_('') + else: + retvalue = np.bytes_(b'') + else: + retvalue = (attr_value).rstrip(b'\x00') + if cset == H5T_CSET_UTF8: + retvalue = retvalue.decode('utf-8') + retvalue = np.str_(retvalue) + else: + retvalue = np.bytes_(retvalue) # bytes + + # Important to release attr_value, because it has been malloc'ed! + if attr_value: + free(attr_value) + + return retvalue + + +def get_filters(parent_id, name): + """Get a dictionary with the filter names and cd_values""" + + cdef bytes encoded_name + + encoded_name = name.encode('utf-8') + + return get_filter_names(parent_id, encoded_name) + + + + +# This is used by several ._convert_types() methods. +def get_type_enum(hid_t h5type): + """_getTypeEnum(h5type) -> hid_t + + Get the native HDF5 enumerated type of `h5type`. + + If `h5type` is an enumerated type, it is returned. If it is a + variable-length type with an enumerated base type, this is returned. 
If it + is a multi-dimensional type with an enumerated base type, this is returned. + Else, a ``TypeError`` is raised. + + """ + + cdef H5T_class_t typeClass + cdef hid_t enumId, enumId2 + + typeClass = H5Tget_class(h5type) + if typeClass < 0: + raise HDF5ExtError("failed to get class of HDF5 type") + + if typeClass == H5T_ENUM: + # Get the native type (in order to do byteorder conversions automatically) + enumId = H5Tget_native_type(h5type, H5T_DIR_DEFAULT) + elif typeClass in (H5T_ARRAY, H5T_VLEN): + # The field is multi-dimensional or variable length. + enumId2 = H5Tget_super(h5type) + enumId = get_type_enum(enumId2) + H5Tclose(enumId2) + else: + raise TypeError( + "enumerated values can not be stored using the given type") + return enumId + + + +def enum_from_hdf5(hid_t enumId, str byteorder): + """enum_from_hdf5(enumId) -> (Enum, npType) + + Convert an HDF5 enumerated type to a PyTables one. + + This function takes an HDF5 enumerated type and returns an `Enum` + instance built from that, and the NumPy type used to encode it. + + """ + + cdef hid_t baseId + cdef int nelems, npenum, i + cdef void *rbuf + cdef char *ename + cdef ndarray npvalue + cdef object dtype + cdef str pyename + + # Find the base type of the enumerated type, and get the atom + baseId = H5Tget_super(enumId) + atom = atom_from_hdf5_type(baseId) + H5Tclose(baseId) + if atom.kind not in ('int', 'uint'): + raise NotImplementedError("sorry, only integer concrete values are " + "supported at this moment") + + dtype = atom.dtype + npvalue = np.array((0,), dtype=dtype) + rbuf = PyArray_DATA(npvalue) + + # Get the name and value of each of the members + # and put the pair in `enumDict`. 
+ enumDict = {} + + nelems = H5Tget_nmembers(enumId) + if enumId < 0: + raise HDF5ExtError( + "failed to get element count of HDF5 enumerated type") + + for i in range(nelems): + ename = H5Tget_member_name(enumId, i) + if ename == NULL: + raise HDF5ExtError( + "failed to get element name from HDF5 enumerated type") + + pyename = cstr_to_pystr(ename) + + H5free_memory(ename) + + if H5Tget_member_value(enumId, i, rbuf) < 0: + raise HDF5ExtError( + "failed to get element value from HDF5 enumerated type") + + enumDict[pyename] = npvalue[0] # converted to NumPy scalar + + # Build an enumerated type from `enumDict` and return it. + return Enum(enumDict), dtype + + + + +def enum_to_hdf5(object enum_atom, str byteorder): + """Convert a PyTables enumerated type to an HDF5 one. + + This function creates an HDF5 enumerated type from the information + contained in `enumAtom` (an ``Atom`` object), with the specified + `byteorder` (a string). The resulting HDF5 enumerated type is + returned. + + """ + + cdef hid_t base_id, enum_id + cdef object base_atom + cdef ndarray values + + # Get the base HDF5 type and create the enumerated type. + base_atom = Atom.from_dtype(enum_atom.dtype.base) + base_id = atom_to_hdf5_type(base_atom, byteorder) + + try: + enum_id = H5Tenum_create(base_id) + if enum_id < 0: + raise HDF5ExtError("failed to create HDF5 enumerated type") + + finally: + if H5Tclose(base_id) < 0: + raise HDF5ExtError("failed to close HDF5 base type") + + try: + # Set the name and value of each of the members. 
+ names = enum_atom._names + values = enum_atom._values + + # This saves the default enum value first so that we can restore it + default_name = enum_atom._defname + index_default = names.index(default_name) + H5Tenum_insert(enum_id, default_name.encode('utf-8'), + PyArray_GETPTR1(values, index_default)) + + for i, n in enumerate(names): + # Skip the default value as we have already inserted it before + if i == index_default: + continue + + if H5Tenum_insert(enum_id, n.encode('utf-8'), + PyArray_GETPTR1(values, i)) < 0: + raise HDF5ExtError("failed to insert value into HDF5 enumerated type") + + # Return the new, open HDF5 enumerated type. + return enum_id + + except: + if H5Tclose(enum_id) < 0: + raise HDF5ExtError("failed to close HDF5 enumerated type") + + raise + + +def atom_to_hdf5_type(atom, str byteorder): + cdef hid_t tid = -1 + cdef hid_t tid2 = -1 + cdef hsize_t *dims = NULL + cdef bytes encoded_byteorder + cdef char *cbyteorder = NULL + + encoded_byteorder = byteorder.encode('utf-8') + # Get the C pointer + cbyteorder = encoded_byteorder + + # Create the base HDF5 type + if atom.type in pttype_to_hdf5: + tid = H5Tcopy(pttype_to_hdf5[atom.type]) + # Fix the byteorder + if atom.kind != 'time': + set_order(tid, cbyteorder) + elif atom.type == 'float16': + tid = create_ieee_float16(cbyteorder) + elif atom.kind in pt_special_kinds: + # Special cases (the byteorder doesn't need to be fixed afterwards) + if atom.type == 'complex64': + tid = create_ieee_complex64(cbyteorder) + elif atom.type == 'complex128': + tid = create_ieee_complex128(cbyteorder) + elif atom.type == 'complex192': + tid = create_ieee_complex192(cbyteorder) + elif atom.type == 'complex256': + tid = create_ieee_complex256(cbyteorder) + elif atom.kind == 'string': + tid = H5Tcopy(H5T_C_S1); + H5Tset_size(tid, atom.itemsize) + elif atom.kind == 'bool': + tid = H5Tcopy(H5T_STD_B8); + elif atom.kind == 'enum': + tid = enum_to_hdf5(atom, byteorder) + else: + raise TypeError("Invalid type for atom 
%s" % (atom,)) + # Create an H5T_ARRAY in case of non-scalar atoms + if atom.shape != (): + dims = malloc_dims(atom.shape) + tid2 = H5Tarray_create(tid, len(atom.shape), dims) + free(dims) + H5Tclose(tid) + tid = tid2 + + return tid + + + + +def load_enum(hid_t type_id): + """load_enum() -> (Enum, npType) + + Load the enumerated HDF5 type associated with this type_id. + + It returns an `Enum` instance built from that, and the + NumPy type used to encode it. + + """ + + cdef hid_t enumId + cdef char c_byteorder[11] # "irrelevant" fits well here + cdef str byteorder + + # Get the enumerated type + enumId = get_type_enum(type_id) + + # Get the byteorder + get_order(type_id, c_byteorder) + byteorder = cstr_to_pystr(c_byteorder) + # Get the Enum and NumPy types and close the HDF5 type. + try: + return enum_from_hdf5(enumId, byteorder) + finally: + # (Yes, the ``finally`` clause *is* executed.) + if H5Tclose(enumId) < 0: + raise HDF5ExtError("failed to close HDF5 enumerated type") + + + +def hdf5_to_np_nested_type(hid_t type_id): + """Given a HDF5 `type_id`, return a dtype string representation of it.""" + + cdef hid_t member_type_id + cdef hid_t member_offset + cdef hsize_t nfields + cdef int i + cdef char *c_colname + cdef H5T_class_t class_id + cdef object desc + cdef str colname + + desc = {} + # Get the number of members + nfields = H5Tget_nmembers(type_id) + # Iterate thru the members + for i in range(nfields): + # Get the member name + c_colname = H5Tget_member_name(type_id, i) + colname = cstr_to_pystr(c_colname) + + # Get the member type + member_type_id = H5Tget_member_type(type_id, i) + member_offset = H5Tget_member_offset(type_id, i) + + # Get the HDF5 class + class_id = H5Tget_class(member_type_id) + if class_id == H5T_COMPOUND and not is_complex(member_type_id): + desc[colname] = hdf5_to_np_nested_type(member_type_id) + desc[colname]["_v_pos"] = i + desc[colname]["_v_offset"] = member_offset + else: + atom = atom_from_hdf5_type(member_type_id, 
pure_numpy_types=True) + desc[colname] = Col.from_atom(atom, pos=i, _offset=member_offset) + + # Release resources + H5Tclose(member_type_id) + H5free_memory(c_colname) + + return desc + + + +def hdf5_to_np_ext_type(hid_t type_id, pure_numpy_types=True, atom=False, ptparams=None): + """Map the atomic HDF5 type to a string repr of NumPy extended codes. + + If `pure_numpy_types` is true, detected HDF5 types that does not match pure + NumPy types will raise a ``TypeError`` exception. If not, HDF5 types like + TIME, VLEN or ENUM are passed through. + + If `atom` is true, the resulting repr is meant for atoms. If not, the + result is meant for attributes. + + Returns the string repr of type and its shape. The exception is for + compounds types, that returns a NumPy dtype and shape instead. + + """ + + cdef H5T_sign_t sign + cdef hid_t super_type_id, native_type_id + cdef H5T_class_t class_id + cdef size_t itemsize + cdef object stype, shape, shape2 + cdef hsize_t *dims + + # default shape + shape = () + # Get the HDF5 class + class_id = H5Tget_class(type_id) + # Get the itemsize + itemsize = H5Tget_size(type_id) + + if class_id == H5T_BITFIELD: + stype = "b1" + elif class_id == H5T_INTEGER: + # Get the sign + sign = H5Tget_sign(type_id) + if sign > 0: + stype = "i%s" % itemsize + else: + stype = "u%s" % itemsize + elif class_id == H5T_FLOAT: + stype = "f%s" % itemsize + elif class_id == H5T_COMPOUND: + if is_complex(type_id): + stype = "c%s" % itemsize + else: + if atom: + raise TypeError("the HDF5 class ``%s`` is not supported yet" + % hdf5_class_to_string[class_id]) + desc = Description(hdf5_to_np_nested_type(type_id), ptparams=ptparams) + # stype here is not exactly a string, but the NumPy dtype factory + # will deal with this. 
+ stype = desc._v_dtype + elif class_id == H5T_STRING: + if H5Tis_variable_str(type_id): + raise TypeError("variable length strings are not supported yet") + stype = "S%s" % itemsize + elif class_id == H5T_TIME: + if pure_numpy_types: + raise TypeError("the HDF5 class ``%s`` is not supported yet" + % hdf5_class_to_string[class_id]) + stype = "t%s" % itemsize + elif class_id == H5T_ENUM: + if pure_numpy_types: + raise TypeError("the HDF5 class ``%s`` is not supported yet" + % hdf5_class_to_string[class_id]) + stype = "e" + elif class_id == H5T_VLEN: + if pure_numpy_types: + raise TypeError("the HDF5 class ``%s`` is not supported yet" + % hdf5_class_to_string[class_id]) + # Get the variable length base component + super_type_id = H5Tget_super(type_id) + # Find the super member format + stype, shape = hdf5_to_np_ext_type(super_type_id, pure_numpy_types) + # Release resources + H5Tclose(super_type_id) + elif class_id == H5T_REFERENCE: + # only standard referenced objects (for atoms) are now supported + if not atom or not H5Tequal(type_id, H5T_STD_REF_OBJ): + raise TypeError("the HDF5 class ``%s`` is not supported yet" + % hdf5_class_to_string[class_id]) + stype = "_ref_" + elif class_id == H5T_ARRAY: + # Get the array base component + super_type_id = H5Tget_super(type_id) + # Find the super member format + stype, shape2 = hdf5_to_np_ext_type(super_type_id, pure_numpy_types) + # Get shape + shape = [] + ndims = H5Tget_array_ndims(type_id) + dims = malloc(ndims * sizeof(hsize_t)) + H5Tget_array_dims(type_id, dims) + for i in range(ndims): + shape.append(dims[i]) # cast to avoid long representation (i.e. 2L) + shape = tuple(shape) + # Release resources + free(dims) + H5Tclose(super_type_id) + else: + # Other types are not supported yet + raise TypeError("the HDF5 class ``%s`` is not supported yet" + % hdf5_class_to_string[class_id]) + + return stype, shape + + + + +def atom_from_hdf5_type(hid_t type_id, pure_numpy_types=False): + """Get an atom from a type_id. 
+ + See `hdf5_to_np_ext_type` for an explanation of the `pure_numpy_types` + parameter. + + """ + + cdef object stype, shape, atom_, sctype, tsize, kind + cdef object dflt, base, enum_, nptype + + stype, shape = hdf5_to_np_ext_type(type_id, pure_numpy_types, atom=True) + # Create the Atom + if stype == '_ref_': + atom_ = ReferenceAtom(shape=shape) + elif stype == 'e': + (enum_, nptype) = load_enum(type_id) + # Take one of the names as the default in the enumeration. + dflt = next(iter(enum_))[0] + base = Atom.from_dtype(nptype) + atom_ = EnumAtom(enum_, dflt, base, shape=shape) + else: + kind = npext_prefixes_to_ptkinds[stype[0]] + tsize = int(stype[1:]) + atom_ = Atom.from_kind(kind, tsize, shape=shape) + + return atom_ + + + +def create_nested_type(object desc, str byteorder): + """Create a nested type based on a description and return an HDF5 type.""" + + cdef hid_t tid, tid2 + cdef size_t offset + cdef bytes encoded_name + + tid = H5Tcreate(H5T_COMPOUND, desc._v_itemsize) + if tid < 0: + return -1 + + offset = desc._v_offsets[0] if desc._v_offsets else 0 + for i, k in enumerate(desc._v_names): + obj = desc._v_colobjects[k] + if isinstance(obj, Description): + tid2 = create_nested_type(obj, byteorder) + else: + tid2 = atom_to_hdf5_type(obj, byteorder) + encoded_name = k.encode('utf-8') + if desc._v_offsets: + offset = desc._v_offsets[i] + H5Tinsert(tid, encoded_name, offset, tid2) + if not desc._v_offsets: + offset += desc._v_dtype[k].itemsize + # Release resources + H5Tclose(tid2) + + return tid + + +cdef int load_reference(hid_t dataset_id, hobj_ref_t *refbuf, size_t item_size, ndarray nparr) except -1: + """Load a reference as an array of objects + :param dataset_id: dataset of the reference + :param refbuf: load the references requested + :param item_size: size of the reference in the file read into refbuf + :param nparr: numpy object array already pre-allocated with right size and shape for refbuf references + """ + cdef size_t nelements = nparr.size + cdef 
int i, j + cdef hid_t refobj_id = -1 # if valid can be only be a dataset id + cdef hid_t reftype_id + cdef hid_t disk_type_id = -1 + cdef void *rbuf + cdef int rank = 0 + cdef hsize_t *maxdims = NULL + cdef hsize_t *dims = NULL + cdef char cbyteorder[11] + cdef H5T_class_t class_id + cdef hsize_t nrows + cdef ndarray nprefarr + cdef int extdim + cdef hobj_ref_t *newrefbuf = NULL + + + if refbuf == NULL: + raise ValueError("Invalid reference buffer") + + try: + + for i in range(nelements): + refobj_id = H5Rdereference(dataset_id, H5P_DEFAULT, H5R_OBJECT, &refbuf[i]) + if H5Iget_type(refobj_id) != H5I_DATASET: + raise ValueError('Invalid reference type %d %d' % (H5Iget_type(refobj_id), item_size)) + disk_type_id = H5Dget_type(refobj_id) + reftype_id = get_native_type(disk_type_id) + # Get the rank for this array object + if H5ARRAYget_ndims(refobj_id, &rank) < 0: + raise HDF5ExtError("Problems getting ndims!") + + dims = malloc(rank * sizeof(hsize_t)) + maxdims = malloc(rank * sizeof(hsize_t)) + # Get info on dimensions, class and type (of base class) + ret = H5ARRAYget_info(refobj_id, disk_type_id, + dims, maxdims, + &class_id, cbyteorder) + if ret < 0: + raise HDF5ExtError("Unable to get array info.") + + # Get the extendable dimension (if any) + extdim = -1 # default is non-extensible Array + for j in range(rank): + if maxdims[j] == -1: + extdim = j + break + if extdim < 0: + extdim += rank + + nrows = dims[extdim] + + # read entire dataset as numpy array + stype_, shape_ = hdf5_to_np_ext_type(reftype_id, pure_numpy_types=True, atom=True) + if stype_ == "_ref_": + dtype_ = np.dtype(("O", shape_)) + else: + dtype_ = np.dtype((stype_, shape_)) + shape = [] + for j in range(rank): + shape.append(dims[j]) + shape = tuple(shape) + + nprefarr = np.empty(dtype=dtype_, shape=shape) + nparr[i] = [nprefarr] # box the array in a list to store it as one object + if stype_ == "_ref_": + newrefbuf = malloc(nprefarr.size * item_size) + rbuf = newrefbuf + else: + rbuf = 
PyArray_DATA(nprefarr) + + # Do the physical read + with nogil: + ret = H5ARRAYread(refobj_id, reftype_id, 0, nrows, 1, extdim, rbuf) + if ret < 0: + raise HDF5ExtError("Problems reading the array data.") + + if stype_ == "_ref_": + # recurse to read the reference + load_reference(refobj_id, newrefbuf, item_size, nprefarr) + + # close objects + if newrefbuf: + free(newrefbuf) + newrefbuf = NULL + H5Oclose(refobj_id) + refobj_id = -1 + H5Tclose(reftype_id) + reftype_id = -1 + H5Tclose(disk_type_id) + disk_type_id = -1 + free(maxdims) + maxdims = NULL + free(dims) + dims = NULL + finally: + if newrefbuf: + free(newrefbuf) + newrefbuf = NULL + if refobj_id >= 0: + H5Oclose(refobj_id) + if reftype_id >= 0: + H5Tclose(reftype_id) + if disk_type_id >= 0: + H5Tclose(disk_type_id) + if maxdims: + free(maxdims) + if dims: + free(dims) + + # no error + return 0 + +## Local Variables: +## mode: python +## py-indent-offset: 2 +## tab-width: 2 +## fill-column: 78 +## End: diff --git a/venv/Lib/site-packages/tables/vlarray.py b/venv/Lib/site-packages/tables/vlarray.py new file mode 100644 index 0000000..91cecf1 --- /dev/null +++ b/venv/Lib/site-packages/tables/vlarray.py @@ -0,0 +1,916 @@ +"""Here is defined the VLArray class.""" + +from __future__ import annotations + +import sys +import operator +from typing import Any, NoReturn, TYPE_CHECKING +from collections.abc import Sequence + +import numpy as np +import numpy.typing as npt + +from . 
import hdf5extension +from .atom import ObjectAtom, VLStringAtom, VLUnicodeAtom +from .leaf import Leaf, calc_chunksize +from .utils import ( + convert_to_np_atom, + convert_to_np_atom2, + idx2long, + correct_byteorder, + SizeType, + is_idx, + lazyattr, +) +from .flavor import internal_to_flavor + +if TYPE_CHECKING: + from .atom import Atom, Enum + from .group import Group + from .filters import Filters + +# default version for VLARRAY objects +# obversion = "1.0" # initial version +# obversion = "1.0" # add support for complex datatypes +# obversion = "1.1" # This adds support for time datatypes. +# obversion = "1.2" # This adds support for enumerated datatypes. +# obversion = "1.3" # Introduced 'PSEUDOATOM' attribute. +obversion = "1.4" # Numeric and numarray flavors are gone. + + +class VLArray(hdf5extension.VLArray, Leaf): + """This class represents variable length (ragged) arrays in an HDF5 file. + + Instances of this class represent array objects in the object tree + with the property that their rows can have a *variable* number of + homogeneous elements, called *atoms*. Like Table datasets (see + :ref:`TableClassDescr`), variable length arrays can have only one + dimension, and the elements (atoms) of their rows can be fully + multidimensional. + + When reading a range of rows from a VLArray, you will *always* get + a Python list of objects of the current flavor (each of them for a + row), which may have different lengths. + + This class provides methods to write or read data to or from + variable length array objects in the file. Note that it also + inherits all the public attributes and methods that Leaf (see + :ref:`LeafClassDescr`) already provides. + + .. note:: + + VLArray objects also support compression although compression + is only performed on the data structures used internally by + the HDF5 to take references of the location of the variable + length data. Data itself (the raw data) are not compressed + or filtered. 
+ + Please refer to the `VLTypes Technical Note + `_ + for more details on the topic. + + Parameters + ---------- + parentnode + The parent :class:`Group` object. + name : str + The name of this node in its parent group. + atom + An `Atom` instance representing the *type* and *shape* of the atomic + objects to be saved. + title + A description for this node (it sets the ``TITLE`` HDF5 attribute on + disk). + filters + An instance of the `Filters` class that provides information about the + desired I/O filters to be applied during the life of this object. + expectedrows + A user estimate about the number of row elements that will + be added to the growable dimension in the `VLArray` node. + If not provided, the default value is ``EXPECTED_ROWS_VLARRAY`` + (see ``tables/parameters.py``). If you plan to create either + a much smaller or a much bigger `VLArray` try providing a guess; + this will optimize the HDF5 B-Tree creation and management + process time and the amount of memory used. + + .. versionadded:: 3.0 + + chunkshape + The shape of the data chunk to be read or written in a single HDF5 I/O + operation. Filters are applied to those chunks of data. The + dimensionality of `chunkshape` must be 1. If ``None``, a sensible + value is calculated (which is recommended). + byteorder + The byteorder of the data *on disk*, specified as 'little' or 'big'. + If this is not specified, the byteorder is that of the platform. + + track_times + Whether time data associated with the leaf are recorded (object + access time, raw data modification time, metadata change time, object + birth time); default True. Semantics of these times depend on their + implementation in the HDF5 library: refer to documentation of the + H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata + change time) is implemented. + + .. versionadded:: 3.4.3 + + + .. versionchanged:: 3.0 + *parentNode* renamed into *parentnode*. + + .. 
versionchanged:: 3.0 + The *expectedsizeinMB* parameter has been replaced by *expectedrows*. + + Examples + -------- + See below a small example of the use of the VLArray class. The code is + available in :file:`examples/vlarray1.py`:: + + import numpy as np + import tables as tb + + # Create a VLArray: + fileh = tb.open_file('vlarray1.h5', mode='w') + vlarray = fileh.create_vlarray( + fileh.root, + 'vlarray1', + tb.Int32Atom(shape=()), + "ragged array of ints", + filters=tb.Filters(1)) + + # Append some (variable length) rows: + vlarray.append(np.array([5, 6])) + vlarray.append(np.array([5, 6, 7])) + vlarray.append([5, 6, 9, 8]) + + # Now, read it through an iterator: + print('-->', vlarray.title) + for x in vlarray: + print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x)) + + # Now, do the same with native Python strings. + vlarray2 = fileh.create_vlarray( + fileh.root, + 'vlarray2', + tb.StringAtom(itemsize=2), + "ragged array of strings", + filters=tb.Filters(1)) + vlarray2.flavor = 'python' + + # Append some (variable length) rows: + print('-->', vlarray2.title) + vlarray2.append(['5', '66']) + vlarray2.append(['5', '6', '77']) + vlarray2.append(['5', '6', '9', '88']) + + # Now, read it through an iterator: + for x in vlarray2: + print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x)) + + # Close the file. + fileh.close() + + The output for the previous script is something like:: + + --> ragged array of ints + vlarray1[0]--> [5 6] + vlarray1[1]--> [5 6 7] + vlarray1[2]--> [5 6 9 8] + --> ragged array of strings + vlarray2[0]--> ['5', '66'] + vlarray2[1]--> ['5', '6', '77'] + vlarray2[2]--> ['5', '6', '9', '88'] + + + .. rubric:: VLArray attributes + + The instance variables below are provided in addition to those in + Leaf (see :ref:`LeafClassDescr`). + + .. attribute:: atom + + An Atom (see :ref:`AtomClassDescr`) + instance representing the *type* and + *shape* of the atomic objects to be + saved. 
You may use a *pseudo-atom* for + storing a serialized object or variable length string per row. + + .. attribute:: flavor + + The type of data object read from this leaf. + + Please note that when reading several rows of VLArray data, + the flavor only applies to the *components* of the returned + Python list, not to the list itself. + + .. attribute:: nrow + + On iterators, this is the index of the current row. + + .. attribute:: nrows + + The current number of rows in the array. + + .. attribute:: extdim + + The index of the enlargeable dimension (always 0 for vlarrays). + + """ + + # Class identifier. + _c_classid = "VLARRAY" + + @lazyattr + def dtype(self) -> np.dtype: + """Return the NumPy ``dtype`` that most closely matches this array.""" + return self.atom.dtype + + @property + def shape(self) -> tuple[int]: + """Return the shape of the stored array.""" + return (self.nrows,) + + @property + def size_on_disk(self) -> NoReturn: + """Return the size on disk of the `VLArray` object. + + The HDF5 library does not include a function to determine size_on_disk + for variable-length arrays. Accessing this attribute will raise a + NotImplementedError. + """ + raise NotImplementedError("size_on_disk not implemented for VLArrays") + + @property + def size_in_memory(self) -> int: + """Size of the array's data in bytes. + + .. note:: + + When data is stored in a VLArray using the ObjectAtom type, + it is first serialized using pickle, and then converted to + a NumPy array suitable for storage in an HDF5 file. + This attribute will return the size of that NumPy + representation. If you wish to know the size of the Python + objects after they are loaded from disk, you can use this + `ActiveState recipe + `_. + """ + return self._get_memory_size() + + def __init__( + self, + parentnode: Group, + name: str, + atom: Atom | None = None, + title: str = "", + filters: Filters | None = None, + expectedrows: int | None = None, + chunkshape: tuple[int, ...] 
| None = None, + byteorder: str | None = None, + _log: bool = True, + track_times: bool = True, + ) -> None: + + self._v_version: str | None = None + """The object version of this array.""" + + self._v_new = new = atom is not None + """Is this the first time the node has been created?""" + + self._v_new_title = title + """New title for this node.""" + + self._v_new_filters = filters + """New filter properties for this array.""" + + if expectedrows is None: + expectedrows = parentnode._v_file.params["EXPECTED_ROWS_VLARRAY"] + self._v_expectedrows = int(expectedrows) + """The expected number of rows to be stored in the array. + + .. versionadded:: 3.0 + + """ + + self._v_chunkshape: tuple[int, ...] | None = None + """Private storage for the `chunkshape` property of Leaf.""" + + # Miscellaneous iteration rubbish. + self._start: int | None = None + """Starting row for the current iteration.""" + + self._stop: int | None = None + """Stopping row for the current iteration.""" + + self._step: int | None = None + """Step size for the current iteration.""" + + self._nrowsread: int | None = None + """Number of rows read up to the current state of iteration.""" + + self._startb: int | None = None + """Starting row for current buffer.""" + + self._stopb: int | None = None + """Stopping row for current buffer. """ + + self._row: int | None = None + """Current row in iterators (sentinel).""" + + self._init = False + """Whether we are in the middle of an iteration or not (sentinel).""" + + self.listarr: npt.ArrayLike | None = None + """Current buffer in iterators.""" + + # Documented (*public*) attributes. + self.atom = atom + """ + An Atom (see :ref:`AtomClassDescr`) instance representing the + *type* and *shape* of the atomic objects to be saved. You may + use a *pseudo-atom* for storing a serialized object or + variable length string per row. 
+ """ + self.nrow: int | None = None + """On iterators, this is the index of the current row.""" + + self.nrows: int | None = None + """The current number of rows in the array.""" + + self.extdim = 0 # VLArray only have one dimension currently + """The index of the enlargeable dimension (always 0 for vlarrays).""" + + # Check the chunkshape parameter + if new and chunkshape is not None: + if isinstance(chunkshape, (int, np.integer)): + chunkshape = (chunkshape,) + try: + chunkshape = tuple(chunkshape) + except TypeError: + raise TypeError( + "`chunkshape` parameter must be an integer or sequence " + "and you passed a %s" % type(chunkshape) + ) + if len(chunkshape) != 1: + raise ValueError( + f"`chunkshape` rank (length) must be 1: {chunkshape!r}" + ) + self._v_chunkshape = tuple(SizeType(s) for s in chunkshape) + + super().__init__( + parentnode, name, new, filters, byteorder, _log, track_times + ) + + def _g_post_init_hook(self) -> None: + super()._g_post_init_hook() + self.nrowsinbuf = 100 # maybe enough for most applications + + # This is too specific for moving it into Leaf + def _calc_chunkshape(self, expectedrows: int) -> tuple[int]: + """Calculate the size for the HDF5 chunk.""" + # For computing the chunkshape for HDF5 VL types, we have to + # choose the itemsize of the *each* element of the atom and + # not the size of the entire atom. I don't know why this + # should be like this, perhaps I should report this to the + # HDF5 list. + # F. Alted 2006-11-23 + # elemsize = self.atom.atomsize() + elemsize = self._basesize + + # AV 2013-05-03 + # This is just a quick workaround tha allows to change the API for + # PyTables 3.0 release and remove the expected_mb parameter. + # The algorithm for computing the chunkshape should be rewritten as + # requested by gh-35. 
+ expected_mb = expectedrows * elemsize / 1024**2 + + chunksize = calc_chunksize(expected_mb) + + # Set the chunkshape + chunkshape = chunksize // elemsize + # Safeguard against itemsizes being extremely large + if chunkshape == 0: + chunkshape = 1 + return (SizeType(chunkshape),) + + def _g_create(self) -> int: + """Create a variable length array (ragged array).""" + atom = self.atom + self._v_version = obversion + # Check for zero dims in atom shape (not allowed in VLArrays) + zerodims = np.sum(np.array(atom.shape) == 0) + if zerodims > 0: + raise ValueError( + "When creating VLArrays, none of the dimensions " + "of the Atom instance can be zero." + ) + + if not hasattr(atom, "size"): # it is a pseudo-atom + self._atomicdtype = atom.base.dtype + self._atomicsize = atom.base.size + self._basesize = atom.base.itemsize + else: + self._atomicdtype = atom.dtype + self._atomicsize = atom.size + self._basesize = atom.itemsize + self._atomictype = atom.type + self._atomicshape = atom.shape + + # Compute the optimal chunkshape, if needed + if self._v_chunkshape is None: + self._v_chunkshape = self._calc_chunkshape(self._v_expectedrows) + + self.nrows = SizeType(0) # No rows at creation time + + # Correct the byteorder if needed + if self.byteorder is None: + self.byteorder = correct_byteorder(atom.type, sys.byteorder) + + # After creating the vlarray, ``self._v_objectid`` needs to be + # set because it is needed for setting attributes afterwards. + self._v_objectid = self._create_array(self._v_new_title) + + # Add an attribute in case we have a pseudo-atom so that we + # can retrieve the proper class after a re-opening operation. 
+ if not hasattr(atom, "size"): # it is a pseudo-atom + self.attrs.PSEUDOATOM = atom.kind + + return self._v_objectid + + def _g_open(self) -> int: + """Get the metadata info for an array in file.""" + self._v_objectid, self.nrows, self._v_chunkshape, atom = ( + self._open_array() + ) + + # Check if the atom can be a PseudoAtom + if "PSEUDOATOM" in self.attrs: + kind = self.attrs.PSEUDOATOM + if kind == "vlstring": + atom = VLStringAtom() + elif kind == "vlunicode": + atom = VLUnicodeAtom() + elif kind == "object": + atom = ObjectAtom() + else: + raise ValueError("pseudo-atom name ``%s`` not known." % kind) + elif self._v_file.format_version[:1] == "1": + flavor1x = self.attrs.FLAVOR + if flavor1x == "VLString": + atom = VLStringAtom() + elif flavor1x == "Object": + atom = ObjectAtom() + + self.atom = atom + return self._v_objectid + + def _getnobjects(self, nparr: np.ndarray) -> int: + """Return the number of objects in a NumPy array.""" + # Check for zero dimensionality array + zerodims = np.sum(np.array(nparr.shape) == 0) + if zerodims > 0: + # No objects to be added + return 0 + shape = nparr.shape + atom_shape = self.atom.shape + shapelen = len(nparr.shape) + if isinstance(atom_shape, tuple): + atomshapelen = len(self.atom.shape) + else: + atom_shape = (self.atom.shape,) + atomshapelen = 1 + diflen = shapelen - atomshapelen + if shape == atom_shape: + nobjects = 1 + elif diflen == 1 and shape[diflen:] == atom_shape: + # Check if the leading dimensions are all ones + # if shape[:diflen-1] == (1,)*(diflen-1): + # nobjects = shape[diflen-1] + # shape = shape[diflen:] + # It's better to accept only inputs with the exact dimensionality + # i.e. 
a dimensionality only 1 element larger than atom + nobjects = shape[0] + shape = shape[1:] + elif atom_shape == (1,) and shapelen == 1: + # Case where shape = (N,) and shape_atom = 1 or (1,) + nobjects = shape[0] + else: + raise ValueError( + "The object '%s' is composed of elements with " + "shape '%s', which is not compatible with the " + "atom shape ('%s')." % (nparr, shape, atom_shape) + ) + return nobjects + + def get_enum(self) -> Enum: + """Get the enumerated type associated with this array. + + If this array is of an enumerated type, the corresponding Enum instance + (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated + type, a TypeError is raised. + + """ + if self.atom.kind != "enum": + raise TypeError( + "array ``%s`` is not of an enumerated type" % self._v_pathname + ) + + return self.atom.enum + + def append(self, sequence: npt.ArrayLike) -> None: + """Add a sequence of data to the end of the dataset. + + This method appends the objects in the sequence to a *single row* in + this array. The type and shape of individual objects must be compliant + with the atoms in the array. In the case of serialized objects and + variable length strings, the object or string to append is itself the + sequence. + + """ + self._g_check_open() + self._v_file._check_writable() + + # Prepare the sequence to convert it into a NumPy object + atom = self.atom + if not hasattr(atom, "size"): # it is a pseudo-atom + sequence = atom.toarray(sequence) + statom = atom.base + else: + try: # fastest check in most cases + len(sequence) + except TypeError: + raise TypeError("argument is not a sequence") + statom = atom + + if len(sequence) > 0: + # The sequence needs to be copied to make the operation safe + # to in-place conversion. 
+ nparr = convert_to_np_atom2(sequence, statom) + nobjects = self._getnobjects(nparr) + else: + nobjects = 0 + nparr = None + + self._append(nparr, nobjects) + self.nrows += 1 + + def iterrows( + self, + start: int | None = None, + stop: int | None = None, + step: int | None = None, + ) -> VLArray: + """Iterate over the rows of the array. + + This method returns an iterator yielding an object of the current + flavor for each selected row in the array. + + If a range is not supplied, *all the rows* in the array are iterated + upon. You can also use the :meth:`VLArray.__iter__` special method for + that purpose. If you only want to iterate over a given *range of rows* + in the array, you may use the start, stop and step parameters. + + Examples + -------- + :: + + for row in vlarray.iterrows(step=4): + print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, row)) + + .. versionchanged:: 3.0 + If the *start* parameter is provided and *stop* is None then the + array is iterated from *start* to the last line. + In PyTables < 3.0 only one element was returned. + + """ + self._start, self._stop, self._step = self._process_range( + start, stop, step + ) + self._init_loop() + return self + + def __iter__(self) -> VLArray: + """Iterate over the rows of the array. + + This is equivalent to calling :meth:`VLArray.iterrows` with default + arguments, i.e. it iterates over *all the rows* in the array. 
+ + Examples + -------- + :: + + result = [row for row in vlarray] + + Which is equivalent to:: + + result = [row for row in vlarray.iterrows()] + + """ + if not self._init: + # If the iterator is called directly, assign default variables + self._start = 0 + self._stop = self.nrows + self._step = 1 + # and initialize the loop + self._init_loop() + + return self + + def _init_loop(self) -> None: + """Initialize the __iter__ iterator.""" + self._nrowsread = self._start + self._startb = self._start + self._row = -1 # Sentinel + self._init = True # Sentinel + self.nrow = SizeType(self._start - self._step) # row number + + def __next__(self) -> list | np.ndarray: + """Get the next element of the array during an iteration. + + The element is returned as a list of objects of the current + flavor. + + """ + if self._nrowsread >= self._stop: + self._init = False + raise StopIteration # end of iteration + else: + # Read a chunk of rows + if self._row + 1 >= self.nrowsinbuf or self._row < 0: + self._stopb = self._startb + self._step * self.nrowsinbuf + self.listarr = self.read(self._startb, self._stopb, self._step) + self._row = -1 + self._startb = self._stopb + self._row += 1 + self.nrow += self._step + self._nrowsread += self._step + return self.listarr[self._row] + + def __getitem__( + self, key: int | slice | Sequence[int] | np.ndarray + ) -> list: + """Get a row or a range of rows from the array. + + If key argument is an integer, the corresponding array row is returned + as an object of the current flavor. If key is a slice, the range of + rows determined by it is returned as a list of objects of the current + flavor. + + In addition, NumPy-style point selections are supported. In + particular, if key is a list of row coordinates, the set of rows + determined by it is returned. Furthermore, if key is an array of + boolean values, only the coordinates where key is True are returned. 
+ Note that for the latter to work it is necessary that key list would + contain exactly as many rows as the array has. + + Examples + -------- + :: + + a_row = vlarray[4] + a_list = vlarray[4:1000:2] + a_list2 = vlarray[[0,2]] # get list of coords + a_list3 = vlarray[[0,-2]] # negative values accepted + a_list4 = vlarray[np.array([True,...,False])] # array of bools + + """ + self._g_check_open() + if is_idx(key): + key = operator.index(key) + + # Index out of range protection + if key >= self.nrows: + raise IndexError("Index out of range") + if key < 0: + # To support negative values + key += self.nrows + start, stop, step = self._process_range(key, key + 1, 1) + return self.read(start, stop, step)[0] + elif isinstance(key, slice): + start, stop, step = self._process_range( + key.start, key.stop, key.step + ) + return self.read(start, stop, step) + # Try with a boolean or point selection + elif type(key) in (list, tuple) or isinstance(key, np.ndarray): + coords = self._point_selection(key) + return self._read_coordinates(coords) + else: + raise IndexError(f"Invalid index or slice: {key!r}") + + def _assign_values(self, coords: Sequence[int], values: Sequence) -> None: + """Assign the `values` to the positions stated in `coords`.""" + for nrow, value in zip(coords, values): + if nrow >= self.nrows: + raise IndexError("First index out of range") + if nrow < 0: + # To support negative values + nrow += self.nrows + object_ = value + # Prepare the object to convert it into a NumPy object + atom = self.atom + if not hasattr(atom, "size"): # it is a pseudo-atom + object_ = atom.toarray(object_) + statom = atom.base + else: + statom = atom + value = convert_to_np_atom(object_, statom) + nobjects = self._getnobjects(value) + + # Get the previous value + nrow = idx2long(nrow) # To convert any possible numpy scalar value + nparr = self._read_array(nrow, nrow + 1, 1)[0] + nobjects = len(nparr) + if len(value) > nobjects: + raise ValueError( + "Length of value (%s) is larger 
than number " + "of elements in row (%s)" % (len(value), nobjects) + ) + try: + nparr[:] = value + except Exception as exc: # XXX + raise ValueError( + "Value parameter:\n'%r'\n" + "cannot be converted into an array object " + "compliant vlarray[%s] row: \n'%r'\n" + "The error was: <%s>" % (value, nrow, nparr[:], exc) + ) + + if nparr.size > 0: + self._modify(nrow, nparr, nobjects) + + def __setitem__( + self, + key: int | slice | Sequence[int] | np.ndarray, + value: Any, + ) -> None: + """Set a row, or set of rows, in the array. + + It takes different actions depending on the type of the *key* + parameter: if it is an integer, the corresponding table row is + set to *value* (a record or sequence capable of being converted + to the table structure). If *key* is a slice, the row slice + determined by it is set to *value* (a record array or sequence + of rows capable of being converted to the table structure). + + In addition, NumPy-style point selections are supported. In + particular, if key is a list of row coordinates, the set of rows + determined by it is set to value. Furthermore, if key is an array of + boolean values, only the coordinates where key is True are set to + values from value. Note that for the latter to work it is necessary + that key list would contain exactly as many rows as the table has. + + .. note:: + + When updating the rows of a VLArray object which uses a + pseudo-atom, there is a problem: you can only update values + with *exactly* the same size in bytes than the original row. + This is very difficult to meet with object pseudo-atoms, + because :mod:`pickle` applied on a Python object does not + guarantee to return the same number of bytes than over another + object, even if they are of the same class. + This effectively limits the kinds of objects than can be + updated in variable-length arrays. 
+ + Examples + -------- + :: + + vlarray[0] = vlarray[0] * 2 + 3 + vlarray[99] = arange(96) * 2 + 3 + + # Negative values for the index are supported. + vlarray[-99] = vlarray[5] * 2 + 3 + vlarray[1:30:2] = list_of_rows + vlarray[[1,3]] = new_1_and_3_rows + + """ + self._g_check_open() + self._v_file._check_writable() + + if is_idx(key): + # If key is not a sequence, convert to it + coords = [key] + value = [value] + elif isinstance(key, slice): + start, stop, step = self._process_range( + key.start, key.stop, key.step + ) + coords = range(start, stop, step) + # Try with a boolean or point selection + elif type(key) in (list, tuple) or isinstance(key, np.ndarray): + coords = self._point_selection(key) + else: + raise IndexError(f"Invalid index or slice: {key!r}") + + # Do the assignment row by row + self._assign_values(coords, value) + + # Accessor for the _read_array method in superclass + def read( + self, + start: int | None = None, + stop: int | None = None, + step: int = 1, + ) -> list: + """Get data in the array as a list of objects of the current flavor. + + Please note that, as the lengths of the different rows are variable, + the returned value is a *Python list* (not an array of the current + flavor), with as many entries as specified rows in the range + parameters. + + The start, stop and step parameters can be used to select only a + *range of rows* in the array. Their meanings are the same as in + the built-in range() Python function, except that negative values + of step are not allowed yet. Moreover, if only start is specified, + then stop will be set to start + 1. If you do not specify neither + start nor stop, then *all the rows* in the array are selected. 
+ + """ + self._g_check_open() + start, stop, step = self._process_range_read(start, stop, step) + if start == stop: + listarr = [] + else: + listarr = self._read_array(start, stop, step) + + atom = self.atom + if not hasattr(atom, "size"): # it is a pseudo-atom + outlistarr = [atom.fromarray(arr) for arr in listarr] + else: + # Convert the list to the right flavor + flavor = self.flavor + outlistarr = [internal_to_flavor(arr, flavor) for arr in listarr] + return outlistarr + + def _read_coordinates(self, coords: Sequence[int]) -> list[list]: + """Read rows specified in `coords`.""" + rows = [] + for coord in coords: + rows.append(self.read(idx2long(coord), idx2long(coord) + 1, 1)[0]) + return rows + + def _g_copy_with_stats( + self, + group: Group, + name: str, + start: int, + stop: int, + step: int, + title: str, + filters: Filters | None, + chunkshape: tuple[int, ...] | None, + _log: bool, + **kwargs, + ) -> tuple[VLArray, int]: + """Private part of Leaf.copy() for each kind of leaf.""" + # Build the new VLArray object + obj = VLArray( + group, + name, + self.atom, + title=title, + filters=filters, + expectedrows=self._v_expectedrows, + chunkshape=chunkshape, + _log=_log, + ) + + # Now, fill the new vlarray with values from the old one + # This is not buffered because we cannot forsee the length + # of each record. So, the safest would be a copy row by row. + # In the future, some analysis can be done in order to buffer + # the copy process. + nrowsinbuf = 1 + start, stop, step = self._process_range_read(start, stop, step) + # Optimized version (no conversions, no type and shape checks, etc...) 
+ nrowscopied = SizeType(0) + nbytes = 0 + if not hasattr(self.atom, "size"): # it is a pseudo-atom + atomsize = self.atom.base.size + else: + atomsize = self.atom.size + for start2 in range(start, stop, step * nrowsinbuf): + # Save the records on disk + stop2 = start2 + step * nrowsinbuf + if stop2 > stop: + stop2 = stop + nparr = self._read_array(start=start2, stop=stop2, step=step)[0] + nobjects = nparr.shape[0] + obj._append(nparr, nobjects) + nbytes += nobjects * atomsize + nrowscopied += 1 + obj.nrows = nrowscopied + return (obj, nbytes) + + def __repr__(self) -> str: + """`VLArray` string representation. + + Provides more metainfo w.r.t standard __str__. + """ + return f"""{self} + atom = {self.atom!r} + byteorder = {self.byteorder!r} + nrows = {self.nrows} + flavor = {self.flavor!r}""" diff --git a/venv/Scripts/cpuinfo.exe b/venv/Scripts/cpuinfo.exe new file mode 100644 index 0000000..4c53deb Binary files /dev/null and b/venv/Scripts/cpuinfo.exe differ diff --git a/venv/Scripts/pt2to3.exe b/venv/Scripts/pt2to3.exe new file mode 100644 index 0000000..91b3f0d Binary files /dev/null and b/venv/Scripts/pt2to3.exe differ diff --git a/venv/Scripts/ptdump.exe b/venv/Scripts/ptdump.exe new file mode 100644 index 0000000..5a04471 Binary files /dev/null and b/venv/Scripts/ptdump.exe differ diff --git a/venv/Scripts/ptrepack.exe b/venv/Scripts/ptrepack.exe new file mode 100644 index 0000000..3b2c79d Binary files /dev/null and b/venv/Scripts/ptrepack.exe differ diff --git a/venv/Scripts/pttree.exe b/venv/Scripts/pttree.exe new file mode 100644 index 0000000..617a622 Binary files /dev/null and b/venv/Scripts/pttree.exe differ