Mirror of https://github.com/NousResearch/hermes-agent.git
Synced 2026-05-07 19:26:56 +08:00

Compare commits: 40 commits (gemini-cli...sid/worksp)

| SHA1 |
|---|
| cf292d258a |
| 11618e9928 |
| 91b2c19762 |
| d431a200c0 |
| c938e2817f |
| e9ab7c7f68 |
| 5008f123ae |
| ec0fa5a2be |
| fe80e118b7 |
| 01462d5918 |
| b201b1f38f |
| 3c850b8ffc |
| 4d6e51ad1d |
| 62116ff3c9 |
| 15f623fd94 |
| 263c357d06 |
| d9672ba628 |
| 344e487102 |
| 4921bad61f |
| 9f8ca1888f |
| 4fb0c7d08d |
| d934eba7ad |
| 09c50ebb40 |
| 577e27373d |
| b40075e85d |
| 742cb556bb |
| 5d99a78fe1 |
| 161a3d5d61 |
| 7460975174 |
| 07f1a364ed |
| ec18a783d8 |
| 9ed83932a8 |
| 669905e854 |
| 77c10079c8 |
| ebcb8cb925 |
| 026c9c9533 |
| 066d285527 |
| 9f4208645a |
| 3db1a7e451 |
| 228a23198b |

@@ -176,6 +176,66 @@ SKILLS_GUIDANCE = (
    "Skills that aren't maintained become liabilities."
)

# =========================================================================
# Workspace tool guidance
#
# Injected when workspace tools are enabled. The assembler
# build_workspace_guidance() returns one coherent block that grows with the
# tools available. workspace_delete is intentionally not prompted — it's
# destructive and should never be a default reach.
# =========================================================================

WORKSPACE_SEARCH_GUIDANCE_CORE = (
    "You have workspace_search, a BM25 full-text search tool over files "
    "indexed from configured workspace roots. When the user asks about "
    "concepts, terms, or content that could plausibly live in the indexed "
    "codebase or docs, call workspace_search first — it is faster and more "
    "precise than terminal-based grep/find/cat for retrieval. "
    "It returns ranked chunks with path:line_start-line_end and a snippet, "
    "which you can follow up on by reading the file directly if needed. "
    "Do NOT use workspace_search for file edits, for content you already "
    "have, or for files you know aren't in the index (e.g. /tmp, build "
    "artifacts, the user's non-workspace projects)."
)

WORKSPACE_RETRIEVE_GUIDANCE = (
    "When you know the specific file path and want its full indexed content "
    "(all chunks), use workspace_retrieve instead of re-searching. Retrieve "
    "dumps every chunk for that one path in order."
)

WORKSPACE_LIST_GUIDANCE = (
    "Use workspace_list to see what's actually in the index when you're "
    "unsure whether a file is indexed. Prefer this over guessing at paths."
)

WORKSPACE_INDEX_GUIDANCE = (
    "workspace_index rebuilds the full index. It is expensive — only call "
    "it when the user has just modified indexed files and search results "
    "look stale. Never call it speculatively at session start."
)


def build_workspace_guidance(available_tools: set[str]) -> str | None:
    """Assemble workspace guidance based on which tools are enabled.

    Returns None if workspace_search isn't available (nothing to guide).
    Otherwise returns a single string composed of the core guidance plus
    one paragraph per additional workspace tool that's present. The output
    is a newline-joined block ready to be appended to the system prompt.
    """
    if "workspace_search" not in available_tools:
        return None
    sections = [WORKSPACE_SEARCH_GUIDANCE_CORE]
    if "workspace_retrieve" in available_tools:
        sections.append(WORKSPACE_RETRIEVE_GUIDANCE)
    if "workspace_list" in available_tools:
        sections.append(WORKSPACE_LIST_GUIDANCE)
    if "workspace_index" in available_tools:
        sections.append(WORKSPACE_INDEX_GUIDANCE)
    return "\n".join(sections)


TOOL_USE_ENFORCEMENT_GUIDANCE = (
    "# Tool-use enforcement\n"
    "You MUST use your tools to take action — do not describe what you would do "

cli.py | 3

@@ -5752,6 +5752,9 @@ class HermesCLI:
                print(f" {status} {p['name']}{version}{detail}{error}")
            except Exception as e:
                print(f"Plugin system error: {e}")
        elif canonical == "workspace":
            from hermes_cli.workspace_slash import handle_workspace_slash
            handle_workspace_slash(cmd_original, console=self.console)
        elif canonical == "rollback":
            self._handle_rollback_command(cmd_original)
        elif canonical == "snapshot":

@@ -1,608 +0,0 @@

# Pricing Accuracy Architecture

Date: 2026-03-16

## Goal

Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path.

This design replaces the current static, heuristic pricing flow in:

- `run_agent.py`
- `agent/usage_pricing.py`
- `agent/insights.py`
- `cli.py`

with a provider-aware pricing system that:

- handles cache billing correctly
- distinguishes `actual` vs `estimated` vs `included` vs `unknown`
- reconciles post-hoc costs when providers expose authoritative billing data
- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints

## Problems In The Current Design

Current Hermes behavior has four structural issues:

1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately.
2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing.
3. It assumes public API list pricing matches the user's real billing path.
4. It has no distinction between live estimates and reconciled billed cost.

## Design Principles

1. Normalize usage before pricing.
2. Never fold cached tokens into plain input cost.
3. Track certainty explicitly.
4. Treat the billing path as part of the model identity.
5. Prefer official machine-readable sources over scraped docs.
6. Use post-hoc provider cost APIs when available.
7. Show `n/a` rather than inventing precision.

## High-Level Architecture

The new system has four layers:

1. `usage_normalization`
   Converts raw provider usage into a canonical usage record.
2. `pricing_source_resolution`
   Determines the billing path, source of truth, and applicable pricing source.
3. `cost_estimation_and_reconciliation`
   Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later.
4. `presentation`
   `/usage`, `/insights`, and the status bar display cost with certainty metadata.

## Canonical Usage Record

Add a canonical usage model that every provider path maps into before any pricing math happens.

Suggested structure:

```python
from dataclasses import dataclass
from typing import Any


@dataclass
class CanonicalUsage:
    provider: str
    billing_provider: str
    model: str
    billing_route: str

    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    reasoning_tokens: int = 0
    request_count: int = 1

    raw_usage: dict[str, Any] | None = None
    raw_usage_fields: dict[str, str] | None = None
    computed_fields: set[str] | None = None

    provider_request_id: str | None = None
    provider_generation_id: str | None = None
    provider_response_id: str | None = None
```

Rules:

- `input_tokens` means non-cached input only.
- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`.
- `output_tokens` excludes cache metrics.
- `reasoning_tokens` is telemetry unless a provider officially bills it separately.

This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids.

## Provider Normalization Rules

### OpenAI Direct

Source usage fields:

- `prompt_tokens`
- `completion_tokens`
- `prompt_tokens_details.cached_tokens`

Normalization:

- `cache_read_tokens = cached_tokens`
- `input_tokens = prompt_tokens - cached_tokens`
- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route
- `output_tokens = completion_tokens`
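
A minimal sketch of this mapping, reusing the `CanonicalUsage` model above (the helper name is illustrative, not existing Hermes code):

```python
from typing import Any


def normalize_openai_usage(usage: dict[str, Any], model: str) -> CanonicalUsage:
    prompt = usage.get("prompt_tokens", 0)
    completion = usage.get("completion_tokens", 0)
    cached = (usage.get("prompt_tokens_details") or {}).get("cached_tokens", 0)
    return CanonicalUsage(
        provider="openai",
        billing_provider="openai",
        model=model,
        billing_route=f"openai:{model}",
        input_tokens=prompt - cached,  # non-cached input only
        output_tokens=completion,
        cache_read_tokens=cached,
        cache_write_tokens=0,  # not exposed on this route
        raw_usage=usage,
    )
```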

### Anthropic Direct

Source usage fields:

- `input_tokens`
- `output_tokens`
- `cache_read_input_tokens`
- `cache_creation_input_tokens`

Normalization:

- `input_tokens = input_tokens`
- `output_tokens = output_tokens`
- `cache_read_tokens = cache_read_input_tokens`
- `cache_write_tokens = cache_creation_input_tokens`

### OpenRouter

Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible.

Reconciliation-time records should also store:

- OpenRouter generation id
- native token fields when available
- `total_cost`
- `cache_discount`
- `upstream_inference_cost`
- `is_byok`

### Gemini / Vertex

Use official Gemini or Vertex usage fields where available.

If cached content tokens are exposed:

- map them to `cache_read_tokens`

If a route exposes no cache creation metric:

- store `cache_write_tokens = 0`
- preserve the raw usage payload for later extension

### DeepSeek And Other Direct Providers

Normalize only the fields that are officially exposed.

If a provider does not expose cache buckets:

- do not infer them unless the provider explicitly documents how to derive them

### Subscription / Included-Cost Routes

These still use the canonical usage model.

Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists.

## Billing Route Model

Hermes must stop keying pricing solely by `model`.

Introduce a billing route descriptor:

```python
@dataclass
class BillingRoute:
    provider: str
    base_url: str | None
    model: str
    billing_mode: str
    organization_hint: str | None = None
```

`billing_mode` values:

- `official_cost_api`
- `official_generation_api`
- `official_models_api`
- `official_docs_snapshot`
- `subscription_included`
- `user_override`
- `custom_contract`
- `unknown`

Examples:

- OpenAI direct API with Costs API access: `official_cost_api`
- Anthropic direct API with Usage & Cost API access: `official_cost_api`
- OpenRouter request before reconciliation: `official_models_api`
- OpenRouter request after generation lookup: `official_generation_api`
- GitHub Copilot style subscription route: `subscription_included`
- local OpenAI-compatible server: `unknown`
- enterprise contract with configured rates: `custom_contract`

## Cost Status Model

Every displayed cost should have:

```python
from datetime import datetime
from decimal import Decimal
from typing import Literal


@dataclass
class CostResult:
    amount_usd: Decimal | None
    status: Literal["actual", "estimated", "included", "unknown"]
    source: Literal[
        "provider_cost_api",
        "provider_generation_api",
        "provider_models_api",
        "official_docs_snapshot",
        "user_override",
        "custom_contract",
        "none",
    ]
    label: str
    fetched_at: datetime | None
    pricing_version: str | None
    notes: list[str]
```

Presentation rules:

- `actual`: show dollar amount as final
- `estimated`: show dollar amount with estimate labeling
- `included`: show `included` or `$0.00 (included)` depending on UX choice
- `unknown`: show `n/a`

## Official Source Hierarchy

Resolve cost using this order:

1. Request-level or account-level official billed cost
2. Official machine-readable model pricing
3. Official docs snapshot
4. User override or custom contract
5. Unknown

The system must never skip to a lower level if a higher-confidence source exists for the current billing route.

## Provider-Specific Truth Rules

### OpenAI Direct

Preferred truth:

1. Costs API for reconciled spend
2. Official pricing page for live estimate

### Anthropic Direct

Preferred truth:

1. Usage & Cost API for reconciled spend
2. Official pricing docs for live estimate

### OpenRouter

Preferred truth:

1. `GET /api/v1/generation` for reconciled `total_cost`
2. `GET /api/v1/models` pricing for live estimate

Do not use underlying provider public pricing as the source of truth for OpenRouter billing.
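
A sketch of that reconciliation lookup, assuming the `requests` library and the endpoint shape from OpenRouter's documentation (function name illustrative):

```python
import requests
from decimal import Decimal


def fetch_openrouter_actual_cost(api_key: str, generation_id: str) -> Decimal | None:
    # The generation record may lag the completion slightly; callers should
    # treat None as "not reconciled yet", not as zero cost.
    resp = requests.get(
        "https://openrouter.ai/api/v1/generation",
        params={"id": generation_id},
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=15,
    )
    resp.raise_for_status()
    data = resp.json().get("data", {})
    cost = data.get("total_cost")
    return Decimal(str(cost)) if cost is not None else None
```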

### Gemini / Vertex

Preferred truth:

1. official billing export or billing API for reconciled spend when available for the route
2. official pricing docs for estimate

### DeepSeek

Preferred truth:

1. official machine-readable cost source if available in the future
2. official pricing docs snapshot today

### Subscription-Included Routes

Preferred truth:

1. explicit route config marking the model as included in subscription

These should display `included`, not an API list-price estimate.

### Custom Endpoint / Local Model

Preferred truth:

1. user override
2. custom contract config
3. unknown

These should default to `unknown`.

## Pricing Catalog

Replace the current `MODEL_PRICING` dict with a richer pricing catalog.

Suggested record:

```python
@dataclass
class PricingEntry:
    provider: str
    route_pattern: str
    model_pattern: str

    input_cost_per_million: Decimal | None = None
    output_cost_per_million: Decimal | None = None
    cache_read_cost_per_million: Decimal | None = None
    cache_write_cost_per_million: Decimal | None = None
    request_cost: Decimal | None = None
    image_cost: Decimal | None = None

    source: str = "official_docs_snapshot"
    source_url: str | None = None
    fetched_at: datetime | None = None
    pricing_version: str | None = None
```

The catalog should be route-aware:

- `openai:gpt-5`
- `anthropic:claude-opus-4-6`
- `openrouter:anthropic/claude-opus-4.6`
- `copilot:gpt-4o`

This avoids conflating direct-provider billing with aggregator billing.
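
A sketch of the estimate arithmetic this record enables, combining `CanonicalUsage` and `PricingEntry` from above. Returning `None` whenever a populated bucket has no official rate is what keeps design principle 7 ("show `n/a` rather than inventing precision") enforceable:

```python
from decimal import Decimal


def estimate_cost(usage: CanonicalUsage, entry: PricingEntry) -> Decimal | None:
    buckets = [
        (usage.input_tokens, entry.input_cost_per_million),
        (usage.output_tokens, entry.output_cost_per_million),
        (usage.cache_read_tokens, entry.cache_read_cost_per_million),
        (usage.cache_write_tokens, entry.cache_write_cost_per_million),
    ]
    total = Decimal(0)
    for tokens, rate in buckets:
        if tokens and rate is None:
            return None  # tokens in a bucket we cannot price -> unknown
        if tokens:
            total += Decimal(tokens) * rate / Decimal(1_000_000)
    return total
```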

## Pricing Sync Architecture

Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table.

Suggested modules:

- `agent/pricing/catalog.py`
- `agent/pricing/sources.py`
- `agent/pricing/sync.py`
- `agent/pricing/reconcile.py`
- `agent/pricing/types.py`

### Sync Sources

- OpenRouter models API
- official provider docs snapshots where no API exists
- user overrides from config

### Sync Output

Cache pricing entries locally with:

- source URL
- fetch timestamp
- version/hash
- confidence/source type

### Sync Frequency

- startup warm cache
- background refresh every 6 to 24 hours depending on source
- manual `hermes pricing sync`
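
A sketch of the first sync source, assuming OpenRouter's models API shape (per-token prices reported as strings) and the `requests` library:

```python
import requests
from datetime import datetime, timezone
from decimal import Decimal


def _per_million(pricing: dict, key: str) -> Decimal | None:
    raw = pricing.get(key)
    return Decimal(raw) * 1_000_000 if raw else None


def sync_openrouter_entries() -> list[PricingEntry]:
    resp = requests.get("https://openrouter.ai/api/v1/models", timeout=30)
    resp.raise_for_status()
    fetched = datetime.now(timezone.utc)
    entries = []
    for m in resp.json().get("data", []):
        pricing = m.get("pricing", {})
        entries.append(PricingEntry(
            provider="openrouter",
            route_pattern=f"openrouter:{m['id']}",
            model_pattern=m["id"],
            input_cost_per_million=_per_million(pricing, "prompt"),
            output_cost_per_million=_per_million(pricing, "completion"),
            source="provider_models_api",
            source_url="https://openrouter.ai/api/v1/models",
            fetched_at=fetched,
        ))
    return entries
```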

## Reconciliation Architecture

Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost.

Suggested flow:

1. Agent call completes.
2. Hermes stores canonical usage plus reconciliation ids.
3. Hermes computes an immediate estimate if a pricing source exists.
4. A reconciliation worker fetches actual cost when supported.
5. Session and message records are updated with `actual` cost.

This can run:

- inline for cheap lookups
- asynchronously for delayed provider accounting
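
A sketch of steps 2 through 4, where `store` and `reconciler` are hypothetical interfaces and `estimate_cost` is the sketch from the Pricing Catalog section:

```python
def on_agent_call_complete(usage: CanonicalUsage, store, reconciler) -> None:
    # Immediate estimate if a pricing source exists for this route.
    entry = store.lookup_pricing(usage.billing_route)  # hypothetical store API
    estimate = estimate_cost(usage, entry) if entry is not None else None
    # Persist canonical usage plus the ids needed for reconciliation.
    event_id = store.save_cost_event(  # hypothetical store API
        usage,
        estimated_cost_usd=estimate,
        cost_status="estimated" if estimate is not None else "unknown",
    )
    # Fetch actual billed cost off the hot path when the provider supports it.
    if usage.provider_generation_id:
        reconciler.schedule(event_id, usage.provider_generation_id)
```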

## Persistence Changes

Session storage should stop storing only aggregate prompt/completion totals.

Add fields for both usage and cost certainty:

- `input_tokens`
- `output_tokens`
- `cache_read_tokens`
- `cache_write_tokens`
- `reasoning_tokens`
- `estimated_cost_usd`
- `actual_cost_usd`
- `cost_status`
- `cost_source`
- `pricing_version`
- `billing_provider`
- `billing_mode`

If schema expansion is too large for one PR, add a new pricing events table:

```text
session_cost_events
  id
  session_id
  request_id
  provider
  model
  billing_mode
  input_tokens
  output_tokens
  cache_read_tokens
  cache_write_tokens
  estimated_cost_usd
  actual_cost_usd
  cost_status
  cost_source
  pricing_version
  created_at
  updated_at
```

## Hermes Touchpoints

### `run_agent.py`

Current responsibility:

- parse raw provider usage
- update session token counters

New responsibility:

- build `CanonicalUsage`
- update canonical counters
- store reconciliation ids
- emit usage event to pricing subsystem

### `agent/usage_pricing.py`

Current responsibility:

- static lookup table
- direct cost arithmetic

New responsibility:

- move or replace with pricing catalog facade
- no fuzzy model-family heuristics
- no direct pricing without billing-route context

### `cli.py`

Current responsibility:

- compute session cost directly from prompt/completion totals

New responsibility:

- display `CostResult`
- show status badges:
  - `actual`
  - `estimated`
  - `included`
  - `n/a`

### `agent/insights.py`

Current responsibility:

- recompute historical estimates from static pricing

New responsibility:

- aggregate stored pricing events
- prefer actual cost over estimate
- surface estimates only when reconciliation is unavailable

## UX Rules

### Status Bar

Show one of:

- `$1.42`
- `~$1.42`
- `included`
- `cost n/a`

Where:

- `$1.42` means `actual`
- `~$1.42` means `estimated`
- `included` means subscription-backed or explicitly zero-cost route
- `cost n/a` means unknown
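
A sketch of this mapping as a render helper (name illustrative), using the `CostResult` model defined earlier:

```python
def format_status_bar_cost(result: CostResult) -> str:
    if result.status == "actual" and result.amount_usd is not None:
        return f"${result.amount_usd:.2f}"
    if result.status == "estimated" and result.amount_usd is not None:
        return f"~${result.amount_usd:.2f}"
    if result.status == "included":
        return "included"
    return "cost n/a"
```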

### `/usage`

Show:

- token buckets
- estimated cost
- actual cost if available
- cost status
- pricing source

### `/insights`

Aggregate:

- actual cost totals
- estimated-only totals
- unknown-cost sessions count
- included-cost sessions count

## Config And Overrides

Add user-configurable pricing overrides in config:

```yaml
pricing:
  mode: hybrid
  sync_on_startup: true
  sync_interval_hours: 12
  overrides:
    - provider: openrouter
      model: anthropic/claude-opus-4.6
      billing_mode: custom_contract
      input_cost_per_million: 4.25
      output_cost_per_million: 22.0
      cache_read_cost_per_million: 0.5
      cache_write_cost_per_million: 6.0
  included_routes:
    - provider: copilot
      model: "*"
    - provider: codex-subscription
      model: "*"
```

Overrides must win over catalog defaults for the matching billing route.
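
A sketch of that precedence, with illustrative glob matching against `model_pattern`:

```python
import fnmatch


def lookup_pricing(route: BillingRoute, overrides: list[PricingEntry],
                   catalog: list[PricingEntry]) -> PricingEntry | None:
    def matches(entry: PricingEntry) -> bool:
        return (entry.provider == route.provider
                and fnmatch.fnmatch(route.model, entry.model_pattern))

    # Overrides win by scan order; the catalog is only a fallback.
    for entry in [*overrides, *catalog]:
        if matches(entry):
            return entry
    return None
```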

## Rollout Plan

### Phase 1

- add canonical usage model
- split cache token buckets in `run_agent.py`
- stop pricing cache-inflated prompt totals
- preserve current UI with improved backend math

### Phase 2

- add route-aware pricing catalog
- integrate OpenRouter models API sync
- add `estimated` vs `included` vs `unknown`

### Phase 3

- add reconciliation for OpenRouter generation cost
- add actual cost persistence
- update `/insights` to prefer actual cost

### Phase 4

- add direct OpenAI and Anthropic reconciliation paths
- add user overrides and contract pricing
- add pricing sync CLI command

## Testing Strategy

Add tests for:

- OpenAI cached token subtraction
- Anthropic cache read/write separation
- OpenRouter estimated vs actual reconciliation
- subscription-backed models showing `included`
- custom endpoints showing `n/a`
- override precedence
- stale catalog fallback behavior

Current tests that assume heuristic pricing should be replaced with route-aware expectations.
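
A sketch of the first case in pytest style, using the illustrative `normalize_openai_usage` helper from the Provider Normalization Rules section:

```python
def test_openai_cached_token_subtraction():
    usage = {
        "prompt_tokens": 1200,
        "completion_tokens": 300,
        "prompt_tokens_details": {"cached_tokens": 1000},
    }
    cu = normalize_openai_usage(usage, model="gpt-5")
    assert cu.input_tokens == 200  # 1200 - 1000 cached, non-cached input only
    assert cu.cache_read_tokens == 1000  # never folded into input_tokens
    assert cu.output_tokens == 300
```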

## Non-Goals

- exact enterprise billing reconstruction without an official source or user override
- backfilling perfect historical cost for old sessions that lack cache bucket data
- scraping arbitrary provider web pages at request time

## Recommendation

Do not expand the existing `MODEL_PRICING` dict.

That path cannot satisfy the product requirement. Hermes should instead migrate to:

- canonical usage normalization
- route-aware pricing sources
- estimate-then-reconcile cost lifecycle
- explicit certainty states in the UI

This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible.
@@ -1,329 +0,0 @@
|
||||
# Container-Aware CLI Review Fixes Spec
|
||||
|
||||
**PR:** NousResearch/hermes-agent#7543
|
||||
**Review:** cursor[bot] bugbot review (4094049442) + two prior rounds
|
||||
**Date:** 2026-04-12
|
||||
**Branch:** `feat/container-aware-cli-clean`
|
||||
|
||||
## Review Issues Summary
|
||||
|
||||
Six issues were raised across three bugbot review rounds. Three were fixed in intermediate commits (38277a6a, 726cf90f). This spec addresses remaining design concerns surfaced by those reviews and simplifies the implementation based on interview decisions.
|
||||
|
||||
| # | Issue | Severity | Status |
|
||||
|---|-------|----------|--------|
|
||||
| 1 | `os.execvp` retry loop unreachable | Medium | Fixed in 79e8cd12 (switched to subprocess.run) |
|
||||
| 2 | Redundant `shutil.which("sudo")` | Medium | Fixed in 38277a6a (reuses `sudo` var) |
|
||||
| 3 | Missing `chown -h` on symlink update | Low | Fixed in 38277a6a |
|
||||
| 4 | Container routing after `parse_args()` | High | Fixed in 726cf90f |
|
||||
| 5 | Hardcoded `/home/${user}` | Medium | Fixed in 726cf90f |
|
||||
| 6 | Group membership not gated on `container.enable` | Low | Fixed in 726cf90f |
|
||||
|
||||
The mechanical fixes are in place but the overall design needs revision. The retry loop, error swallowing, and process model have deeper issues than what the bugbot flagged.
|
||||
|
||||
---
|
||||
|
||||
## Spec: Revised `_exec_in_container`
|
||||
|
||||
### Design Principles
|
||||
|
||||
1. **Let it crash.** No silent fallbacks. If `.container-mode` exists but something goes wrong, the error propagates naturally (Python traceback). The only case where container routing is skipped is when `.container-mode` doesn't exist or `HERMES_DEV=1`.
|
||||
2. **No retries.** Probe once for sudo, exec once. If it fails, docker/podman's stderr reaches the user verbatim.
|
||||
3. **Completely transparent.** No error wrapping, no prefixes, no spinners. Docker's output goes straight through.
|
||||
4. **`os.execvp` on the happy path.** Replace the Python process entirely so there's no idle parent during interactive sessions. Note: `execvp` never returns on success (process is replaced) and raises `OSError` on failure (it does not return a value). The container process's exit code becomes the process exit code by definition — no explicit propagation needed.
|
||||
5. **One human-readable exception to "let it crash".** `subprocess.TimeoutExpired` from the sudo probe gets a specific catch with a readable message, since a raw traceback for "your Docker daemon is slow" is confusing. All other exceptions propagate naturally.
|
||||
|
||||
### Execution Flow
|
||||
|
||||
```
|
||||
1. get_container_exec_info()
|
||||
- HERMES_DEV=1 → return None (skip routing)
|
||||
- Inside container → return None (skip routing)
|
||||
- .container-mode doesn't exist → return None (skip routing)
|
||||
- .container-mode exists → parse and return dict
|
||||
- .container-mode exists but malformed/unreadable → LET IT CRASH (no try/except)
|
||||
|
||||
2. _exec_in_container(container_info, sys.argv[1:])
|
||||
a. shutil.which(backend) → if None, print "{backend} not found on PATH" and sys.exit(1)
|
||||
b. Sudo probe: subprocess.run([runtime, "inspect", "--format", "ok", container_name], timeout=15)
|
||||
- If succeeds → needs_sudo = False
|
||||
- If fails → try subprocess.run([sudo, "-n", runtime, "inspect", ...], timeout=15)
|
||||
- If succeeds → needs_sudo = True
|
||||
- If fails → print error with sudoers hint (including why -n is required) and sys.exit(1)
|
||||
- If TimeoutExpired → catch specifically, print human-readable message about slow daemon
|
||||
c. Build exec_cmd: [sudo? + runtime, "exec", tty_flags, "-u", exec_user, env_flags, container, hermes_bin, *cli_args]
|
||||
d. os.execvp(exec_cmd[0], exec_cmd)
|
||||
- On success: process is replaced — Python is gone, container exit code IS the process exit code
|
||||
- On OSError: let it crash (natural traceback)
|
||||
```
|
||||

### Changes to `hermes_cli/main.py`

#### `_exec_in_container` — rewrite

Remove:

- The entire retry loop (`max_retries`, `for attempt in range(...)`)
- Spinner logic (`"Waiting for container..."`, dots)
- Exit code classification (125/126/127 handling)
- `subprocess.run` for the exec call (keep it only for the sudo probe)
- Special TTY vs non-TTY retry counts
- The `time` import (no longer needed)

Change:

- Use `os.execvp(exec_cmd[0], exec_cmd)` as the final call
- Keep the `subprocess` import only for the sudo probe
- Keep TTY detection for the `-it` vs `-i` flag
- Keep env var forwarding (TERM, COLORTERM, LANG, LC_ALL)
- Keep the sudo probe as-is (it's the one "smart" part)
- Bump probe `timeout` from 5s to 15s — cold podman on a loaded machine needs headroom
- Catch `subprocess.TimeoutExpired` specifically on both probe calls — print a readable message about the daemon being unresponsive instead of a raw traceback
- Expand the sudoers hint error message to explain *why* `-n` (non-interactive) is required: a password prompt would hang the CLI or break piped commands

The function becomes roughly:

```python
def _exec_in_container(container_info: dict, cli_args: list):
    """Replace the current process with a command inside the managed container.

    Probes whether sudo is needed (rootful containers), then os.execvp
    into the container. If exec fails, the OS error propagates naturally.
    """
    import shutil
    import subprocess

    backend = container_info["backend"]
    container_name = container_info["container_name"]
    exec_user = container_info["exec_user"]
    hermes_bin = container_info["hermes_bin"]

    runtime = shutil.which(backend)
    if not runtime:
        print(f"Error: {backend} not found on PATH. Cannot route to container.",
              file=sys.stderr)
        sys.exit(1)

    # Probe whether we need sudo to see the rootful container.
    # Timeout is 15s — cold podman on a loaded machine can take a while.
    # TimeoutExpired is caught specifically for a human-readable message;
    # all other exceptions propagate naturally.
    needs_sudo = False
    sudo = None
    try:
        probe = subprocess.run(
            [runtime, "inspect", "--format", "ok", container_name],
            capture_output=True, text=True, timeout=15,
        )
    except subprocess.TimeoutExpired:
        print(
            f"Error: timed out waiting for {backend} to respond.\n"
            f"The {backend} daemon may be unresponsive or starting up.",
            file=sys.stderr,
        )
        sys.exit(1)

    if probe.returncode != 0:
        sudo = shutil.which("sudo")
        if sudo:
            try:
                probe2 = subprocess.run(
                    [sudo, "-n", runtime, "inspect", "--format", "ok", container_name],
                    capture_output=True, text=True, timeout=15,
                )
            except subprocess.TimeoutExpired:
                print(
                    f"Error: timed out waiting for sudo {backend} to respond.",
                    file=sys.stderr,
                )
                sys.exit(1)

            if probe2.returncode == 0:
                needs_sudo = True
            else:
                print(
                    f"Error: container '{container_name}' not found via {backend}.\n"
                    f"\n"
                    f"The NixOS service runs the container as root. Your user cannot\n"
                    f"see it because {backend} uses per-user namespaces.\n"
                    f"\n"
                    f"Fix: grant passwordless sudo for {backend}. The -n (non-interactive)\n"
                    f"flag is required because the CLI calls sudo non-interactively —\n"
                    f"a password prompt would hang or break piped commands:\n"
                    f"\n"
                    f'  security.sudo.extraRules = [{{\n'
                    f'    users = [ "{os.getenv("USER", "your-user")}" ];\n'
                    f'    commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n'
                    f'  }}];\n'
                    f"\n"
                    f"Or run: sudo hermes {' '.join(cli_args)}",
                    file=sys.stderr,
                )
                sys.exit(1)
        else:
            print(
                f"Error: container '{container_name}' not found via {backend}.\n"
                f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}",
                file=sys.stderr,
            )
            sys.exit(1)

    is_tty = sys.stdin.isatty()
    tty_flags = ["-it"] if is_tty else ["-i"]

    env_flags = []
    for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"):
        val = os.environ.get(var)
        if val:
            env_flags.extend(["-e", f"{var}={val}"])

    cmd_prefix = [sudo, "-n", runtime] if needs_sudo else [runtime]
    exec_cmd = (
        cmd_prefix + ["exec"]
        + tty_flags
        + ["-u", exec_user]
        + env_flags
        + [container_name, hermes_bin]
        + cli_args
    )

    # execvp replaces this process entirely — it never returns on success.
    # On failure it raises OSError, which propagates naturally.
    os.execvp(exec_cmd[0], exec_cmd)
```

#### Container routing call site in `main()` — remove try/except

Current:

```python
try:
    from hermes_cli.config import get_container_exec_info
    container_info = get_container_exec_info()
    if container_info:
        _exec_in_container(container_info, sys.argv[1:])
        sys.exit(1)  # exec failed if we reach here
except SystemExit:
    raise
except Exception:
    pass  # Container routing unavailable, proceed locally
```

Revised:

```python
from hermes_cli.config import get_container_exec_info

container_info = get_container_exec_info()
if container_info:
    _exec_in_container(container_info, sys.argv[1:])
    # Unreachable: os.execvp never returns on success (process is replaced)
    # and raises OSError on failure (which propagates as a traceback).
    # This line exists only as a defensive assertion.
    sys.exit(1)
```

No try/except. If `.container-mode` doesn't exist, `get_container_exec_info()` returns `None` and we skip routing. If it exists but is broken, the exception propagates with a natural traceback.

Note: `sys.exit(1)` after `_exec_in_container` is dead code in all paths — `os.execvp` either replaces the process or raises. It's kept as a belt-and-suspenders assertion with a comment marking it unreachable, not as actual error handling.

### Changes to `hermes_cli/config.py`

#### `get_container_exec_info` — remove inner try/except

Current code catches `(OSError, IOError)` and returns `None`. This silently hides permission errors, corrupt files, etc.

Change: Remove the try/except around file reading. Keep the early returns for `HERMES_DEV=1` and `_is_inside_container()`. The `FileNotFoundError` from `open()` when `.container-mode` doesn't exist should still return `None` (this is the "container mode not enabled" case). All other exceptions propagate.

```python
def get_container_exec_info() -> Optional[dict]:
    if os.environ.get("HERMES_DEV") == "1":
        return None
    if _is_inside_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"

    try:
        with open(container_mode_file, "r") as f:
            # ... parse key=value lines ...
    except FileNotFoundError:
        return None
    # All other exceptions (PermissionError, malformed data, etc.) propagate

    return { ... }
```

---

## Spec: NixOS Module Changes

### Symlink creation — simplify to two branches

Current: 4 branches (symlink exists, directory exists, other file, doesn't exist).

Revised: 2 branches.

```bash
if [ -d "${symlinkPath}" ] && [ ! -L "${symlinkPath}" ]; then
  # Real directory — back it up, then create symlink
  _backup="${symlinkPath}.bak.$(date +%s)"
  echo "hermes-agent: backing up existing ${symlinkPath} to $_backup"
  mv "${symlinkPath}" "$_backup"
fi
# For everything else (symlink, doesn't exist, etc.) — just force-create
ln -sfn "${target}" "${symlinkPath}"
chown -h ${user}:${cfg.group} "${symlinkPath}"
```

`ln -sfn` handles: existing symlink (replaces), doesn't exist (creates), and after the `mv` above (creates). The only case that needs special handling is a real directory, because `ln -sfn` cannot atomically replace a directory.

Note: there is a theoretical race between the `[ -d ... ]` check and the `mv` (something could create/remove the directory in between). In practice this is a NixOS activation script running as root during `nixos-rebuild switch` — no other process should be touching `~/.hermes` at that moment. Not worth adding locking for.

### Sudoers — document, don't auto-configure

Do NOT add `security.sudo.extraRules` to the module. Document the sudoers requirement in the module's description/comments and in the error message the CLI prints when the sudo probe fails.

### Group membership gating — keep as-is

The fix in 726cf90f (`cfg.container.enable && cfg.container.hostUsers != []`) is correct. Leftover group membership when container mode is disabled is harmless. No cleanup needed.

---

## Spec: Test Rewrite

The existing test file (`tests/hermes_cli/test_container_aware_cli.py`) has 16 tests. With the simplified exec model, several are obsolete.

### Tests to keep (update as needed)

- `test_is_inside_container_dockerenv` — unchanged
- `test_is_inside_container_containerenv` — unchanged
- `test_is_inside_container_cgroup_docker` — unchanged
- `test_is_inside_container_false_on_host` — unchanged
- `test_get_container_exec_info_returns_metadata` — unchanged
- `test_get_container_exec_info_none_inside_container` — unchanged
- `test_get_container_exec_info_none_without_file` — unchanged
- `test_get_container_exec_info_skipped_when_hermes_dev` — unchanged
- `test_get_container_exec_info_not_skipped_when_hermes_dev_zero` — unchanged
- `test_get_container_exec_info_defaults` — unchanged
- `test_get_container_exec_info_docker_backend` — unchanged

### Tests to add

- `test_get_container_exec_info_crashes_on_permission_error` — verify that `PermissionError` propagates (no silent `None` return)
- `test_exec_in_container_calls_execvp` — verify `os.execvp` is called with correct args (runtime, tty flags, user, env, container, binary, cli args)
- `test_exec_in_container_sudo_probe_sets_prefix` — verify that when first probe fails and sudo probe succeeds, `os.execvp` is called with `sudo -n` prefix
- `test_exec_in_container_no_runtime_hard_fails` — keep existing, verify `sys.exit(1)` when `shutil.which` returns None
- `test_exec_in_container_non_tty_uses_i_only` — update to check `os.execvp` args instead of `subprocess.run` args
- `test_exec_in_container_probe_timeout_prints_message` — verify that `subprocess.TimeoutExpired` from the probe produces a human-readable error and `sys.exit(1)`, not a raw traceback
- `test_exec_in_container_container_not_running_no_sudo` — verify the path where runtime exists (`shutil.which` returns a path) but probe returns non-zero and no sudo is available. Should print the "container may be running under root" error. This is distinct from `no_runtime_hard_fails` which covers `shutil.which` returning None.

### Tests to delete

- `test_exec_in_container_tty_retries_on_container_failure` — retry loop removed
- `test_exec_in_container_non_tty_retries_silently_exits_126` — retry loop removed
- `test_exec_in_container_propagates_hermes_exit_code` — no subprocess.run to check exit codes; execvp replaces the process. Note: exit code propagation still works correctly — when `os.execvp` succeeds, the container's process *becomes* this process, so its exit code is the process exit code by OS semantics. No application code needed, no test needed. A comment in the function docstring documents this intent for future readers.

---

## Out of Scope

- Auto-configuring sudoers rules in the NixOS module
- Any changes to `get_container_exec_info` parsing logic beyond the try/except narrowing
- Changes to `.container-mode` file format
- Changes to the `HERMES_DEV=1` bypass
- Changes to container detection logic (`_is_inside_container`)

gateway/run.py | 150

@@ -3434,6 +3434,9 @@ class GatewayRunner:
        if canonical == "insights":
            return await self._handle_insights_command(event)

        if canonical == "workspace":
            return await self._handle_workspace_command(event)

        if canonical == "reload-mcp":
            return await self._handle_reload_mcp_command(event)

@@ -7263,6 +7266,153 @@ class GatewayRunner:
            logger.error("Insights command error: %s", e, exc_info=True)
            return f"Error generating insights: {e}"

    async def _handle_workspace_command(self, event: MessageEvent) -> str:
        """Handle /workspace command -- status, search, index management.

        Subcommands: status, search <query>, list, retrieve <path>,
        delete <path>, index, roots [list|add|remove]. Default is status.
        """
        args = event.get_command_args().strip()
        parts = args.split() if args else []
        action = parts[0].lower() if parts else "status"

        loop = asyncio.get_running_loop()

        def _run():
            from pathlib import Path as _Path

            from workspace import get_indexer
            from workspace.config import load_workspace_config

            config = load_workspace_config()
            if not config.enabled:
                return "Workspace is disabled (workspace.enabled = false)."

            indexer = get_indexer(config)

            if action == "status":
                info = indexer.status()
                if not info:
                    return "No status available."
                lines = []
                for k, v in info.items():
                    if k == "db_size_bytes":
                        lines.append(f" {k}: {v / (1024 * 1024):.1f} MB")
                    else:
                        lines.append(f" {k}: {v}")
                return "\n".join(lines)

            if action == "search":
                query = " ".join(parts[1:]).strip()
                if not query:
                    return "Usage: /workspace search <query>"
                results = indexer.search(query, limit=10)
                if not results:
                    return "No results found."
                out = []
                for r in results:
                    section = f" [{r.section}]" if r.section else ""
                    snippet = r.content[:200].replace("\n", " ")
                    if len(r.content) > 200:
                        snippet += "..."
                    out.append(
                        f"{r.path}:{r.line_start}-{r.line_end} "
                        f"(score: {r.score:.1f}){section}\n {snippet}"
                    )
                return "\n\n".join(out)

            if action == "list":
                files = indexer.list_files()
                if not files:
                    return "No files indexed."
                lines = [f"{len(files)} indexed files:"]
                for f in files[:20]:
                    size_kb = f.get("size_bytes", 0) / 1024
                    chunks = f.get("chunks", 0)
                    lines.append(f" {f['path']} ({size_kb:.0f} KB, {chunks} chunks)")
                if len(files) > 20:
                    lines.append(f" ... and {len(files) - 20} more")
                return "\n".join(lines)

            if action == "retrieve":
                if len(parts) < 2:
                    return "Usage: /workspace retrieve <path>"
                path = str(_Path(parts[1]).expanduser().resolve())
                results = indexer.retrieve(path)
                if not results:
                    return f"No indexed chunks for: {path}"
                lines = [f"{len(results)} chunks for {path}:"]
                for r in results[:10]:
                    section = f" [{r.section}]" if r.section else ""
                    snippet = r.content[:150].replace("\n", " ")
                    if len(r.content) > 150:
                        snippet += "..."
                    lines.append(
                        f" chunk {r.chunk_index}: lines {r.line_start}-{r.line_end}{section}\n {snippet}"
                    )
                if len(results) > 10:
                    lines.append(f" ... and {len(results) - 10} more chunks")
                return "\n".join(lines)

            if action == "delete":
                if len(parts) < 2:
                    return "Usage: /workspace delete <path>"
                path = str(_Path(parts[1]).expanduser().resolve())
                deleted = indexer.delete(path)
                return f"Deleted from index: {path}" if deleted else f"Not found in index: {path}"

            if action == "index":
                summary = indexer.index()
                return (
                    f"Indexed {summary.files_indexed} files "
                    f"({summary.chunks_created} chunks), "
                    f"skipped {summary.files_skipped}, "
                    f"errored {summary.files_errored}, "
                    f"pruned {summary.files_pruned} stale. "
                    f"Took {summary.duration_seconds:.1f}s."
                )

            if action == "roots":
                sub = parts[1].lower() if len(parts) > 1 else "list"
                if sub == "list":
                    roots = config.knowledgebase.roots
                    if not roots:
                        return "No workspace roots configured."
                    return "\n".join(
                        f" {r.path}" + (" (recursive)" if r.recursive else "")
                        for r in roots
                    )
                if sub == "add":
                    if len(parts) < 3:
                        return "Usage: /workspace roots add <path> [--recursive]"
                    from workspace.commands import _add_root

                    root_path = str(_Path(parts[2]).expanduser().resolve())
                    recursive = "--recursive" in parts[3:]
                    _add_root(root_path, recursive)
                    return f"Added workspace root: {root_path} (recursive={recursive})"
                if sub == "remove":
                    if len(parts) < 3:
                        return "Usage: /workspace roots remove <path>"
                    from workspace.commands import _remove_root

                    root_path = str(_Path(parts[2]).expanduser().resolve())
                    _remove_root(root_path)
                    return f"Removed workspace root: {root_path}"
                return "Usage: /workspace roots [list|add|remove]"

            return (
                f"Unknown workspace subcommand: {action}\n"
                "Usage: /workspace [status|search <query>|list|retrieve <path>|"
                "delete <path>|index|roots ...]"
            )

        try:
            return await loop.run_in_executor(None, _run)
        except Exception as e:
            logger.error("Workspace command error: %s", e, exc_info=True)
            return f"Error: {e}"

    async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
        loop = asyncio.get_running_loop()

@@ -145,6 +145,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
               cli_only=True, args_hint="[connect|disconnect|status]",
               subcommands=("connect", "disconnect", "status")),
    CommandDef("workspace", "Workspace status, search, and index management",
               "Tools & Skills",
               args_hint="[status|index|list|search|retrieve|delete|roots]",
               subcommands=("status", "index", "list", "search", "retrieve", "delete", "roots")),
    CommandDef("plugins", "List installed plugins and their status",
               "Tools & Skills", cli_only=True),

@@ -818,6 +818,27 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

    # Workspace — local document directory for curated files.
    "workspace": {
        "enabled": True,
        "path": "",  # empty = HERMES_HOME/workspace
    },

    # Knowledgebase — indexing and search configuration for workspace files.
    "knowledgebase": {
        "roots": [],  # [{path: "/abs/path", recursive: false}]
        "chunking": {
            "strategy": "standard",  # "standard" | "semantic" | "neural"
            "chunk_size": 512,  # words per chunk
        },
        "indexing": {
            "max_file_mb": 10,  # skip files over this size
        },
        "search": {
            "default_limit": 20,  # default search result count
        },
    },

    # Config schema version - bump this when adding new required fields
    "_config_version": 19,
}

@@ -19,6 +19,13 @@ Usage:
    hermes cron status                                 # Check if cron scheduler is running
    hermes doctor                                      # Check configuration and dependencies
    hermes honcho setup                                # Configure Honcho AI memory integration
    hermes workspace roots list/add/remove             # Manage workspace root directories
    hermes workspace index                             # Index workspace files
    hermes workspace search <query>                    # Search indexed content
    hermes workspace search <query> --path <prefix>    # Search with path filter
    hermes workspace search <query> --glob <pattern>   # Search with glob pattern
    hermes workspace search <query> --limit <n>        # Limit number of results
    hermes workspace search <query> --human            # Human-readable output format
    hermes honcho status                               # Show Honcho config and connection status
    hermes honcho sessions                             # List directory → session name mappings
    hermes honcho map <name>                           # Map current directory to a session name

@@ -8345,6 +8352,90 @@ Examples:
    )
    logs_parser.set_defaults(func=cmd_logs)

    # =========================================================================
    # workspace command
    # =========================================================================
    workspace_parser = subparsers.add_parser(
        "workspace",
        help="Workspace indexing and search",
        description="Manage workspace roots, index files, search, and inspect the FTS5 index",
    )
    workspace_flag_parent = argparse.ArgumentParser(add_help=False)
    workspace_flag_parent.add_argument(
        "--human",
        action="store_true",
        help="Human-readable Rich output instead of JSON",
    )
    workspace_subparsers = workspace_parser.add_subparsers(dest="workspace_action")

    # workspace roots
    roots_parser = workspace_subparsers.add_parser(
        "roots",
        help="Manage workspace roots",
        parents=[workspace_flag_parent],
    )
    roots_sub = roots_parser.add_subparsers(dest="roots_action")
    roots_sub.add_parser("list", help="List configured workspace roots", parents=[workspace_flag_parent])
    roots_add = roots_sub.add_parser("add", help="Add a workspace root", parents=[workspace_flag_parent])
    roots_add.add_argument("path", help="Directory path to add as workspace root")
    roots_add.add_argument("--recursive", action="store_true", help="Recursively index subdirectories")
    roots_rm = roots_sub.add_parser("remove", help="Remove a workspace root", parents=[workspace_flag_parent])
    roots_rm.add_argument("path", help="Directory path to remove")

    # workspace index
    workspace_subparsers.add_parser(
        "index",
        help="Index all workspace roots into FTS5",
        parents=[workspace_flag_parent],
    )

    # workspace search
    ws_search = workspace_subparsers.add_parser(
        "search",
        help="Search indexed workspace files",
        parents=[workspace_flag_parent],
    )
    ws_search.add_argument("query", help="Search query")
    ws_search.add_argument("--limit", type=int, help="Max results")
    ws_search.add_argument("--path", help="Filter by absolute path prefix")
    ws_search.add_argument("--glob", help="Filter by filename glob pattern")

    # workspace status
    workspace_subparsers.add_parser(
        "status",
        help="Show workspace index status",
        parents=[workspace_flag_parent],
    )

    # workspace list
    workspace_subparsers.add_parser(
        "list",
        help="List all indexed files",
        parents=[workspace_flag_parent],
    )

    # workspace retrieve
    ws_retrieve = workspace_subparsers.add_parser(
        "retrieve",
        help="Retrieve all indexed chunks for a file",
        parents=[workspace_flag_parent],
    )
    ws_retrieve.add_argument("path", help="Absolute path to the file")

    # workspace delete
    ws_delete = workspace_subparsers.add_parser(
        "delete",
        help="Delete a file from the workspace index",
        parents=[workspace_flag_parent],
    )
    ws_delete.add_argument("path", help="Absolute path to the file to remove")

    def cmd_workspace(args):
        from workspace.commands import workspace_command
        workspace_command(args)

    workspace_parser.set_defaults(func=cmd_workspace)

    # =========================================================================
    # Parse and execute
    # =========================================================================

hermes_cli/workspace_slash.py | 213 (new file)

@@ -0,0 +1,213 @@
|
||||
"""Slash command handler for /workspace in the interactive CLI.
|
||||
|
||||
Parses /workspace [subcommand] [args] and formats output with Rich.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from rich.console import Console
|
||||
|
||||
|
||||
def handle_workspace_slash(cmd: str, console: Optional[Console] = None) -> None:
|
||||
console = console or Console()
|
||||
parts = cmd.strip().split()
|
||||
if parts and parts[0].lower() in ("/workspace", "workspace"):
|
||||
parts = parts[1:]
|
||||
|
||||
if not parts:
|
||||
_print_status(console)
|
||||
return
|
||||
|
||||
action = parts[0].lower()
|
||||
|
||||
if action == "status":
|
||||
_print_status(console)
|
||||
elif action == "index":
|
||||
_print_index(console)
|
||||
elif action == "list":
|
||||
_print_list(console)
|
||||
elif action == "search":
|
||||
query = " ".join(parts[1:]).strip()
|
||||
if not query:
|
||||
console.print("Usage: /workspace search <query>")
|
||||
return
|
||||
_print_search(console, query)
|
||||
elif action == "retrieve":
|
||||
path = parts[1] if len(parts) > 1 else ""
|
||||
if not path:
|
||||
console.print("Usage: /workspace retrieve <path>")
|
||||
return
|
||||
_print_retrieve(console, path)
|
||||
elif action == "delete":
|
||||
path = parts[1] if len(parts) > 1 else ""
|
||||
if not path:
|
||||
console.print("Usage: /workspace delete <path>")
|
||||
return
|
||||
_print_delete(console, path)
|
||||
elif action == "roots":
|
||||
_print_roots(console, parts[1:])
|
||||
else:
|
||||
console.print(
|
||||
"Usage: /workspace [status|index|list|search <query>"
|
||||
"|retrieve <path>|delete <path>|roots ...]"
|
||||
)
|
||||
|
||||
|
||||
def _get_indexer_and_config():
|
||||
from workspace import get_indexer
|
||||
from workspace.config import load_workspace_config
|
||||
|
||||
config = load_workspace_config()
|
||||
if not config.enabled:
|
||||
return None, config
|
||||
return get_indexer(config), config
|
||||
|
||||
|
||||
def _print_status(console: Console) -> None:
|
||||
indexer, config = _get_indexer_and_config()
|
||||
if indexer is None:
|
||||
console.print("[bold red]Workspace is disabled[/]")
|
||||
return
|
||||
info = indexer.status()
|
||||
if not info:
|
||||
console.print("No status available.")
|
||||
return
|
||||
for k, v in info.items():
|
||||
if k == "db_size_bytes":
|
||||
console.print(f" {k}: {v / (1024 * 1024):.1f} MB")
|
||||
else:
|
||||
console.print(f" {k}: {v}")
|
||||
|
||||
|
||||
def _print_search(console: Console, query: str) -> None:
|
||||
indexer, _ = _get_indexer_and_config()
|
||||
if indexer is None:
|
||||
console.print("[bold red]Workspace is disabled[/]")
|
||||
return
|
||||
results = indexer.search(query, limit=20)
|
||||
if not results:
|
||||
console.print("No results found.")
|
||||
return
|
||||
for r in results:
|
||||
section = f" {r.section}" if r.section else ""
|
||||
console.print(
|
||||
f"\n{r.path}:{r.line_start}-{r.line_end} "
|
||||
f"(score: {r.score:.1f}){section}"
|
||||
)
|
||||
snippet = r.content[:200].replace("\n", " ")
|
||||
if len(r.content) > 200:
|
||||
snippet += "..."
|
||||
console.print(f" {snippet}")
|
||||
|
||||
|
||||
def _print_list(console: Console) -> None:
|
||||
indexer, _ = _get_indexer_and_config()
|
||||
if indexer is None:
|
||||
console.print("[bold red]Workspace is disabled[/]")
|
||||
return
|
||||
files = indexer.list_files()
|
||||
if not files:
|
||||
console.print("No files indexed.")
|
||||
return
|
||||
console.print(f"{len(files)} indexed files:\n")
|
||||
for f in files:
|
||||
size_kb = f.get("size_bytes", 0) / 1024
|
||||
chunks = f.get("chunks", 0)
|
||||
console.print(f" {f['path']} ({size_kb:.0f} KB, {chunks} chunks)")
|
||||
|
||||
|
||||
def _print_retrieve(console: Console, raw_path: str) -> None:
|
||||
indexer, _ = _get_indexer_and_config()
|
||||
if indexer is None:
|
||||
console.print("[bold red]Workspace is disabled[/]")
|
||||
return
|
||||
path = str(Path(raw_path).expanduser().resolve())
|
||||
results = indexer.retrieve(path)
|
||||
if not results:
|
||||
console.print(f"No indexed chunks for: {path}")
|
||||
return
|
||||
console.print(f"{len(results)} chunks for {path}:\n")
|
||||
for r in results:
|
||||
section = f" [{r.section}]" if r.section else ""
|
||||
console.print(f" chunk {r.chunk_index}: lines {r.line_start}-{r.line_end}{section}")
|
||||
snippet = r.content[:200].replace("\n", " ")
|
||||
if len(r.content) > 200:
|
||||
snippet += "..."
|
||||
console.print(f" {snippet}\n")
|
||||
|
||||
|
||||
def _print_delete(console: Console, raw_path: str) -> None:
|
||||
indexer, _ = _get_indexer_and_config()
|
||||
if indexer is None:
|
||||
console.print("[bold red]Workspace is disabled[/]")
|
||||
return
|
||||
path = str(Path(raw_path).expanduser().resolve())
|
||||
deleted = indexer.delete(path)
|
||||
if deleted:
|
||||
console.print(f"Deleted from index: {path}")
|
||||
else:
|
||||
console.print(f"Not found in index: {path}")
|
||||
|
||||
|
||||
def _print_index(console: Console) -> None:
|
||||
indexer, _ = _get_indexer_and_config()
|
||||
if indexer is None:
|
||||
console.print("[bold red]Workspace is disabled[/]")
|
||||
return
|
||||
|
||||
def _progress(current: int, total: int, path: str) -> None:
|
||||
name = Path(path).name
|
||||
console.print(f" [{current}/{total}] {name}", highlight=False)
|
||||
|
||||
summary = indexer.index(progress=_progress)
|
||||
console.print(
|
||||
f"\nIndexed {summary.files_indexed} files "
|
||||
f"({summary.chunks_created} chunks), "
|
||||
f"skipped {summary.files_skipped}, "
|
||||
f"errored {summary.files_errored}, "
|
||||
f"pruned {summary.files_pruned} stale. "
|
||||
f"Took {summary.duration_seconds:.1f}s."
|
||||
)
|
||||
if summary.errors:
|
||||
console.print("\n[bold red]Errors:[/]")
|
||||
for err in summary.errors:
|
||||
console.print(f" [{err.stage}] {err.path}: {err.message}")
|
||||
|
||||
|
||||
def _print_roots(console: Console, parts: list[str]) -> None:
|
||||
from workspace.config import load_workspace_config
|
||||
|
||||
if not parts or parts[0].lower() == "list":
|
||||
config = load_workspace_config()
|
||||
roots = config.knowledgebase.roots
|
||||
if not roots:
|
||||
console.print("No workspace roots configured.")
|
||||
return
|
||||
for r in roots:
|
||||
flag = " (recursive)" if r.recursive else ""
|
||||
console.print(f" {r.path}{flag}")
|
||||
return
|
||||
|
||||
action = parts[0].lower()
|
||||
if action == "add":
|
||||
if len(parts) < 2:
|
||||
console.print("Usage: /workspace roots add <path> [--recursive]")
|
||||
return
|
||||
path = str(Path(parts[1]).expanduser().resolve())
|
||||
recursive = "--recursive" in parts[2:]
|
||||
from workspace.commands import _add_root
|
||||
|
||||
_add_root(path, recursive)
|
||||
console.print(f"Added workspace root: {path} (recursive={recursive})")
|
||||
elif action == "remove":
|
||||
if len(parts) < 2:
|
||||
console.print("Usage: /workspace roots remove <path>")
|
||||
return
|
||||
path = str(Path(parts[1]).expanduser().resolve())
|
||||
from workspace.commands import _remove_root
|
||||
|
||||
_remove_root(path)
|
||||
console.print(f"Removed workspace root: {path}")
|
||||
else:
|
||||
console.print("Usage: /workspace roots [list|add|remove]")
|
||||
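For orientation, here is a hedged sketch of the dispatch layer these `_print_*` helpers hang off of. The `handle_workspace_command` wrapper and its tokenization are illustrative assumptions, not part of the diff:

# Hypothetical wrapper (not in the diff): tokenize "/workspace ..." into the
# `action` and `parts` values the branches above consume.
from rich.console import Console


def handle_workspace_command(console: Console, raw: str) -> None:
    parts = raw.split()[1:]  # drop the leading "/workspace"
    action = parts[0].lower() if parts else "status"
    if action == "status":
        _print_status(console)
    elif action == "index":
        _print_index(console)
    elif action == "list":
        _print_list(console)
    # search / retrieve / delete / roots dispatch as shown above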
221 plugins/workspace/__init__.py Normal file
@@ -0,0 +1,221 @@
"""Workspace indexer plugin discovery.
|
||||
|
||||
Scans ``plugins/workspace/<name>/`` directories for indexer plugins.
|
||||
Each subdirectory must contain ``__init__.py`` with a class implementing
|
||||
the BaseIndexer ABC.
|
||||
|
||||
Usage:
|
||||
from plugins.workspace import discover_workspace_indexers, load_workspace_indexer
|
||||
|
||||
available = discover_workspace_indexers()
|
||||
indexer_cls = load_workspace_indexer("witchcraft")
|
||||
"""
|
||||
|
||||
import importlib
|
||||
import importlib.util
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_WORKSPACE_PLUGINS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def discover_workspace_indexers() -> list[tuple[str, str, bool]]:
|
||||
"""Scan plugins/workspace/ for available indexer plugins.
|
||||
|
||||
Returns list of (name, description, is_available) tuples.
|
||||
Does NOT import the indexers — just reads plugin.yaml for metadata
|
||||
and does a lightweight availability check.
|
||||
"""
|
||||
results: list[tuple[str, str, bool]] = []
|
||||
if not _WORKSPACE_PLUGINS_DIR.is_dir():
|
||||
return results
|
||||
|
||||
for child in sorted(_WORKSPACE_PLUGINS_DIR.iterdir()):
|
||||
if not child.is_dir() or child.name.startswith(("_", ".")):
|
||||
continue
|
||||
init_file = child / "__init__.py"
|
||||
if not init_file.exists():
|
||||
continue
|
||||
|
||||
# Read description from plugin.yaml if available
|
||||
desc = ""
|
||||
yaml_file = child / "plugin.yaml"
|
||||
if yaml_file.exists():
|
||||
try:
|
||||
import yaml
|
||||
|
||||
with open(yaml_file) as f:
|
||||
meta = yaml.safe_load(f) or {}
|
||||
desc = meta.get("description", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Quick availability check — try loading
|
||||
available = True
|
||||
try:
|
||||
cls = _load_indexer_from_dir(child)
|
||||
available = cls is not None
|
||||
except Exception:
|
||||
available = False
|
||||
|
||||
results.append((child.name, desc, available))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def load_workspace_indexer(name: str) -> Optional[type]:
|
||||
"""Load and return a workspace indexer class by name.
|
||||
|
||||
Returns the class (not an instance) so the caller can instantiate
|
||||
with ``cls(config)``. Returns None if not found or on failure.
|
||||
"""
|
||||
engine_dir = _WORKSPACE_PLUGINS_DIR / name
|
||||
if not engine_dir.is_dir():
|
||||
logger.debug(
|
||||
"Workspace indexer '%s' not found in %s", name, _WORKSPACE_PLUGINS_DIR
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
cls = _load_indexer_from_dir(engine_dir)
|
||||
if cls:
|
||||
return cls
|
||||
logger.warning("Workspace indexer '%s' loaded but no indexer class found", name)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning("Failed to load workspace indexer '%s': %s", name, e)
|
||||
return None
|
||||
|
||||
|
||||
def _load_indexer_from_dir(indexer_dir: Path) -> Optional[type]:
|
||||
"""Import an indexer module and extract the BaseIndexer subclass.
|
||||
|
||||
The module must have either:
|
||||
- A register(ctx) function (plugin-style) — we simulate a ctx
|
||||
- A top-level class that extends BaseIndexer — we return the class
|
||||
"""
|
||||
name = indexer_dir.name
|
||||
module_name = f"plugins.workspace.{name}"
|
||||
init_file = indexer_dir / "__init__.py"
|
||||
|
||||
if not init_file.exists():
|
||||
return None
|
||||
|
||||
# Check if already loaded
|
||||
if module_name in sys.modules:
|
||||
mod = sys.modules[module_name]
|
||||
else:
|
||||
# Handle relative imports within the plugin
|
||||
# First ensure the parent packages are registered
|
||||
for parent in ("plugins", "plugins.workspace"):
|
||||
if parent not in sys.modules:
|
||||
parent_path = Path(__file__).parent
|
||||
if parent == "plugins":
|
||||
parent_path = parent_path.parent
|
||||
parent_init = parent_path / "__init__.py"
|
||||
if parent_init.exists():
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
parent,
|
||||
str(parent_init),
|
||||
submodule_search_locations=[str(parent_path)],
|
||||
)
|
||||
if spec and spec.loader:
|
||||
parent_mod = importlib.util.module_from_spec(spec)
|
||||
sys.modules[parent] = parent_mod
|
||||
try:
|
||||
spec.loader.exec_module(parent_mod)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Now load the indexer module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name,
|
||||
str(init_file),
|
||||
submodule_search_locations=[str(indexer_dir)],
|
||||
)
|
||||
if not spec or not spec.loader:
|
||||
return None
|
||||
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = mod
|
||||
|
||||
# Register submodules so relative imports work
|
||||
for sub_file in indexer_dir.glob("*.py"):
|
||||
if sub_file.name == "__init__.py":
|
||||
continue
|
||||
sub_name = sub_file.stem
|
||||
full_sub_name = f"{module_name}.{sub_name}"
|
||||
if full_sub_name not in sys.modules:
|
||||
sub_spec = importlib.util.spec_from_file_location(
|
||||
full_sub_name, str(sub_file)
|
||||
)
|
||||
if sub_spec and sub_spec.loader:
|
||||
sub_mod = importlib.util.module_from_spec(sub_spec)
|
||||
sys.modules[full_sub_name] = sub_mod
|
||||
try:
|
||||
sub_spec.loader.exec_module(sub_mod)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Failed to load submodule %s: %s", full_sub_name, e
|
||||
)
|
||||
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to exec_module %s: %s", module_name, e)
|
||||
sys.modules.pop(module_name, None)
|
||||
return None
|
||||
|
||||
# Try register(ctx) pattern first (how plugins are written)
|
||||
if hasattr(mod, "register"):
|
||||
collector = _IndexerCollector()
|
||||
try:
|
||||
mod.register(collector)
|
||||
if collector.indexer_cls:
|
||||
return collector.indexer_cls
|
||||
except Exception as e:
|
||||
logger.debug("register() failed for %s: %s", name, e)
|
||||
|
||||
# Fallback: find a BaseIndexer subclass
|
||||
from workspace.base import BaseIndexer
|
||||
|
||||
for attr_name in dir(mod):
|
||||
attr = getattr(mod, attr_name, None)
|
||||
if (
|
||||
isinstance(attr, type)
|
||||
and issubclass(attr, BaseIndexer)
|
||||
and attr is not BaseIndexer
|
||||
):
|
||||
return attr
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class _IndexerCollector:
|
||||
"""Fake plugin context that captures register_workspace_indexer calls."""
|
||||
|
||||
def __init__(self):
|
||||
self.indexer_cls = None
|
||||
|
||||
def register_workspace_indexer(self, cls):
|
||||
self.indexer_cls = cls
|
||||
|
||||
# No-op for other registration methods
|
||||
def register_tool(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_hook(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_cli_command(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_memory_provider(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_context_engine(self, *args, **kwargs):
|
||||
pass
|
||||
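Pulling the two entry points together, discovery-then-load might look like this — a minimal sketch assuming a `WorkspaceConfig` named `config` is already in hand:

# Illustrative only: enumerate plugins, then instantiate one by name.
from plugins.workspace import discover_workspace_indexers, load_workspace_indexer

for name, desc, available in discover_workspace_indexers():
    print(f"{name:<12} available={available}  {desc}")

cls = load_workspace_indexer("semtools")
if cls is not None:
    indexer = cls(config)  # `config` is assumed to be a WorkspaceConfig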
244 plugins/workspace/semtools/__init__.py Normal file
@@ -0,0 +1,244 @@
"""Semtools workspace plugin — semantic search via @llamaindex/semtools.
|
||||
|
||||
semtools is a Rust CLI that does semantic search using model2vec.
|
||||
It auto-indexes files on first search, so index() is mostly a no-op.
|
||||
"""
|
||||
|
||||
import fnmatch
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from workspace.base import BaseIndexer
|
||||
from workspace.config import WorkspaceConfig
|
||||
from workspace.types import IndexSummary, SearchResult
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SemtoolsIndexer(BaseIndexer):
|
||||
def __init__(self, config: WorkspaceConfig) -> None:
|
||||
self._config = config
|
||||
pc = config.plugin_config
|
||||
self._workspace = pc.get("workspace_name", "hermes")
|
||||
self._top_k = pc.get("top_k", 20)
|
||||
|
||||
def index(self, *, progress=None) -> IndexSummary:
|
||||
"""Discover files but skip actual indexing — semtools auto-indexes on search."""
|
||||
self._ensure_semtools()
|
||||
from workspace.files import discover_workspace_files
|
||||
|
||||
discovery = discover_workspace_files(self._config)
|
||||
return IndexSummary(
|
||||
files_indexed=0,
|
||||
files_skipped=len(discovery.files),
|
||||
files_pruned=0,
|
||||
files_errored=0,
|
||||
chunks_created=0,
|
||||
duration_seconds=0.0,
|
||||
errors=[],
|
||||
errors_truncated=False,
|
||||
)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
limit: int = 20,
|
||||
path_prefix: str | None = None,
|
||||
file_glob: str | None = None,
|
||||
) -> list[SearchResult]:
|
||||
"""Run semtools search against discovered workspace files."""
|
||||
self._ensure_semtools()
|
||||
from workspace.files import discover_workspace_files
|
||||
|
||||
discovery = discover_workspace_files(self._config)
|
||||
files = [str(p) for _, p in discovery.files]
|
||||
|
||||
if path_prefix:
|
||||
files = [f for f in files if f.startswith(path_prefix)]
|
||||
if file_glob:
|
||||
pattern = file_glob if file_glob.startswith("*") else "*" + file_glob
|
||||
files = [f for f in files if fnmatch.fnmatch(f, pattern)]
|
||||
|
||||
if not files:
|
||||
return []
|
||||
|
||||
cmd = [
|
||||
"semtools",
|
||||
"search",
|
||||
query,
|
||||
*files,
|
||||
"--json",
|
||||
"--top-k",
|
||||
str(limit),
|
||||
"--workspace",
|
||||
self._workspace,
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.error("semtools search failed: %s", e.stderr)
|
||||
raise RuntimeError(f"semtools search failed: {e.stderr}") from e
|
||||
except FileNotFoundError as e:
|
||||
raise RuntimeError(
|
||||
"semtools binary not found. Install with: npm i -g @llamaindex/semtools"
|
||||
) from e
|
||||
|
||||
return self._parse_results(result.stdout)
|
||||
|
||||
def status(self) -> dict:
|
||||
installed = shutil.which("semtools") is not None
|
||||
info: dict = {
|
||||
"backend": "semtools",
|
||||
"installed": installed,
|
||||
"workspace_name": self._workspace,
|
||||
}
|
||||
if not installed:
|
||||
return info
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["semtools", "workspace", "status", "--json", self._workspace],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
ws_info = json.loads(result.stdout)
|
||||
info["root_dir"] = ws_info.get("root_dir")
|
||||
info["total_documents"] = ws_info.get("total_documents", 0)
|
||||
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
|
||||
log.debug("semtools workspace status failed: %s", e)
|
||||
return info
|
||||
|
||||
def list_files(self) -> list[dict]:
|
||||
"""List files discoverable under configured roots.
|
||||
|
||||
Semtools auto-indexes on search, so this returns the discovery set
|
||||
that WOULD be indexed rather than what's actually in the embedding store.
|
||||
"""
|
||||
from workspace.files import discover_workspace_files
|
||||
|
||||
discovery = discover_workspace_files(self._config)
|
||||
return [
|
||||
{
|
||||
"path": str(p),
|
||||
"root": str(root),
|
||||
"size_bytes": p.stat().st_size if p.exists() else 0,
|
||||
"chunks": 0,
|
||||
"modified": "",
|
||||
"indexed": "",
|
||||
}
|
||||
for root, p in discovery.files
|
||||
]
|
||||
|
||||
def delete(self, path: str) -> bool:
|
||||
"""Semtools doesn't expose per-file delete; runs workspace prune instead.
|
||||
|
||||
Prune removes stale entries (files that no longer exist on disk).
|
||||
Returns True if the file is gone from disk AND prune succeeded.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
if Path(path).exists():
|
||||
return False
|
||||
try:
|
||||
subprocess.run(
|
||||
["semtools", "workspace", "prune", self._workspace],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.warning("semtools workspace prune failed: %s", e.stderr)
|
||||
return False
|
||||
|
||||
def _ensure_semtools(self) -> None:
|
||||
"""Install semtools if not already present (idempotent)."""
|
||||
if shutil.which("semtools"):
|
||||
return
|
||||
if not shutil.which("npm"):
|
||||
raise RuntimeError(
|
||||
"npm is required to install semtools. Install Node.js first."
|
||||
)
|
||||
try:
|
||||
subprocess.run(
|
||||
["npm", "i", "-g", "@llamaindex/semtools"],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise RuntimeError(f"Failed to install semtools via npm: {e.stderr}") from e
|
||||
if not shutil.which("semtools"):
|
||||
raise RuntimeError(
|
||||
"semtools installed but not found on PATH after npm install"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _parse_results(stdout: str) -> list[SearchResult]:
|
||||
"""Parse semtools JSON output into SearchResult objects.
|
||||
|
||||
semtools outputs::
|
||||
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"filename": "/path/to/file.py",
|
||||
"start_line_number": 0,
|
||||
"end_line_number": 7,
|
||||
"match_line_number": 3,
|
||||
"distance": 0.219,
|
||||
"content": "..."
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
Distance is a dissimilarity metric (lower = better match).
|
||||
We convert to a similarity score: score = 1.0 - distance.
|
||||
Line numbers from semtools are 0-based; we convert to 1-based.
|
||||
"""
|
||||
try:
|
||||
data = json.loads(stdout)
|
||||
except json.JSONDecodeError:
|
||||
log.warning("Failed to parse semtools JSON output")
|
||||
return []
|
||||
|
||||
results_raw = data.get("results", [])
|
||||
results: list[SearchResult] = []
|
||||
|
||||
for i, item in enumerate(results_raw):
|
||||
distance = item.get("distance", 1.0)
|
||||
score = max(0.0, 1.0 - distance)
|
||||
|
||||
start_line = item.get("start_line_number", 0) + 1
|
||||
end_line = item.get("end_line_number", 0) + 1
|
||||
content = item.get("content", "")
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
path=item.get("filename", ""),
|
||||
line_start=start_line,
|
||||
line_end=end_line,
|
||||
section=None,
|
||||
chunk_index=i,
|
||||
score=round(score, 6),
|
||||
tokens=0,
|
||||
modified="",
|
||||
content=content,
|
||||
)
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def register(ctx):
|
||||
ctx.register_workspace_indexer(SemtoolsIndexer)
|
||||
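A quick worked check of the `_parse_results` contract described in its docstring — the field values below are made up for illustration:

# Distance 0.219 → score 0.781; 0-based lines 0..7 → 1-based 1..8.
sample = (
    '{"results": [{"filename": "/ws/a.py", "start_line_number": 0, '
    '"end_line_number": 7, "match_line_number": 3, "distance": 0.219, '
    '"content": "def a(): ..."}]}'
)
(r,) = SemtoolsIndexer._parse_results(sample)
assert r.line_start == 1 and r.line_end == 8
assert abs(r.score - 0.781) < 1e-9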
@@ -34,6 +34,10 @@ dependencies = [
    "edge-tts>=7.2.7,<8",
    # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
    "PyJWT[crypto]>=2.12.0,<3",  # CVE-2026-32597
    # Workspace .hermesignore parsing (gitignore-style patterns)
    "pathspec>=0.12.0,<1",
    # Workspace encoding detection for non-UTF8 files
    "charset-normalizer>=3.3.0,<4",
]

[project.optional-dependencies]
@@ -64,6 +68,8 @@ sms = ["aiohttp>=3.9.0,<4"]
acp = ["agent-client-protocol>=0.9.0,<1.0"]
mistral = ["mistralai>=2.3.0,<3"]
bedrock = ["boto3>=1.35.0,<2"]
workspace = ["chonkie[code]>=1.6.0,<2", "hermes-agent[parsing]"]
parsing = ["markitdown[pdf,docx,pptx]>=0.1.0"]
termux = [
    # Tested Android / Termux path: keeps the core CLI feature-rich while
    # avoiding extras that currently depend on non-Android wheels (notably
@@ -126,7 +132,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
hermes_cli = ["web_dist/**/*"]

[tool.setuptools.packages.find]
include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "workspace"]

[tool.pytest.ini_options]
testpaths = ["tests"]
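Since the workspace extra is optional, runtime code presumably needs a cheap availability probe before enabling the default indexer. A hedged sketch — the helper name is made up; `chonkie` is the module the extra installs:

# Hypothetical guard: enable the chunking-based indexer only when the
# optional "workspace" extra (which pulls in chonkie) is installed.
import importlib.util


def workspace_extra_available() -> bool:
    return importlib.util.find_spec("chonkie") is not None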
@@ -83,6 +83,7 @@ from agent.prompt_builder import (
    DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
    MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
    build_nous_subscription_prompt,
    build_workspace_guidance,
)
from agent.model_metadata import (
    fetch_model_metadata,
@@ -3733,6 +3734,9 @@ class AIAgent:
            tool_guidance.append(SESSION_SEARCH_GUIDANCE)
        if "skill_manage" in self.valid_tool_names:
            tool_guidance.append(SKILLS_GUIDANCE)
        workspace_block = build_workspace_guidance(self.valid_tool_names)
        if workspace_block:
            tool_guidance.append(workspace_block)
        if tool_guidance:
            prompt_parts.append(" ".join(tool_guidance))
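For reference, the assembler's observable contract can be sketched as follows. This is inferred from the tests below, not copied from the shipped body:

# Sketch only — behavior pinned by TestWorkspaceGuidance below: no guidance
# without workspace_search; extras append in a stable order; workspace_delete
# is never mentioned.
def build_workspace_guidance_sketch(available_tools: set[str]) -> str | None:
    if "workspace_search" not in available_tools:
        return None
    parts = [WORKSPACE_SEARCH_GUIDANCE_CORE]
    if "workspace_retrieve" in available_tools:
        parts.append(WORKSPACE_RETRIEVE_GUIDANCE)
    if "workspace_list" in available_tools:
        parts.append(WORKSPACE_LIST_GUIDANCE)
    if "workspace_index" in available_tools:
        parts.append(WORKSPACE_INDEX_GUIDANCE)
    return " ".join(parts)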
@@ -1052,4 +1052,110 @@ class TestOpenAIModelExecutionGuidance:
    # =========================================================================


# =========================================================================
# Workspace guidance assembler
# =========================================================================

from agent.prompt_builder import (
    build_workspace_guidance,
    WORKSPACE_SEARCH_GUIDANCE_CORE,
    WORKSPACE_RETRIEVE_GUIDANCE,
    WORKSPACE_LIST_GUIDANCE,
    WORKSPACE_INDEX_GUIDANCE,
)


class TestWorkspaceGuidance:
    def test_returns_none_when_search_unavailable(self):
        """If workspace_search is not in tools, guidance is not injected at all."""
        assert build_workspace_guidance(set()) is None
        assert build_workspace_guidance({"memory", "todo"}) is None

    def test_core_only_when_only_search_available(self):
        """With just workspace_search, guidance contains the core paragraph only."""
        out = build_workspace_guidance({"workspace_search"})
        assert out is not None
        assert WORKSPACE_SEARCH_GUIDANCE_CORE in out
        assert WORKSPACE_RETRIEVE_GUIDANCE not in out
        assert WORKSPACE_LIST_GUIDANCE not in out
        assert WORKSPACE_INDEX_GUIDANCE not in out

    def test_core_plus_retrieve(self):
        """Adds the retrieve paragraph when workspace_retrieve is also present."""
        out = build_workspace_guidance({"workspace_search", "workspace_retrieve"})
        assert out is not None
        assert WORKSPACE_SEARCH_GUIDANCE_CORE in out
        assert WORKSPACE_RETRIEVE_GUIDANCE in out
        assert WORKSPACE_LIST_GUIDANCE not in out

    def test_core_plus_list(self):
        out = build_workspace_guidance({"workspace_search", "workspace_list"})
        assert out is not None
        assert WORKSPACE_LIST_GUIDANCE in out

    def test_core_plus_index(self):
        out = build_workspace_guidance({"workspace_search", "workspace_index"})
        assert out is not None
        assert WORKSPACE_INDEX_GUIDANCE in out

    def test_all_tools_available(self):
        """Full workspace toolset: all paragraphs present in stable order."""
        out = build_workspace_guidance({
            "workspace_search", "workspace_retrieve",
            "workspace_list", "workspace_index", "workspace_delete",
        })
        assert out is not None
        assert WORKSPACE_SEARCH_GUIDANCE_CORE in out
        assert WORKSPACE_RETRIEVE_GUIDANCE in out
        assert WORKSPACE_LIST_GUIDANCE in out
        assert WORKSPACE_INDEX_GUIDANCE in out
        core_pos = out.index(WORKSPACE_SEARCH_GUIDANCE_CORE)
        retrieve_pos = out.index(WORKSPACE_RETRIEVE_GUIDANCE)
        list_pos = out.index(WORKSPACE_LIST_GUIDANCE)
        index_pos = out.index(WORKSPACE_INDEX_GUIDANCE)
        assert core_pos < retrieve_pos < list_pos < index_pos

    def test_delete_not_prompted(self):
        """workspace_delete is destructive — we do NOT nudge the model toward it."""
        out = build_workspace_guidance({
            "workspace_search", "workspace_delete",
        })
        assert out is not None
        assert "delete" not in out.lower()

    def test_output_is_single_string(self):
        out = build_workspace_guidance({"workspace_search"})
        assert isinstance(out, str)
        assert len(out.strip()) > 0

    def test_wiring_into_system_prompt(self):
        """End-to-end: the assembler output reaches the system prompt when
        workspace_search is in valid_tool_names.

        Uses the same tool_guidance collection pattern as _build_system_prompt
        in run_agent.py so we verify the contract without booting AIAgent.
        """
        from agent.prompt_builder import (
            MEMORY_GUIDANCE,
            build_workspace_guidance,
        )
        valid_tool_names = {"memory", "workspace_search", "workspace_retrieve"}
        tool_guidance = []
        if "memory" in valid_tool_names:
            tool_guidance.append(MEMORY_GUIDANCE)
        ws = build_workspace_guidance(valid_tool_names)
        if ws:
            tool_guidance.append(ws)
        combined = " ".join(tool_guidance)
        assert WORKSPACE_SEARCH_GUIDANCE_CORE in combined
        assert WORKSPACE_RETRIEVE_GUIDANCE in combined
        assert MEMORY_GUIDANCE in combined

    def test_wiring_skips_when_workspace_unavailable(self):
        valid_tool_names = {"memory"}
        tool_guidance = []
        ws = build_workspace_guidance(valid_tool_names)
        if ws:
            tool_guidance.append(ws)
        combined = " ".join(tool_guidance)
        assert WORKSPACE_SEARCH_GUIDANCE_CORE not in combined
40 tests/workspace/conftest.py Normal file
@@ -0,0 +1,40 @@
from pathlib import Path

import pytest

from workspace.config import KnowledgebaseConfig, WorkspaceConfig
from workspace.constants import DEFAULT_IGNORE_PATTERNS


@pytest.fixture
def make_workspace_config(tmp_path: Path):
    def _make(raw: dict | None = None) -> WorkspaceConfig:
        raw = raw or {}
        hermes_home = tmp_path / "cfg_home"
        hermes_home.mkdir(exist_ok=True)
        ws_root = hermes_home / "workspace"
        ws_raw = raw.get("workspace", {})
        kb_raw = raw.get("knowledgebase", {})
        cfg = WorkspaceConfig(
            enabled=ws_raw.get("enabled", True),
            workspace_root=ws_root,
            knowledgebase=KnowledgebaseConfig.model_validate(kb_raw),
        )
        cfg.workspace_root.mkdir(parents=True, exist_ok=True)
        (cfg.workspace_root / ".hermesignore").write_text(
            DEFAULT_IGNORE_PATTERNS + "\n.hermesignore\n",
            encoding="utf-8",
        )
        return cfg

    return _make


@pytest.fixture
def write_file():
    def _write(path: Path, text: str) -> Path:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(text, encoding="utf-8")
        return path

    return _write
46 tests/workspace/test_base_indexer.py Normal file
@@ -0,0 +1,46 @@
# tests/workspace/test_base_indexer.py
"""Tests for the BaseIndexer ABC contract."""

import pytest

from workspace.base import BaseIndexer


def test_base_indexer_cannot_be_instantiated_directly():
    with pytest.raises(TypeError, match="abstract"):
        BaseIndexer(None)


def test_concrete_subclass_must_implement_index_and_search():
    class Incomplete(BaseIndexer):
        def __init__(self, config):
            pass

    with pytest.raises(TypeError, match="abstract"):
        Incomplete(None)


def test_concrete_subclass_with_both_methods_instantiates():
    class Complete(BaseIndexer):
        def __init__(self, config):
            self._config = config

        def index(self, *, progress=None):
            from workspace.types import IndexSummary

            return IndexSummary(
                files_indexed=0,
                files_skipped=0,
                files_pruned=0,
                files_errored=0,
                chunks_created=0,
                duration_seconds=0.0,
                errors=[],
                errors_truncated=False,
            )

        def search(self, query, *, limit=20, path_prefix=None, file_glob=None):
            return []

    indexer = Complete(None)
    assert indexer.status() == {}
125 tests/workspace/test_config_pydantic.py Normal file
@@ -0,0 +1,125 @@
"""Tests for Pydantic config models."""
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from workspace.config import (
|
||||
ChunkingConfig,
|
||||
IndexingConfig,
|
||||
KnowledgebaseConfig,
|
||||
SearchConfig,
|
||||
WorkspaceConfig,
|
||||
load_workspace_config,
|
||||
)
|
||||
|
||||
|
||||
def test_chunking_config_defaults():
|
||||
c = ChunkingConfig()
|
||||
assert c.chunk_size == 512
|
||||
assert c.overlap == 32
|
||||
|
||||
|
||||
def test_chunking_config_clamps_overlap_when_none():
|
||||
c = ChunkingConfig(chunk_size=20)
|
||||
assert c.overlap == 19
|
||||
|
||||
|
||||
def test_chunking_config_explicit_overlap():
|
||||
c = ChunkingConfig(chunk_size=512, overlap=64)
|
||||
assert c.overlap == 64
|
||||
|
||||
|
||||
def test_chunking_config_rejects_zero_chunk_size():
|
||||
with pytest.raises(ValidationError):
|
||||
ChunkingConfig(chunk_size=0)
|
||||
|
||||
|
||||
def test_chunking_config_rejects_negative_chunk_size():
|
||||
with pytest.raises(ValidationError):
|
||||
ChunkingConfig(chunk_size=-1)
|
||||
|
||||
|
||||
def test_chunking_config_rejects_overlap_gte_chunk_size():
|
||||
with pytest.raises(ValidationError):
|
||||
ChunkingConfig(chunk_size=100, overlap=100)
|
||||
|
||||
|
||||
def test_chunking_config_rejects_negative_overlap():
|
||||
with pytest.raises(ValidationError):
|
||||
ChunkingConfig(chunk_size=100, overlap=-1)
|
||||
|
||||
|
||||
def test_indexing_config_rejects_zero():
|
||||
with pytest.raises(ValidationError):
|
||||
IndexingConfig(max_file_mb=0)
|
||||
|
||||
|
||||
def test_search_config_rejects_zero():
|
||||
with pytest.raises(ValidationError):
|
||||
SearchConfig(default_limit=0)
|
||||
|
||||
|
||||
def test_workspace_config_defaults():
|
||||
c = WorkspaceConfig()
|
||||
assert c.enabled is True
|
||||
assert c.indexer == "default"
|
||||
assert c.plugin_config == {}
|
||||
|
||||
|
||||
def test_workspace_config_is_frozen():
|
||||
c = WorkspaceConfig()
|
||||
with pytest.raises(ValidationError):
|
||||
c.enabled = False
|
||||
|
||||
|
||||
def test_knowledgebase_config_from_dict():
|
||||
kb = KnowledgebaseConfig.model_validate(
|
||||
{
|
||||
"roots": [{"path": "/tmp/test", "recursive": True}],
|
||||
"chunking": {"chunk_size": 256},
|
||||
}
|
||||
)
|
||||
assert len(kb.roots) == 1
|
||||
assert kb.roots[0].path == "/tmp/test"
|
||||
assert kb.roots[0].recursive is True
|
||||
assert kb.chunking.chunk_size == 256
|
||||
|
||||
|
||||
def test_deprecated_keys_are_silently_ignored():
|
||||
kb = KnowledgebaseConfig.model_validate(
|
||||
{
|
||||
"chunking": {
|
||||
"strategy": "semantic",
|
||||
"threshold": 0,
|
||||
"chunk_size": 128,
|
||||
}
|
||||
}
|
||||
)
|
||||
assert kb.chunking.chunk_size == 128
|
||||
assert not hasattr(kb.chunking, "strategy")
|
||||
|
||||
|
||||
def test_load_workspace_config_from_raw_dict(tmp_path):
|
||||
raw = {
|
||||
"workspace": {
|
||||
"enabled": True,
|
||||
"path": str(tmp_path / "ws"),
|
||||
"indexer": "witchcraft",
|
||||
"plugin_config": {"db_path": "/tmp/wc"},
|
||||
},
|
||||
"knowledgebase": {
|
||||
"chunking": {"chunk_size": 1024},
|
||||
},
|
||||
}
|
||||
cfg = load_workspace_config(raw)
|
||||
assert cfg.indexer == "witchcraft"
|
||||
assert cfg.plugin_config == {"db_path": "/tmp/wc"}
|
||||
assert cfg.knowledgebase.chunking.chunk_size == 1024
|
||||
assert cfg.workspace_root == (tmp_path / "ws").resolve()
|
||||
|
||||
|
||||
def test_load_workspace_config_default_when_empty():
|
||||
raw = {}
|
||||
cfg = load_workspace_config(raw)
|
||||
assert cfg.enabled is True
|
||||
assert cfg.indexer == "default"
|
||||
83 tests/workspace/test_default_indexer.py Normal file
@@ -0,0 +1,83 @@
"""Tests for DefaultIndexer — verifies it satisfies BaseIndexer contract."""
|
||||
|
||||
from workspace.base import BaseIndexer
|
||||
from workspace.default import DefaultIndexer
|
||||
from workspace.types import IndexSummary, SearchResult
|
||||
|
||||
|
||||
def test_default_indexer_is_base_indexer_subclass():
|
||||
assert issubclass(DefaultIndexer, BaseIndexer)
|
||||
|
||||
|
||||
def test_default_indexer_indexes_and_searches(make_workspace_config, write_file):
|
||||
cfg = make_workspace_config()
|
||||
write_file(
|
||||
cfg.workspace_root / "docs" / "hello.md", "# Hello\n\nWorld of workspace.\n"
|
||||
)
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
summary = indexer.index()
|
||||
|
||||
assert isinstance(summary, IndexSummary)
|
||||
assert summary.files_indexed == 1
|
||||
assert summary.files_errored == 0
|
||||
|
||||
results = indexer.search("workspace")
|
||||
assert isinstance(results, list)
|
||||
assert len(results) > 0
|
||||
assert all(isinstance(r, SearchResult) for r in results)
|
||||
|
||||
|
||||
def test_default_indexer_search_respects_limit(make_workspace_config, write_file):
|
||||
cfg = make_workspace_config()
|
||||
for i in range(5):
|
||||
write_file(
|
||||
cfg.workspace_root / "docs" / f"doc{i}.md",
|
||||
f"# Doc {i}\n\nThis document talks about testing limit param.\n",
|
||||
)
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
indexer.index()
|
||||
|
||||
results = indexer.search("document", limit=2)
|
||||
assert len(results) <= 2
|
||||
|
||||
|
||||
def test_default_indexer_status_returns_dict(make_workspace_config, write_file):
|
||||
cfg = make_workspace_config()
|
||||
write_file(cfg.workspace_root / "docs" / "a.md", "# A\n\nContent.\n")
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
indexer.index()
|
||||
|
||||
status = indexer.status()
|
||||
assert isinstance(status, dict)
|
||||
assert "file_count" in status
|
||||
assert "chunk_count" in status
|
||||
assert "db_path" in status
|
||||
|
||||
|
||||
def test_default_indexer_index_is_idempotent(make_workspace_config, write_file):
|
||||
cfg = make_workspace_config()
|
||||
write_file(cfg.workspace_root / "docs" / "a.md", "# A\n\nContent A.\n")
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
first = indexer.index()
|
||||
assert first.files_indexed == 1
|
||||
|
||||
second = indexer.index()
|
||||
assert second.files_indexed == 0
|
||||
assert second.files_skipped >= 1
|
||||
|
||||
|
||||
def test_default_indexer_progress_callback(make_workspace_config, write_file):
|
||||
cfg = make_workspace_config()
|
||||
write_file(cfg.workspace_root / "docs" / "a.md", "# A\n\nContent.\n")
|
||||
|
||||
calls = []
|
||||
indexer = DefaultIndexer(cfg)
|
||||
indexer.index(progress=lambda cur, total, path: calls.append((cur, total, path)))
|
||||
|
||||
assert len(calls) > 0
|
||||
assert calls[0][0] == 1
|
||||
assert calls[0][1] >= 1
|
||||
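Outside pytest, the same flow is just three calls. A hedged sketch assuming a config has already been loaded:

# Illustrative end-to-end use of DefaultIndexer (not part of the test suite).
from workspace.config import load_workspace_config
from workspace.default import DefaultIndexer

cfg = load_workspace_config()
indexer = DefaultIndexer(cfg)
indexer.index()
for hit in indexer.search("chunking", limit=5):
    print(f"{hit.path}:{hit.line_start}-{hit.line_end}  score={hit.score:.2f}")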
409 tests/workspace/test_indexer_pipeline.py Normal file
@@ -0,0 +1,409 @@
"""End-to-end tests for the Pipeline-based indexer.
|
||||
|
||||
Exercises the behavior the workspace indexer is expected to produce
|
||||
after migrating from manual Chonkie wiring to `chonkie.Pipeline`:
|
||||
|
||||
- Markdown files emit one ChunkRecord per modality (text/code/table/image)
|
||||
with the correct `kind`, and no legacy block_index/src/link/row_count metadata.
|
||||
- Small markdown files with a code block are still split into two records
|
||||
(prose + code) rather than collapsed into a single chunk.
|
||||
- Overlap context is populated and is a suffix of the previous chunk's content.
|
||||
- Deprecated config keys (strategy, threshold) are silently ignored.
|
||||
- Config signature changes cause re-indexing.
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
from workspace.default import DefaultIndexer
|
||||
from workspace.store import SQLiteFTS5Store
|
||||
|
||||
|
||||
def test_markdown_pipeline_emits_clean_metadata_per_modality(
|
||||
make_workspace_config, write_file
|
||||
):
|
||||
cfg = make_workspace_config({"knowledgebase": {"chunking": {"chunk_size": 64}}})
|
||||
md = write_file(
|
||||
cfg.workspace_root / "docs" / "mixed.md",
|
||||
"# Title\n\n"
|
||||
"Intro prose for the markdown pipeline.\n\n"
|
||||
"```python\n"
|
||||
"def first():\n"
|
||||
" return 1\n"
|
||||
"```\n\n"
|
||||
"| Name | Score |\n"
|
||||
"| ---- | ----- |\n"
|
||||
"| A | 10 |\n\n"
|
||||
"\n\n"
|
||||
"## Second\n\n"
|
||||
"More prose.\n",
|
||||
)
|
||||
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
assert summary.files_indexed == 1
|
||||
assert summary.files_errored == 0
|
||||
|
||||
with SQLiteFTS5Store(cfg.workspace_root) as store:
|
||||
rows = store.conn.execute(
|
||||
"SELECT kind, content, chunk_metadata, chunk_index, section, "
|
||||
"start_line, end_line FROM chunks "
|
||||
"WHERE abs_path = ? ORDER BY chunk_index",
|
||||
(str(md.resolve()),),
|
||||
).fetchall()
|
||||
|
||||
kinds = [r["kind"] for r in rows]
|
||||
assert "markdown_text" in kinds
|
||||
assert "markdown_code" in kinds
|
||||
assert "markdown_table" in kinds
|
||||
assert "markdown_image" in kinds
|
||||
|
||||
# chunk_index is 0..N-1, strictly increasing
|
||||
assert [r["chunk_index"] for r in rows] == list(range(len(rows)))
|
||||
|
||||
# Code rows: language present, no block_index
|
||||
code_rows = [r for r in rows if r["kind"] == "markdown_code"]
|
||||
assert code_rows, "expected at least one markdown_code row"
|
||||
for r in code_rows:
|
||||
meta = json.loads(r["chunk_metadata"])
|
||||
assert meta == {"language": "python"}
|
||||
|
||||
# Table rows: no chunk_metadata (NULL)
|
||||
table_rows = [r for r in rows if r["kind"] == "markdown_table"]
|
||||
assert table_rows
|
||||
for r in table_rows:
|
||||
assert r["chunk_metadata"] is None
|
||||
|
||||
# Image rows: content is the alias; no chunk_metadata
|
||||
image_rows = [r for r in rows if r["kind"] == "markdown_image"]
|
||||
assert image_rows
|
||||
for r in image_rows:
|
||||
assert r["content"] == "first image"
|
||||
assert r["chunk_metadata"] is None
|
||||
|
||||
# Section assignment: the "Second" heading affects later rows
|
||||
sections = {r["section"] for r in rows if r["section"]}
|
||||
assert any("Title" in s for s in sections)
|
||||
|
||||
# Line numbers are 1-indexed and ordered
|
||||
assert all(r["start_line"] >= 1 for r in rows)
|
||||
assert all(r["end_line"] >= r["start_line"] for r in rows)
|
||||
|
||||
|
||||
def test_small_markdown_file_is_split_into_modalities(
|
||||
make_workspace_config, write_file
|
||||
):
|
||||
"""Small markdown files with a code block must produce separate records for
|
||||
prose and code. Every file flows through the Pipeline regardless of size;
|
||||
there is no single-chunk short-circuit."""
|
||||
cfg = make_workspace_config({"knowledgebase": {"chunking": {"chunk_size": 512}}})
|
||||
md = write_file(
|
||||
cfg.workspace_root / "docs" / "tiny.md",
|
||||
"# Tiny\n\nShort intro.\n\n```python\nprint('hi')\n```\n",
|
||||
)
|
||||
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
assert summary.files_indexed == 1
|
||||
|
||||
with SQLiteFTS5Store(cfg.workspace_root) as store:
|
||||
rows = store.conn.execute(
|
||||
"SELECT kind, content FROM chunks WHERE abs_path = ? ORDER BY chunk_index",
|
||||
(str(md.resolve()),),
|
||||
).fetchall()
|
||||
|
||||
kinds = [r["kind"] for r in rows]
|
||||
assert "markdown_text" in kinds
|
||||
assert "markdown_code" in kinds
|
||||
assert len(rows) >= 2, f"small markdown must still be multimodal, got {kinds}"
|
||||
|
||||
# Guard against the code fence being accidentally swallowed into the prose
|
||||
# row — `kinds` containing both labels could still false-pass if the
|
||||
# "markdown_text" row itself contained the code block body.
|
||||
text_row = next(r for r in rows if r["kind"] == "markdown_text")
|
||||
assert "print('hi')" not in text_row["content"]
|
||||
|
||||
|
||||
def test_overlap_context_propagates_and_is_prefix_of_next_chunk(
|
||||
make_workspace_config, write_file
|
||||
):
|
||||
"""Multi-chunk prose file: every non-last chunk has non-NULL context,
|
||||
and that context is a prefix of the NEXT chunk's content. Chonkie's
|
||||
OverlapRefinery with method='suffix' in mode='token' attaches the first
|
||||
context_size tokens of chunk N+1 onto chunk N as `context`. FTS indexes
|
||||
this column so a term that only appears at the start of chunk N+1's content
|
||||
is still findable via chunk N's context field.
|
||||
"""
|
||||
sentences = [
|
||||
f"Sentence number {i} carries unique marker token WORD{i:03d}."
|
||||
for i in range(60)
|
||||
]
|
||||
cfg = make_workspace_config(
|
||||
{"knowledgebase": {"chunking": {"chunk_size": 64, "overlap": 8}}}
|
||||
)
|
||||
f = write_file(
|
||||
cfg.workspace_root / "notes" / "long.txt", "\n".join(sentences) + "\n"
|
||||
)
|
||||
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
assert summary.files_indexed == 1
|
||||
|
||||
with SQLiteFTS5Store(cfg.workspace_root) as store:
|
||||
rows = store.conn.execute(
|
||||
"SELECT content, context FROM chunks WHERE abs_path = ? ORDER BY chunk_index",
|
||||
(str(f.resolve()),),
|
||||
).fetchall()
|
||||
|
||||
assert len(rows) >= 2, "fixture must produce multiple chunks"
|
||||
|
||||
non_null_contexts = [r for r in rows if r["context"] is not None]
|
||||
assert len(non_null_contexts) >= 1, "at least one chunk must carry overlap context"
|
||||
|
||||
# For every chunk whose `context` is set, that context must appear at the
|
||||
# START of the NEXT chunk's content (method="suffix" in mode="token" takes
|
||||
# the first N tokens of chunk N+1 and attaches them to chunk N as `context`).
|
||||
for i in range(len(rows) - 1):
|
||||
ctx = rows[i]["context"]
|
||||
if ctx is None:
|
||||
continue
|
||||
next_content = rows[i + 1]["content"]
|
||||
assert ctx.strip() in next_content, (
|
||||
f"chunk {i} context is not a substring of chunk {i + 1} content\n"
|
||||
f" context: {ctx!r}\n next: {next_content!r}"
|
||||
)
|
||||
|
||||
|
||||
def test_deprecated_strategy_and_threshold_keys_are_silently_ignored(
|
||||
make_workspace_config, write_file
|
||||
):
|
||||
"""Old configs that still set `strategy: semantic` or `threshold: 0` must load
|
||||
cleanly after the migration (fields are gone from ChunkingConfig, unknown keys
|
||||
pass through _deep_merge and are dropped by from_dict). No ValueError, no warning
|
||||
suppression hack — just a clean no-op."""
|
||||
cfg = make_workspace_config(
|
||||
{
|
||||
"knowledgebase": {
|
||||
"chunking": {
|
||||
"strategy": "semantic",
|
||||
"threshold": 0,
|
||||
"chunk_size": 128,
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
assert cfg.knowledgebase.chunking.chunk_size == 128
|
||||
assert not hasattr(cfg.knowledgebase.chunking, "strategy")
|
||||
assert not hasattr(cfg.knowledgebase.chunking, "threshold")
|
||||
|
||||
# And indexing works end-to-end with the legacy-keyed config.
|
||||
write_file(cfg.workspace_root / "docs" / "readme.md", "# Hi\n\nSome prose.\n")
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
assert summary.files_indexed == 1
|
||||
assert summary.files_errored == 0
|
||||
|
||||
|
||||
def test_config_signature_change_invalidates_existing_index(
|
||||
make_workspace_config, write_file
|
||||
):
|
||||
"""Changing a field that belongs in the signature (chunk_size) must cause
|
||||
already-indexed files to be re-indexed on the next run rather than skipped.
|
||||
This guards against accidentally dropping a field from _config_signature."""
|
||||
cfg = make_workspace_config({"knowledgebase": {"chunking": {"chunk_size": 512}}})
|
||||
write_file(cfg.workspace_root / "docs" / "a.md", "# A\n\nContent A.\n")
|
||||
|
||||
first = DefaultIndexer(cfg).index()
|
||||
assert first.files_indexed == 1
|
||||
assert first.files_skipped == 0
|
||||
|
||||
# Same config → second run skips.
|
||||
second = DefaultIndexer(cfg).index()
|
||||
assert second.files_indexed == 0
|
||||
assert second.files_skipped == 1
|
||||
|
||||
# Changed chunk_size → third run re-indexes.
|
||||
cfg2 = make_workspace_config({"knowledgebase": {"chunking": {"chunk_size": 256}}})
|
||||
third = DefaultIndexer(cfg2).index()
|
||||
assert third.files_indexed == 1
|
||||
assert third.files_skipped == 0
|
||||
|
||||
|
||||
def test_concurrent_index_does_not_crash(
|
||||
tmp_path: Path, make_workspace_config, write_file
|
||||
):
|
||||
"""Two simultaneous index_workspace() calls against the same workspace must
|
||||
both succeed, and the SQLite DB must pass PRAGMA integrity_check.
|
||||
|
||||
Pre-fix, the second process' sqlite3.connect() had no busy-timeout, so it
|
||||
would fail with `OperationalError: database is locked` the instant the
|
||||
first process held a lock for schema init. The `sqlite3.connect(..., timeout=5.0)`
|
||||
change makes the second process wait for the lock instead of crashing.
|
||||
|
||||
Gap Worker 3's original variant of this test passed before the Pipeline
|
||||
migration because the old `_ChunkerCache` imported chonkie lazily, which
|
||||
added enough startup skew that the race never landed. Eager
|
||||
`_build_pipelines()` removed that skew; this test is the regression guard.
|
||||
"""
|
||||
cfg = make_workspace_config()
|
||||
# Seed a few small markdown files so both runs have something to do.
|
||||
for i in range(5):
|
||||
write_file(
|
||||
cfg.workspace_root / "docs" / f"note_{i}.md",
|
||||
f"# Note {i}\n\nSome content for note {i}.\n",
|
||||
)
|
||||
|
||||
hermes_home = tmp_path / "cfg_home"
|
||||
workspace_root = cfg.workspace_root
|
||||
|
||||
# The project root is 3 levels up from this test file. The spawned
|
||||
# subprocess won't have pytest's conftest setup, so we prepend the project
|
||||
# root to sys.path inside the helper script to make `workspace` importable.
|
||||
project_root = Path(__file__).resolve().parents[2]
|
||||
|
||||
# Write a small helper script that loads a config pointing at the same
|
||||
# workspace_root and runs index_workspace. Using subprocess for clean
|
||||
# process-isolation (fresh interpreter per worker).
|
||||
script = tmp_path / "_run_index.py"
|
||||
script.write_text(
|
||||
textwrap.dedent(
|
||||
f"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, {str(project_root)!r})
|
||||
|
||||
from workspace.config import WorkspaceConfig
|
||||
from workspace.default import DefaultIndexer
|
||||
|
||||
hermes_home = Path({str(hermes_home)!r})
|
||||
cfg = WorkspaceConfig(workspace_root=hermes_home / "workspace")
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
sys.exit(0)
|
||||
"""
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
p1 = subprocess.Popen([sys.executable, str(script)])
|
||||
p2 = subprocess.Popen([sys.executable, str(script)])
|
||||
rc1 = p1.wait(timeout=120)
|
||||
rc2 = p2.wait(timeout=120)
|
||||
|
||||
assert rc1 == 0, f"first concurrent indexer exited {rc1}"
|
||||
assert rc2 == 0, f"second concurrent indexer exited {rc2}"
|
||||
|
||||
# Both processes survived — now verify the DB is not corrupted.
|
||||
with SQLiteFTS5Store(workspace_root) as store:
|
||||
result = store.conn.execute("PRAGMA integrity_check").fetchone()
|
||||
assert result[0] == "ok", f"PRAGMA integrity_check returned: {result[0]!r}"
|
||||
|
||||
|
||||
def test_search_path_prefix_resolves_symlinks(
|
||||
tmp_path: Path, make_workspace_config, write_file
|
||||
):
|
||||
"""search_workspace must resolve `path_prefix` before handing it to the
|
||||
store. The indexer stores resolved absolute paths (`file_path.resolve()`);
|
||||
the store does a literal byte-prefix match. Callers using the Python API
|
||||
with a symlinked path got silent empty results pre-fix. The CLI handled
|
||||
this in `commands.py:174` — this test guards that the Python API entry
|
||||
now mirrors that behavior.
|
||||
"""
|
||||
real_docs = tmp_path / "real-docs"
|
||||
real_docs.mkdir()
|
||||
write_file(
|
||||
real_docs / "alpha.md", "# Alpha\n\nThe alpha document describes things.\n"
|
||||
)
|
||||
write_file(
|
||||
real_docs / "beta.md", "# Beta\n\nThe beta document explains more things.\n"
|
||||
)
|
||||
|
||||
# Symlink `tmp_path/linked` -> `real-docs`. (Making "workspace/linked" a
|
||||
# sub-path would require first creating a workspace dir — plain `linked`
|
||||
# under tmp_path is enough to exercise the resolver.)
|
||||
linked = tmp_path / "linked"
|
||||
linked.symlink_to(real_docs, target_is_directory=True)
|
||||
|
||||
cfg = make_workspace_config(
|
||||
{"knowledgebase": {"roots": [{"path": str(linked), "recursive": True}]}},
|
||||
)
|
||||
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
assert summary.files_indexed == 2
|
||||
|
||||
# Via the symlink path — must still return hits, because search
|
||||
# resolves path_prefix before the byte-prefix compare.
|
||||
via_symlink = DefaultIndexer(cfg).search(
|
||||
"document",
|
||||
path_prefix=str(linked),
|
||||
)
|
||||
assert len(via_symlink) > 0, "search must resolve symlinked path_prefix"
|
||||
|
||||
# Via the resolved real path — the counts must match.
|
||||
via_resolved = DefaultIndexer(cfg).search(
|
||||
"document",
|
||||
path_prefix=str(real_docs.resolve()),
|
||||
)
|
||||
assert len(via_symlink) == len(via_resolved)
|
||||
|
||||
|
||||
def test_hermesignore_never_indexed(make_workspace_config, write_file):
|
||||
""".hermesignore files are discovery-level infrastructure, not indexable
|
||||
content. make_workspace_config seeds one at the workspace root; writing
|
||||
another one in a subdirectory must also be excluded. Post-fix the filter
|
||||
is hardcoded in `discover_workspace_files`, so this holds regardless of
|
||||
user-edited ignore patterns.
|
||||
"""
|
||||
cfg = make_workspace_config()
|
||||
# Additional .hermesignore in a nested directory.
|
||||
write_file(
|
||||
cfg.workspace_root / "docs" / ".hermesignore",
|
||||
"# nested ignore rules\n*.bak\n",
|
||||
)
|
||||
# Plus a legitimate markdown file so the index has something in it.
|
||||
write_file(cfg.workspace_root / "docs" / "ok.md", "# Ok\n\nSome prose.\n")
|
||||
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
assert summary.files_errored == 0
|
||||
|
||||
with SQLiteFTS5Store(cfg.workspace_root) as store:
|
||||
rows = store.conn.execute(
|
||||
"SELECT abs_path FROM chunks WHERE abs_path LIKE ?",
|
||||
("%.hermesignore",),
|
||||
).fetchall()
|
||||
|
||||
assert rows == [], (
|
||||
f"expected no .hermesignore rows, got: {[r['abs_path'] for r in rows]}"
|
||||
)
|
||||
|
||||
|
||||
def test_summary_reports_filtered_empty_and_oversized(
|
||||
make_workspace_config, write_file
|
||||
):
|
||||
"""Files dropped at discovery (zero-size or over `max_file_mb`) must count
|
||||
toward `files_skipped` in the IndexSummary — otherwise dropped files just
|
||||
vanish from the report and the user has no signal that their config is
|
||||
filtering things out.
|
||||
"""
|
||||
cfg = make_workspace_config(
|
||||
{"knowledgebase": {"indexing": {"max_file_mb": 1}}},
|
||||
)
|
||||
|
||||
# Two zero-byte files.
|
||||
(cfg.workspace_root / "docs" / "empty1.md").parent.mkdir(
|
||||
parents=True, exist_ok=True
|
||||
)
|
||||
(cfg.workspace_root / "docs" / "empty1.md").write_bytes(b"")
|
||||
(cfg.workspace_root / "docs" / "empty2.txt").write_bytes(b"")
|
||||
|
||||
# One oversized file (2 MiB > max_file_mb=1).
|
||||
oversized = cfg.workspace_root / "docs" / "huge.md"
|
||||
oversized.write_bytes(b"a" * (2 * 1024 * 1024))
|
||||
|
||||
# One real file that should be indexed.
|
||||
write_file(cfg.workspace_root / "docs" / "real.md", "# Real\n\nActual content.\n")
|
||||
|
||||
summary = DefaultIndexer(cfg).index()
|
||||
|
||||
assert summary.files_indexed == 1
|
||||
assert summary.files_skipped == 3
|
||||
assert summary.files_errored == 0
|
||||
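The overlap test above leans on chonkie's refinery semantics. A heavily hedged sketch of the kind of wiring it implies — `RecursiveChunker` and `OverlapRefinery` are real chonkie classes, but the exact pipeline the indexer builds is not shown in this diff, and the parameter spelling here is an assumption:

# Assumed chonkie wiring (sketch): chunk, then attach the first `context_size`
# tokens of chunk N+1 onto chunk N as suffix context, kept separate from the
# chunk text (merge=False) so it can land in its own FTS column.
from chonkie import OverlapRefinery, RecursiveChunker

long_text = " ".join(f"sentence number {i}" for i in range(500))
chunker = RecursiveChunker(chunk_size=64)
refinery = OverlapRefinery(context_size=8, mode="token", method="suffix", merge=False)
chunks = refinery(chunker(long_text))  # each non-last chunk carries .context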
447 tests/workspace/test_parsers.py Normal file
@@ -0,0 +1,447 @@
import sys
import types
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

try:
    import chonkie  # noqa: F401 — force early import to avoid xdist C-extension race
except ImportError:
    pass

from workspace.constants import BINARY_SUFFIXES, CODE_SUFFIXES, MARKDOWN_SUFFIXES, PARSEABLE_SUFFIXES


class TestParseableSuffixes:
    def test_contains_expected_extensions(self):
        assert PARSEABLE_SUFFIXES == frozenset({".pdf", ".docx", ".pptx"})

    def test_is_subset_of_binary_suffixes(self):
        assert PARSEABLE_SUFFIXES <= BINARY_SUFFIXES

    def test_no_overlap_with_code_suffixes(self):
        assert PARSEABLE_SUFFIXES & CODE_SUFFIXES == frozenset()

    def test_no_overlap_with_markdown_suffixes(self):
        assert PARSEABLE_SUFFIXES & MARKDOWN_SUFFIXES == frozenset()


from workspace.config import KnowledgebaseConfig, ParsingConfig


class TestParsingConfig:
    def test_default_values(self):
        cfg = ParsingConfig()
        assert cfg.default == "markitdown"
        assert cfg.overrides == {}

    def test_custom_default(self):
        cfg = ParsingConfig(default="pandoc")
        assert cfg.default == "pandoc"

    def test_per_extension_overrides(self):
        cfg = ParsingConfig(overrides={".docx": "pandoc"})
        assert cfg.overrides[".docx"] == "pandoc"

    def test_frozen(self):
        cfg = ParsingConfig()
        with pytest.raises(Exception):
            cfg.default = "pandoc"

    def test_nested_in_knowledgebase_config(self):
        kb = KnowledgebaseConfig()
        assert isinstance(kb.parsing, ParsingConfig)
        assert kb.parsing.default == "markitdown"

    def test_knowledgebase_config_from_raw(self):
        kb = KnowledgebaseConfig.model_validate(
            {"parsing": {"default": "pandoc", "overrides": {".docx": "pandoc"}}}
        )
        assert kb.parsing.default == "pandoc"
        assert kb.parsing.overrides == {".docx": "pandoc"}

    def test_unknown_keys_ignored(self):
        cfg = ParsingConfig.model_validate({"default": "markitdown", "future_key": True})
        assert cfg.default == "markitdown"
        assert not hasattr(cfg, "future_key")


class TestFileParserABC:
    def test_cannot_instantiate_directly(self):
        from workspace.parsers import FileParser

        with pytest.raises(TypeError):
            FileParser()

    def test_parse_returns_none_on_exception(self):
        from workspace.parsers import FileParser

        class ExplodingParser(FileParser):
            name = "exploding"

            def supported_suffixes(self) -> frozenset[str]:
                return frozenset({".boom"})

            def _convert(self, path: Path) -> str:
                raise RuntimeError("kaboom")

        parser = ExplodingParser()
        result = parser.parse(Path("/fake/file.boom"))
        assert result is None

    def test_parse_returns_none_on_empty_output(self):
        from workspace.parsers import FileParser

        class EmptyParser(FileParser):
            name = "empty"

            def supported_suffixes(self) -> frozenset[str]:
                return frozenset({".empty"})

            def _convert(self, path: Path) -> str:
                return " \n \n "

        parser = EmptyParser()
        result = parser.parse(Path("/fake/file.empty"))
        assert result is None

    def test_parse_returns_content_on_success(self):
        from workspace.parsers import FileParser

        class GoodParser(FileParser):
            name = "good"

            def supported_suffixes(self) -> frozenset[str]:
                return frozenset({".good"})

            def _convert(self, path: Path) -> str:
                return "# Converted\n\nContent."

        parser = GoodParser()
        result = parser.parse(Path("/fake/file.good"))
        assert result == "# Converted\n\nContent."


@pytest.fixture
def mock_markitdown():
    """Mock the markitdown module regardless of whether it's installed."""
    mock_result = MagicMock()
    mock_result.markdown = "# Converted\n\nParsed content from document."
    mock_instance = MagicMock()
    mock_instance.convert.return_value = mock_result
    mock_class = MagicMock(return_value=mock_instance)

    module = types.ModuleType("markitdown")
    module.MarkItDown = mock_class

    with patch.dict(sys.modules, {"markitdown": module}):
        yield mock_instance


class TestMarkitdownParser:
    def test_name(self):
        from workspace.parsers import MarkitdownParser

        assert MarkitdownParser.name == "markitdown"

    def test_supported_suffixes(self):
        from workspace.parsers import MarkitdownParser

        suffixes = MarkitdownParser().supported_suffixes()
        assert ".pdf" in suffixes
        assert ".docx" in suffixes
        assert ".pptx" in suffixes

    def test_parse_calls_markitdown_convert(self, tmp_path, mock_markitdown):
        from workspace.parsers import MarkitdownParser

        pdf = tmp_path / "report.pdf"
        pdf.write_bytes(b"%PDF-1.4 fake content")

        parser = MarkitdownParser()
        result = parser.parse(pdf)

        assert result == "# Converted\n\nParsed content from document."
        mock_markitdown.convert.assert_called_once_with(str(pdf))

    def test_parse_returns_none_when_markitdown_not_installed(self, tmp_path):
        from workspace.parsers import MarkitdownParser

        pdf = tmp_path / "report.pdf"
        pdf.write_bytes(b"%PDF-1.4 fake")

        with patch.dict(sys.modules, {"markitdown": None}):
            parser = MarkitdownParser()
            result = parser.parse(pdf)

        assert result is None


class TestPandocParser:
    def test_name(self):
        from workspace.parsers import PandocParser

        assert PandocParser.name == "pandoc"

    def test_supported_suffixes(self):
        from workspace.parsers import PandocParser

        suffixes = PandocParser().supported_suffixes()
        assert ".pdf" in suffixes
        assert ".docx" in suffixes
        assert ".pptx" in suffixes

    def test_parse_calls_pandoc_subprocess(self, tmp_path):
        from workspace.parsers import PandocParser

        docx = tmp_path / "report.docx"
        docx.write_bytes(b"PK fake docx content")

        mock_completed = MagicMock()
        mock_completed.stdout = "# Converted\n\nFrom pandoc."
        mock_completed.check_returncode = MagicMock()

        with patch("workspace.parsers.subprocess.run", return_value=mock_completed) as mock_run:
            parser = PandocParser()
            result = parser.parse(docx)

        assert result == "# Converted\n\nFrom pandoc."
        mock_run.assert_called_once_with(
            ["pandoc", str(docx), "-t", "markdown"],
            capture_output=True,
            text=True,
            timeout=120,
        )

    def test_parse_returns_none_when_pandoc_not_found(self, tmp_path):
        from workspace.parsers import PandocParser

        docx = tmp_path / "report.docx"
        docx.write_bytes(b"PK fake")

        with patch(
            "workspace.parsers.subprocess.run",
            side_effect=FileNotFoundError("pandoc not found"),
        ):
            parser = PandocParser()
            result = parser.parse(docx)

        assert result is None

    def test_parse_returns_none_on_nonzero_exit(self, tmp_path):
        import subprocess as sp

        from workspace.parsers import PandocParser

        docx = tmp_path / "report.docx"
        docx.write_bytes(b"PK fake")

        mock_completed = MagicMock()
        mock_completed.check_returncode.side_effect = sp.CalledProcessError(1, "pandoc")

        with patch("workspace.parsers.subprocess.run", return_value=mock_completed):
            parser = PandocParser()
            result = parser.parse(docx)

        assert result is None


class TestCompositeParser:
    def _make_stub_parser(self, name: str, suffixes: frozenset[str], output: str):
        """Create a concrete FileParser stub for testing."""
        from workspace.parsers import FileParser

        class StubParser(FileParser):
            def supported_suffixes(self) -> frozenset[str]:
                return suffixes

            def _convert(self, path):
                return output

        StubParser.name = name
        return StubParser()

    def test_routes_to_correct_parser(self, tmp_path):
        from workspace.parsers import CompositeParser

        pdf_parser = self._make_stub_parser("pdf_backend", frozenset({".pdf"}), "# From PDF")
        docx_parser = self._make_stub_parser("docx_backend", frozenset({".docx"}), "# From DOCX")
|
||||
|
||||
composite = CompositeParser({".pdf": pdf_parser, ".docx": docx_parser})
|
||||
|
||||
pdf = tmp_path / "test.pdf"
|
||||
pdf.write_bytes(b"fake")
|
||||
assert composite.parse(pdf) == "# From PDF"
|
||||
|
||||
docx = tmp_path / "test.docx"
|
||||
docx.write_bytes(b"fake")
|
||||
assert composite.parse(docx) == "# From DOCX"
|
||||
|
||||
def test_returns_none_for_unknown_suffix(self, tmp_path):
|
||||
from workspace.parsers import CompositeParser
|
||||
|
||||
composite = CompositeParser({})
|
||||
txt = tmp_path / "test.txt"
|
||||
txt.write_text("hello")
|
||||
assert composite.parse(txt) is None
|
||||
|
||||
def test_can_parse(self):
|
||||
from workspace.parsers import CompositeParser
|
||||
|
||||
stub = self._make_stub_parser("stub", frozenset({".pdf"}), "content")
|
||||
composite = CompositeParser({".pdf": stub})
|
||||
|
||||
assert composite.can_parse(".pdf") is True
|
||||
assert composite.can_parse(".docx") is False
|
||||
assert composite.can_parse(".txt") is False
|
||||
|
||||
|
||||
class TestBuildParser:
|
||||
def test_default_config_routes_all_parseable_suffixes(self, mock_markitdown):
|
||||
from workspace.config import ParsingConfig
|
||||
from workspace.parsers import build_parser
|
||||
|
||||
composite = build_parser(ParsingConfig())
|
||||
assert composite.can_parse(".pdf")
|
||||
assert composite.can_parse(".docx")
|
||||
assert composite.can_parse(".pptx")
|
||||
assert not composite.can_parse(".txt")
|
||||
|
||||
def test_override_routes_extension_to_different_backend(self, mock_markitdown):
|
||||
from workspace.config import ParsingConfig
|
||||
from workspace.parsers import build_parser
|
||||
|
||||
cfg = ParsingConfig(overrides={".docx": "pandoc"})
|
||||
composite = build_parser(cfg)
|
||||
|
||||
# .docx should be routed to pandoc, not markitdown
|
||||
assert composite._routing[".docx"].name == "pandoc"
|
||||
assert composite._routing[".pdf"].name == "markitdown"
|
||||
|
||||
def test_unknown_backend_name_skips_extension(self, mock_markitdown):
|
||||
from workspace.config import ParsingConfig
|
||||
from workspace.parsers import build_parser
|
||||
|
||||
cfg = ParsingConfig(overrides={".pdf": "nonexistent_backend"})
|
||||
composite = build_parser(cfg)
|
||||
|
||||
assert not composite.can_parse(".pdf")
|
||||
assert composite.can_parse(".docx")
|
||||
|
||||
def test_pandoc_as_default(self, mock_markitdown):
|
||||
from workspace.config import ParsingConfig
|
||||
from workspace.parsers import build_parser
|
||||
|
||||
cfg = ParsingConfig(default="pandoc")
|
||||
composite = build_parser(cfg)
|
||||
|
||||
assert composite._routing[".pdf"].name == "pandoc"
|
||||
assert composite._routing[".docx"].name == "pandoc"
|
||||
assert composite._routing[".pptx"].name == "pandoc"
|
||||
|
||||
|
||||
from workspace.files import discover_workspace_files
|
||||
|
||||
|
||||
class TestDiscoveryWithParseableFiles:
|
||||
def test_pdf_files_are_discovered(self, make_workspace_config):
|
||||
cfg = make_workspace_config()
|
||||
pdf = cfg.workspace_root / "docs" / "report.pdf"
|
||||
pdf.parent.mkdir(parents=True, exist_ok=True)
|
||||
pdf.write_bytes(b"%PDF-1.4 fake content")
|
||||
|
||||
result = discover_workspace_files(cfg)
|
||||
discovered_names = [p.name for _, p in result.files]
|
||||
assert "report.pdf" in discovered_names
|
||||
|
||||
def test_docx_files_are_discovered(self, make_workspace_config):
|
||||
cfg = make_workspace_config()
|
||||
docx = cfg.workspace_root / "docs" / "report.docx"
|
||||
docx.parent.mkdir(parents=True, exist_ok=True)
|
||||
docx.write_bytes(b"PK fake docx")
|
||||
|
||||
result = discover_workspace_files(cfg)
|
||||
discovered_names = [p.name for _, p in result.files]
|
||||
assert "report.docx" in discovered_names
|
||||
|
||||
def test_pptx_files_are_discovered(self, make_workspace_config):
|
||||
cfg = make_workspace_config()
|
||||
pptx = cfg.workspace_root / "docs" / "slides.pptx"
|
||||
pptx.parent.mkdir(parents=True, exist_ok=True)
|
||||
pptx.write_bytes(b"PK fake pptx")
|
||||
|
||||
result = discover_workspace_files(cfg)
|
||||
discovered_names = [p.name for _, p in result.files]
|
||||
assert "slides.pptx" in discovered_names
|
||||
|
||||
def test_true_binaries_still_excluded(self, make_workspace_config):
|
||||
cfg = make_workspace_config()
|
||||
exe = cfg.workspace_root / "docs" / "app.exe"
|
||||
exe.parent.mkdir(parents=True, exist_ok=True)
|
||||
exe.write_bytes(b"MZ fake exe")
|
||||
|
||||
result = discover_workspace_files(cfg)
|
||||
discovered_names = [p.name for _, p in result.files]
|
||||
assert "app.exe" not in discovered_names
|
||||
|
||||
|
||||
class TestDefaultIndexerParsing:
|
||||
def test_indexes_pdf_via_parser(self, make_workspace_config, mock_markitdown):
|
||||
cfg = make_workspace_config()
|
||||
pdf = cfg.workspace_root / "docs" / "report.pdf"
|
||||
pdf.parent.mkdir(parents=True, exist_ok=True)
|
||||
pdf.write_bytes(b"%PDF-1.4 fake content")
|
||||
|
||||
from workspace.default import DefaultIndexer
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
summary = indexer.index()
|
||||
|
||||
assert summary.files_indexed == 1
|
||||
assert summary.files_errored == 0
|
||||
assert summary.chunks_created >= 1
|
||||
|
||||
def test_parsed_content_is_searchable(self, make_workspace_config, mock_markitdown):
|
||||
cfg = make_workspace_config()
|
||||
pdf = cfg.workspace_root / "docs" / "report.pdf"
|
||||
pdf.parent.mkdir(parents=True, exist_ok=True)
|
||||
pdf.write_bytes(b"%PDF-1.4 fake content")
|
||||
|
||||
from workspace.default import DefaultIndexer
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
indexer.index()
|
||||
|
||||
results = indexer.search("Parsed content")
|
||||
assert len(results) > 0
|
||||
assert any("report.pdf" in r.path for r in results)
|
||||
|
||||
def test_parse_failure_counts_as_error(self, make_workspace_config, mock_markitdown):
|
||||
cfg = make_workspace_config()
|
||||
pdf = cfg.workspace_root / "docs" / "broken.pdf"
|
||||
pdf.parent.mkdir(parents=True, exist_ok=True)
|
||||
pdf.write_bytes(b"%PDF-1.4 fake")
|
||||
|
||||
mock_markitdown.convert.side_effect = RuntimeError("corrupt PDF")
|
||||
|
||||
from workspace.default import DefaultIndexer
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
summary = indexer.index()
|
||||
|
||||
assert summary.files_errored == 1
|
||||
assert summary.files_indexed == 0
|
||||
assert any(e.stage == "parse" for e in summary.errors)
|
||||
|
||||
def test_text_files_still_use_read_file_text(self, make_workspace_config, write_file):
|
||||
"""Ensure non-parseable text files still go through the normal path."""
|
||||
cfg = make_workspace_config()
|
||||
write_file(cfg.workspace_root / "docs" / "readme.md", "# Hello\n\nWorld.\n")
|
||||
|
||||
from workspace.default import DefaultIndexer
|
||||
|
||||
indexer = DefaultIndexer(cfg)
|
||||
summary = indexer.index()
|
||||
|
||||
assert summary.files_indexed == 1
|
||||
assert summary.files_errored == 0
|
||||
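Aside: the `parse()` contract pinned down by `TestFileParserABC` above is a classic template method: the public `parse()` wraps the backend-specific `_convert()` with error and empty-output handling. `workspace/parsers.py` itself is not shown in this compare view, so the following is only a minimal sketch, inferred from the tests, of a base class that would satisfy them; the real implementation may differ:

```python
# Sketch only — inferred from the tests above, not the actual workspace/parsers.py.
from abc import ABC, abstractmethod
from pathlib import Path


class FileParser(ABC):
    """Convert a binary document to markdown text, or None on any failure."""

    name: str = "base"

    @abstractmethod
    def supported_suffixes(self) -> frozenset[str]: ...

    @abstractmethod
    def _convert(self, path: Path) -> str: ...

    def parse(self, path: Path) -> str | None:
        try:
            text = self._convert(path)
        except Exception:
            return None  # backend blew up: test_parse_returns_none_on_exception
        if not text or not text.strip():
            return None  # whitespace-only output: test_parse_returns_none_on_empty_output
        return text
```

The design choice the tests enforce is that `parse()` never raises: backend failures and empty conversions both collapse to `None`, so the indexer can count them as errors instead of crashing mid-run.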
tests/workspace/test_plugin_discovery.py (new file, 80 lines)
@@ -0,0 +1,80 @@
"""Tests for workspace plugin discovery."""

import textwrap


def test_load_workspace_indexer_returns_none_for_unknown():
    from plugins.workspace import load_workspace_indexer

    result = load_workspace_indexer("nonexistent_plugin_xyz")
    assert result is None


def test_load_workspace_indexer_finds_register_pattern(tmp_path, monkeypatch):
    plugin_dir = tmp_path / "fake_plugin"
    plugin_dir.mkdir()
    (plugin_dir / "__init__.py").write_text(
        textwrap.dedent("""\
            from workspace.base import BaseIndexer
            from workspace.types import IndexSummary

            class FakeIndexer(BaseIndexer):
                def __init__(self, config):
                    self._config = config
                def index(self, *, progress=None):
                    return IndexSummary(
                        files_indexed=0, files_skipped=0, files_pruned=0,
                        files_errored=0, chunks_created=0, duration_seconds=0.0,
                        errors=[], errors_truncated=False,
                    )
                def search(self, query, *, limit=20, path_prefix=None, file_glob=None):
                    return []

            def register(ctx):
                ctx.register_workspace_indexer(FakeIndexer)
            """),
        encoding="utf-8",
    )

    from plugins.workspace import _load_indexer_from_dir

    cls = _load_indexer_from_dir(plugin_dir)
    assert cls is not None
    assert cls.__name__ == "FakeIndexer"


def test_load_workspace_indexer_finds_bare_subclass(tmp_path):
    plugin_dir = tmp_path / "bare_plugin"
    plugin_dir.mkdir()
    (plugin_dir / "__init__.py").write_text(
        textwrap.dedent("""\
            from workspace.base import BaseIndexer
            from workspace.types import IndexSummary

            class BareIndexer(BaseIndexer):
                def __init__(self, config):
                    self._config = config
                def index(self, *, progress=None):
                    return IndexSummary(
                        files_indexed=0, files_skipped=0, files_pruned=0,
                        files_errored=0, chunks_created=0, duration_seconds=0.0,
                        errors=[], errors_truncated=False,
                    )
                def search(self, query, *, limit=20, path_prefix=None, file_glob=None):
                    return []
            """),
        encoding="utf-8",
    )

    from plugins.workspace import _load_indexer_from_dir

    cls = _load_indexer_from_dir(plugin_dir)
    assert cls is not None
    assert cls.__name__ == "BareIndexer"


def test_discover_workspace_indexers_returns_list():
    from plugins.workspace import discover_workspace_indexers

    result = discover_workspace_indexers()
    assert isinstance(result, list)
tests/workspace/test_plugin_integration.py (new file, 50 lines)
@@ -0,0 +1,50 @@
"""End-to-end integration tests for workspace plugin architecture."""

from workspace import get_indexer
from workspace.base import BaseIndexer
from workspace.config import WorkspaceConfig
from workspace.default import DefaultIndexer


def test_get_indexer_returns_default_when_not_configured(make_workspace_config):
    cfg = make_workspace_config()
    indexer = get_indexer(cfg)
    assert isinstance(indexer, DefaultIndexer)
    assert isinstance(indexer, BaseIndexer)


def test_get_indexer_falls_back_on_unknown_plugin(make_workspace_config):
    cfg = make_workspace_config()
    cfg = WorkspaceConfig(
        workspace_root=cfg.workspace_root,
        indexer="nonexistent_xyz",
    )
    indexer = get_indexer(cfg)
    assert isinstance(indexer, DefaultIndexer)


def test_full_round_trip_through_factory(make_workspace_config, write_file):
    cfg = make_workspace_config()
    write_file(
        cfg.workspace_root / "docs" / "test.md", "# Test\n\nSearchable content here.\n"
    )

    indexer = get_indexer(cfg)
    summary = indexer.index()
    assert summary.files_indexed == 1

    results = indexer.search("searchable")
    assert len(results) > 0
    assert results[0].path.endswith("test.md")


def test_status_works_through_factory(make_workspace_config, write_file):
    cfg = make_workspace_config()
    write_file(cfg.workspace_root / "docs" / "a.md", "# A\n\nContent.\n")

    indexer = get_indexer(cfg)
    indexer.index()

    status = indexer.status()
    assert status["file_count"] >= 1
    assert status["chunk_count"] >= 1
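Aside: together with the plugin-discovery tests above, these integration tests fix `get_indexer`'s contract: a missing or unknown indexer name must quietly fall back to `DefaultIndexer` rather than raise. The factory itself is not part of this hunk; a plausible sketch under those constraints, assuming `WorkspaceConfig.indexer` defaults to a falsy value:

```python
# Sketch only — one way to satisfy the fallback behavior tested above.
def get_indexer(config):
    if config.indexer:  # assumption: indexer is None/"" when not configured
        from plugins.workspace import load_workspace_indexer

        cls = load_workspace_indexer(config.indexer)
        if cls is not None:
            return cls(config)
    # Unknown plugin name, or no plugin configured: degrade to the default.
    from workspace.default import DefaultIndexer

    return DefaultIndexer(config)
```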
tests/workspace/test_workspace_edge_cases.py (new file, 268 lines)
@@ -0,0 +1,268 @@
import json
import shutil
import sqlite3
import sys
from argparse import Namespace
from pathlib import Path

import pytest

from workspace.commands import workspace_command
from workspace.default import DefaultIndexer
from workspace.store import SQLiteFTS5Store


def test_default_overlap_is_clamped_for_small_chunk_sizes(make_workspace_config):
    cfg = make_workspace_config(
        {"knowledgebase": {"chunking": {"chunk_size": 12}}},
    )
    # Default overlap is 32, clamped to chunk_size - 1 when chunk_size is smaller.
    assert cfg.knowledgebase.chunking.overlap == 11


def test_markdown_metadata_has_clean_shape(make_workspace_config, write_file):
    """Code rows carry only `language`; tables and images carry no metadata."""
    cfg = make_workspace_config(
        {"knowledgebase": {"chunking": {"chunk_size": 64}}},
    )

    md = write_file(
        cfg.workspace_root / "docs" / "mixed.md",
        """# Title

Some intro prose to ensure the markdown path is used.

```python
def first_block():
    return 1
```

| Name | Score |
| ---- | ----- |
| A | 10 |
| B | 20 |



## Second

More prose in the second section.

```python
def second_block():
    return 2
```

| Lang | Lines |
| ---- | ----- |
| py | 2 |


""",
    )

    summary = DefaultIndexer(cfg).index()
    assert summary.files_indexed == 1
    assert summary.files_errored == 0

    with SQLiteFTS5Store(cfg.workspace_root) as store:
        rows = store.conn.execute(
            "SELECT kind, chunk_metadata, content FROM chunks WHERE abs_path = ? "
            "ORDER BY start_char",
            (str(md.resolve()),),
        ).fetchall()

    code_rows = [r for r in rows if r["kind"] == "markdown_code"]
    table_rows = [r for r in rows if r["kind"] == "markdown_table"]
    image_rows = [r for r in rows if r["kind"] == "markdown_image"]

    assert len(code_rows) == 2
    assert len(table_rows) == 2
    assert len(image_rows) == 2

    # Code: exactly {"language": "python"}, nothing else.
    for r in code_rows:
        meta = json.loads(r["chunk_metadata"])
        assert meta == {"language": "python"}

    # Tables and images have no chunk_metadata.
    for r in table_rows:
        assert r["chunk_metadata"] is None
    for r in image_rows:
        assert r["chunk_metadata"] is None

    # Image `content` is the alias (the searchable text).
    assert [r["content"] for r in image_rows] == ["first image", "second image"]


def test_failed_reindex_keeps_previous_committed_rows(
    make_workspace_config,
    write_file,
    monkeypatch: pytest.MonkeyPatch,
):
    cfg = make_workspace_config()

    file_a = write_file(cfg.workspace_root / "docs" / "a.txt", "stable old content\n")
    file_b = write_file(cfg.workspace_root / "docs" / "b.txt", "other old content\n")

    initial = DefaultIndexer(cfg).index()
    assert initial.files_indexed == 2

    with SQLiteFTS5Store(cfg.workspace_root) as store:
        original_record = store.get_file_record(str(file_a.resolve()))
        assert original_record is not None

    original_insert = SQLiteFTS5Store.insert_chunks
    target_path = str(file_a.resolve())

    def flaky_insert(self: SQLiteFTS5Store, chunks):
        if chunks and chunks[0].abs_path == target_path:
            raise sqlite3.OperationalError("simulated insert failure")
        return original_insert(self, chunks)

    monkeypatch.setattr(SQLiteFTS5Store, "insert_chunks", flaky_insert)

    file_a.write_text("stable new content that should roll back\n", encoding="utf-8")
    file_b.write_text("other new content that should succeed\n", encoding="utf-8")

    summary = DefaultIndexer(cfg).index()
    assert summary.files_indexed == 1
    assert summary.files_errored == 1

    with SQLiteFTS5Store(cfg.workspace_root) as store:
        record = store.get_file_record(target_path)
        assert record is not None
        assert record.content_hash == original_record.content_hash

        content_rows = store.conn.execute(
            "SELECT content FROM chunks WHERE abs_path = ? ORDER BY chunk_index",
            (target_path,),
        ).fetchall()

    assert content_rows
    combined = "\n".join(row["content"] for row in content_rows)
    assert "stable old content" in combined
    assert "stable new content" not in combined


def test_missing_root_skips_stale_prune(
    tmp_path: Path, make_workspace_config, write_file
):
    external_root = tmp_path / "external"
    external_root.mkdir()

    cfg = make_workspace_config(
        {
            "knowledgebase": {
                "roots": [{"path": str(external_root), "recursive": True}],
            }
        },
    )

    local_file = write_file(
        cfg.workspace_root / "docs" / "local.txt", "local content\n"
    )
    external_file = write_file(external_root / "external.txt", "external content\n")

    first = DefaultIndexer(cfg).index()
    assert first.files_indexed == 2

    shutil.rmtree(external_root)

    second = DefaultIndexer(cfg).index()
    assert second.files_pruned == 0

    with SQLiteFTS5Store(cfg.workspace_root) as store:
        indexed = store.all_indexed_paths()

    assert str(local_file.resolve()) in indexed
    assert str(external_file.resolve()) in indexed


def test_workspace_command_search_disabled_returns_structured_error(
    make_workspace_config,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
):
    cfg = make_workspace_config({"workspace": {"enabled": False}})
    monkeypatch.setattr("workspace.config.load_workspace_config", lambda: cfg)

    args = Namespace(
        workspace_action="search",
        query="needle",
        limit=None,
        path=None,
        glob=None,
        human=False,
    )

    with pytest.raises(SystemExit) as exc:
        workspace_command(args)

    assert exc.value.code == 1
    err = json.loads(capsys.readouterr().err)
    assert err["error"] == "Workspace is disabled (workspace.enabled = false)"


def test_workspace_command_wraps_unexpected_errors_as_json(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
):
    monkeypatch.setattr(
        "workspace.config.load_workspace_config",
        lambda: (_ for _ in ()).throw(ValueError("bad config")),
    )

    args = Namespace(
        workspace_action="search",
        query="needle",
        limit=None,
        path=None,
        glob=None,
        human=False,
    )

    with pytest.raises(SystemExit) as exc:
        workspace_command(args)

    assert exc.value.code == 1
    err = json.loads(capsys.readouterr().err)
    assert err == {"error": "bad config", "error_type": "ValueError"}


@pytest.mark.parametrize(
    ("argv", "expected"),
    [
        (
            ["hermes", "workspace", "search", "needle", "--human"],
            ("search", None, True),
        ),
        (["hermes", "workspace", "roots", "list", "--human"], ("roots", "list", True)),
    ],
)
def test_workspace_human_flag_parses_after_subcommands(
    argv: list[str],
    expected: tuple[str, str | None, bool],
    monkeypatch: pytest.MonkeyPatch,
):
    from hermes_cli import main as main_mod
    import workspace.commands as workspace_commands

    captured: dict[str, object] = {}

    def fake_workspace_command(args):
        captured["workspace_action"] = args.workspace_action
        captured["roots_action"] = getattr(args, "roots_action", None)
        captured["human"] = getattr(args, "human", False)

    monkeypatch.setattr("hermes_cli.config.get_container_exec_info", lambda: None)
    monkeypatch.setattr(workspace_commands, "workspace_command", fake_workspace_command)
    monkeypatch.setattr(sys, "argv", argv)

    main_mod.main()

    assert (
        captured["workspace_action"],
        captured["roots_action"],
        captured["human"],
    ) == expected
tools/workspace_tools.py (new file, 292 lines)
@@ -0,0 +1,292 @@
"""Workspace tools — search, index, and manage the workspace knowledgebase.

Each workspace operation is a separate tool with a focused schema.
All tools register under toolset="workspace" and are gated on
workspace.enabled in the hermes config.
"""

from pathlib import Path

from tools.registry import registry, tool_error, tool_result


def _check_workspace_enabled() -> bool:
    try:
        from workspace.config import load_workspace_config

        return load_workspace_config().enabled
    except Exception:
        return False


def _get_indexer():
    from workspace import get_indexer
    from workspace.config import load_workspace_config

    return get_indexer(load_workspace_config())


# ---------------------------------------------------------------------------
# workspace_search
# ---------------------------------------------------------------------------

SEARCH_SCHEMA = {
    "name": "workspace_search",
    "description": (
        "BM25 full-text search across files indexed in the workspace "
        "knowledgebase. Returns ranked chunks with path, line range, "
        "score, and content snippet. "
        "PREFER THIS over terminal grep/find/cat when the user asks "
        "about indexed code or documentation — it is faster, returns "
        "ranked results, and avoids scanning the filesystem. Fall back "
        "to reading files directly only if the search output is "
        "insufficient for answering."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query.",
            },
            "path_prefix": {
                "type": "string",
                "description": "Filter results to files under this absolute path prefix.",
            },
            "file_glob": {
                "type": "string",
                "description": "Filename glob filter, e.g. '*.md', '*.py'.",
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of results (default 20).",
                "default": 20,
            },
        },
        "required": ["query"],
    },
}


def _handle_search(args: dict, **kwargs) -> str:
    try:
        from workspace.constants import resolve_path_prefix

        query = args.get("query", "").strip()
        if not query:
            return tool_error("query is required")
        indexer = _get_indexer()
        results = indexer.search(
            query,
            limit=args.get("limit", 20),
            path_prefix=resolve_path_prefix(args.get("path_prefix")),
            file_glob=args.get("file_glob"),
        )
        return tool_result([r.to_dict() for r in results])
    except Exception as e:
        return tool_error(str(e))


# ---------------------------------------------------------------------------
# workspace_index
# ---------------------------------------------------------------------------

INDEX_SCHEMA = {
    "name": "workspace_index",
    "description": (
        "Rebuild the workspace index. Scans all configured roots, "
        "chunks files, and updates the FTS5 search index. "
        "This is expensive — only call when files have changed."
    ),
    "parameters": {
        "type": "object",
        "properties": {},
    },
}


def _handle_index(args: dict, **kwargs) -> str:
    try:
        indexer = _get_indexer()
        summary = indexer.index()
        return tool_result(summary.to_dict())
    except Exception as e:
        return tool_error(str(e))


# ---------------------------------------------------------------------------
# workspace_status
# ---------------------------------------------------------------------------

STATUS_SCHEMA = {
    "name": "workspace_status",
    "description": (
        "Show workspace index statistics: file count, chunk count, "
        "database size, and database path."
    ),
    "parameters": {
        "type": "object",
        "properties": {},
    },
}


def _handle_status(args: dict, **kwargs) -> str:
    try:
        indexer = _get_indexer()
        return tool_result(indexer.status())
    except Exception as e:
        return tool_error(str(e))


# ---------------------------------------------------------------------------
# workspace_list
# ---------------------------------------------------------------------------

LIST_SCHEMA = {
    "name": "workspace_list",
    "description": "List all files currently in the workspace index with size and chunk count.",
    "parameters": {
        "type": "object",
        "properties": {
            "limit": {
                "type": "integer",
                "description": "Maximum number of files to return (default 50).",
                "default": 50,
            },
        },
    },
}


def _handle_list(args: dict, **kwargs) -> str:
    try:
        indexer = _get_indexer()
        files = indexer.list_files()
        limit = args.get("limit", 50)
        return tool_result(files[:limit])
    except Exception as e:
        return tool_error(str(e))


# ---------------------------------------------------------------------------
# workspace_retrieve
# ---------------------------------------------------------------------------

RETRIEVE_SCHEMA = {
    "name": "workspace_retrieve",
    "description": (
        "Get all indexed chunks for a specific file by its absolute path. "
        "Unlike search, this returns every chunk — useful when you know "
        "which file you want but need its full indexed content."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Absolute path to the file.",
            },
        },
        "required": ["path"],
    },
}


def _handle_retrieve(args: dict, **kwargs) -> str:
    try:
        raw_path = args.get("path", "")
        if not raw_path:
            return tool_error("path is required")
        resolved = str(Path(raw_path).expanduser().resolve())
        indexer = _get_indexer()
        results = indexer.retrieve(resolved)
        return tool_result({"path": resolved, "chunks": [r.to_dict() for r in results]})
    except Exception as e:
        return tool_error(str(e))


# ---------------------------------------------------------------------------
# workspace_delete
# ---------------------------------------------------------------------------

DELETE_SCHEMA = {
    "name": "workspace_delete",
    "description": "Remove a file and its chunks from the workspace index.",
    "parameters": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Absolute path to the file to remove from the index.",
            },
        },
        "required": ["path"],
    },
}


def _handle_delete(args: dict, **kwargs) -> str:
    try:
        raw_path = args.get("path", "")
        if not raw_path:
            return tool_error("path is required")
        resolved = str(Path(raw_path).expanduser().resolve())
        indexer = _get_indexer()
        deleted = indexer.delete(resolved)
        return tool_result({"path": resolved, "deleted": deleted})
    except Exception as e:
        return tool_error(str(e))


# ---------------------------------------------------------------------------
# Registration
# ---------------------------------------------------------------------------

registry.register(
    name="workspace_search",
    toolset="workspace",
    schema=SEARCH_SCHEMA,
    handler=_handle_search,
    check_fn=_check_workspace_enabled,
)

registry.register(
    name="workspace_index",
    toolset="workspace",
    schema=INDEX_SCHEMA,
    handler=_handle_index,
    check_fn=_check_workspace_enabled,
)

registry.register(
    name="workspace_status",
    toolset="workspace",
    schema=STATUS_SCHEMA,
    handler=_handle_status,
    check_fn=_check_workspace_enabled,
)

registry.register(
    name="workspace_list",
    toolset="workspace",
    schema=LIST_SCHEMA,
    handler=_handle_list,
    check_fn=_check_workspace_enabled,
)

registry.register(
    name="workspace_retrieve",
    toolset="workspace",
    schema=RETRIEVE_SCHEMA,
    handler=_handle_retrieve,
    check_fn=_check_workspace_enabled,
)

registry.register(
    name="workspace_delete",
    toolset="workspace",
    schema=DELETE_SCHEMA,
    handler=_handle_delete,
    check_fn=_check_workspace_enabled,
)
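Aside: every handler above takes a plain args dict and returns a string built by tool_result or tool_error, so the tools can be exercised outside the agent loop. A hypothetical direct call, assuming workspace.enabled is true and an index has already been built:

```python
# Hypothetical direct invocation of the search handler defined above.
from tools.workspace_tools import _handle_search

print(_handle_search({"query": "chunk overlap", "limit": 5}))
# Prints a payload of ranked chunks, or an error payload if the
# workspace is disabled or the index is missing.
```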
toolsets.py (12 lines changed)
@@ -60,6 +60,8 @@ _HERMES_CORE_TOOLS = [
    "send_message",
    # Home Assistant smart home control (gated on HASS_TOKEN via check_fn)
    "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
    # Workspace knowledgebase (gated on workspace.enabled via check_fn)
    "workspace_search", "workspace_index",
]

@@ -201,6 +203,15 @@ TOOLSETS = {
        "includes": []
    },

    "workspace": {
        "description": "Workspace knowledgebase — full tool suite for index management",
        "tools": [
            "workspace_search", "workspace_index", "workspace_status",
            "workspace_list", "workspace_retrieve", "workspace_delete",
        ],
        "includes": []
    },

    "feishu_doc": {
        "description": "Read Feishu/Lark document content",
        "tools": ["feishu_doc_read"],
@@ -216,7 +227,6 @@ TOOLSETS = {
        "includes": []
    },


    # Scenario-specific toolsets

    "debugging": {
572
uv.lock
generated
572
uv.lock
generated
@@ -2,10 +2,14 @@ version = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.11"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.14'",
|
||||
"python_full_version == '3.13.*'",
|
||||
"python_full_version == '3.12.*'",
|
||||
"python_full_version < '3.12'",
|
||||
"python_full_version >= '3.14' and sys_platform == 'win32'",
|
||||
"python_full_version >= '3.14' and sys_platform != 'win32'",
|
||||
"python_full_version == '3.13.*' and sys_platform == 'win32'",
|
||||
"python_full_version == '3.13.*' and sys_platform != 'win32'",
|
||||
"python_full_version == '3.12.*' and sys_platform == 'win32'",
|
||||
"python_full_version == '3.12.*' and sys_platform != 'win32'",
|
||||
"python_full_version < '3.12' and sys_platform == 'win32'",
|
||||
"python_full_version < '3.12' and sys_platform != 'win32'",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -426,7 +430,7 @@ wheels = [
|
||||
[[package]]
|
||||
name = "atroposlib"
|
||||
version = "0.4.0"
|
||||
source = { git = "https://github.com/NousResearch/atropos.git#c421582b6f7ce8a32f751aab3117d3824ac8f709" }
|
||||
source = { git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30#c20c85256e5a45ad31edf8b7276e9c5ee1995a30" }
|
||||
dependencies = [
|
||||
{ name = "aiofiles" },
|
||||
{ name = "aiohttp" },
|
||||
@@ -549,6 +553,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4a/45/ec96b29162a402fc4c1c5512d114d7b3787b9d1c2ec241d9568b4816ee23/base58-2.1.1-py3-none-any.whl", hash = "sha256:11a36f4d3ce51dfc1043f3218591ac4eb1ceb172919cebe05b52a5bcc8d245c2", size = 5621, upload-time = "2021-10-30T22:12:16.658Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.14.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "soupsieve" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blinker"
|
||||
version = "1.9.0"
|
||||
@@ -558,6 +575,34 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.42.91"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "botocore" },
|
||||
{ name = "jmespath" },
|
||||
{ name = "s3transfer" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a7/c0/98b8cec7ca22dde776df48c58940ae1abc425593959b7226e270760d726f/boto3-1.42.91.tar.gz", hash = "sha256:03d70532b17f7f84df37ca7e8c21553280454dea53ae12b15d1cfef9b16fcb8a", size = 113181, upload-time = "2026-04-17T19:31:06.251Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/02/29/faba6521257c34085cc9b439ef98235b581772580f417fa3629728007270/boto3-1.42.91-py3-none-any.whl", hash = "sha256:04e72071cde022951ce7f81bd9933c90095ab8923e8ced61c8dacfe9edac0f5c", size = 140553, upload-time = "2026-04-17T19:31:02.57Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "botocore"
|
||||
version = "1.42.91"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "jmespath" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/21/bc/a4b7c46471c2e789ad8c4c7acfd7f302fdb481d93ff870f441249b924ae6/botocore-1.42.91.tar.gz", hash = "sha256:d252e27bc454afdbf5ed3dc617aa423f2c855c081e98b7963093399483ecc698", size = 15213010, upload-time = "2026-04-17T19:30:50.793Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/fc/24cc0a47c824f13933e210e9ad034b4fba22f7185b8d904c0fbf5a3b2be8/botocore-1.42.91-py3-none-any.whl", hash = "sha256:7a28c3cc6bfab5724ad18899d52402b776a0de7d87fa20c3c5270bcaaf199ce8", size = 14897344, upload-time = "2026-04-17T19:30:44.245Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cachetools"
|
||||
version = "5.5.2"
|
||||
@@ -756,6 +801,65 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chonkie"
|
||||
version = "1.6.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "chonkie-core" },
|
||||
{ name = "httpx" },
|
||||
{ name = "numpy" },
|
||||
{ name = "tenacity" },
|
||||
{ name = "tqdm" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/39/92/2b4c37a6c6d64f03b4520aadc45e2a69685b7e24ea9b1335d2e5a15c58f2/chonkie-1.6.2.tar.gz", hash = "sha256:e7b39c449ef04ffe7e42121eee159a385bfa34edcadcab003c92f93d858db4ba", size = 188510, upload-time = "2026-04-07T01:22:15.362Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/da/701756dca71489b12309e7459f2778e28fae8e1b4facdf738c4fc896efd9/chonkie-1.6.2-py3-none-any.whl", hash = "sha256:46e438c410e74208c002dbba13d8d7eb4c23a316b8cf3cb0cf1ea1a922b8819d", size = 235054, upload-time = "2026-04-07T01:22:13.651Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
code = [
|
||||
{ name = "magika", version = "0.6.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'win32'" },
|
||||
{ name = "magika", version = "0.6.3", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'win32'" },
|
||||
{ name = "tree-sitter" },
|
||||
{ name = "tree-sitter-language-pack" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chonkie-core"
|
||||
version = "0.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "numpy" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d2/63/f351f5b6b1ecb17cbd92e482b1ae346b8abaeb105a4748eceae80bd04fa9/chonkie_core-0.10.1.tar.gz", hash = "sha256:cbca989c32c983ee3f7f899494b8efdaa32796429675705cf9c37b27f9dd8a46", size = 53359, upload-time = "2026-03-30T06:18:04.156Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/21/f6/4c1d41559ed8fd1f8d38657a06de12a36deb41ded858686391e7ad857047/chonkie_core-0.10.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:caca851909a1a002a3ae4aa24759a192a89848015debd95d572f38459fea7aee", size = 354831, upload-time = "2026-03-30T06:17:29.08Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/fe/b282f9db7835f3e94a78cd53c8f1f60fe2b3ccc27cdb0ae8340ee4983801/chonkie_core-0.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c563b3bebd00570a7f3d8e45a38cb72269633854ded63aee0785d7be437af40b", size = 342275, upload-time = "2026-03-30T06:17:30.461Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/79/62b0147b8e4c2c590b988c663fba0fed0d28663e6c123465711a0a488d60/chonkie_core-0.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36c52bcc69cf353c1a92edd09e06b42bad1b5d2cac03ae4299c0e2ad2102c60b", size = 377074, upload-time = "2026-03-30T06:17:31.537Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/53/d0/e49e62ff577ed632148701c5087ca066d6f1cd0295b5ab90561ebb88cfcd/chonkie_core-0.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e519e26ddefa4cafeea56cacf8710598e553a731850d01b065d6517f07bb92cf", size = 394795, upload-time = "2026-03-30T06:17:33.008Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d2/60/18491e23f8737d1ade2316b27b44152026c8177e21e38d84d7a0bed44f7e/chonkie_core-0.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:833dd44a170084b5a02d5857846ca8a7ca63fea8d16d1b1d4a533f91262942e1", size = 231219, upload-time = "2026-03-30T06:17:33.957Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b9/10/c30d5e690dfd09783eb6c1bfec54ec56e1adb361305f46129de777efb674/chonkie_core-0.10.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1ca877e793b7ed473742617ce7e1037f51c4f46006c55ba2381318efb0116c2b", size = 352863, upload-time = "2026-03-30T06:17:35.35Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/15/7d/bffd4948dc65a7fb81b02e283e67529ff8c23b61c31d4aeab28a2fd49fd2/chonkie_core-0.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a2704f225814ac4cf442d8c0f8232cf9eeded1ccea79cf52cafe303efdb9c06c", size = 339890, upload-time = "2026-03-30T06:17:36.851Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/54/9ef76613793ac8bd102b4153502e0f5debcdea4e58f49a1c802b38270e6f/chonkie_core-0.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971818ff0cabf153d45e987b23f2a15c3b04befbc04825498a0a99afb9ddb3b5", size = 373163, upload-time = "2026-03-30T06:17:38.216Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/45/849e1b6bd571008a55b23e3454a353fdba4ceb430dc30a3289df6145f7be/chonkie_core-0.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da1c35265cbe4829be1475eee7776c789db0957226d30c815f3b32ac51d987f1", size = 392321, upload-time = "2026-03-30T06:17:39.329Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b0/46/6ece98af607e4923bea35ff33e7e5066bea0d01540e4b29b542cff1bd784/chonkie_core-0.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:6f3ee0368343b42e684c98b71257f0cd8f01d630a5cb38aff3c7d74ab3493dcc", size = 229079, upload-time = "2026-03-30T06:17:40.612Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/1d/5aef85721794f9ae7f6c72d51fa642ce6d869bd74f2ad53462419a0045da/chonkie_core-0.10.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9eb3fce4f30e6c1d6b2bea9339919030e627e34af7c8862ce2a779b85f59de59", size = 352346, upload-time = "2026-03-30T06:17:41.996Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/5e/5a38dd0131f8f4732a22fd87c6c016bb3411d694c88d78237eec53cd8b65/chonkie_core-0.10.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d28b7dc9bbc7eaca7bf0e92bcccfe315f34b6fb105a092c20c40b76ccefdf367", size = 339606, upload-time = "2026-03-30T06:17:43.475Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/c5/c082a71cb4f51c2e436c1ed9418b901f8563e56dfa59de764fd557c86452/chonkie_core-0.10.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05354f92ce5d689fc93a7a94dd8d92a2ca1a1d00165c668733da3e62530ad33f", size = 372753, upload-time = "2026-03-30T06:17:44.543Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/64/57f291744eee0294cd03a7670bb3766af245fa4120c1f94e2e51bead5aca/chonkie_core-0.10.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:171a3c510e7232a8737fc040cac52bc71eb17fae8f7ab0e27e2886131e741b4e", size = 391609, upload-time = "2026-03-30T06:17:46.125Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/79/5b/73e4c8853c481530e5f97b753f95e0d62daad4928f824db6075417d0abae/chonkie_core-0.10.1-cp313-cp313-win_amd64.whl", hash = "sha256:3a14c1ffd83fd009d5908571fa366a7ef9e38e103237b323143a0abcc8c270c6", size = 229372, upload-time = "2026-03-30T06:17:47.492Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/7a/516ad80dfafbcd4e8757c0b6b82d509d6a23deef63a48476ff8584679e1c/chonkie_core-0.10.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a344b7fe0de01121b765f9a2e86b09e34fad8c9127d25680b530d2be8c963ff0", size = 372403, upload-time = "2026-03-30T06:17:48.774Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/67/205616f31a50bd2987eff7a4a8de6b78dfc2ccf61529bbcf99dea2d2e2fb/chonkie_core-0.10.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:872adfb8440176570c83b495fea637a636368ce940939d55373b1b62be852557", size = 352811, upload-time = "2026-03-30T06:17:50.077Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0b/62/bfa64d61a0e4f9922b2b649bbdf28a0ce704f81821ce289ce878842f5078/chonkie_core-0.10.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0dd326652c536068e70f2c89c0c88d0253a4b9218dc7cea9988963a3c2bd078b", size = 339713, upload-time = "2026-03-30T06:17:51.101Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/98/c5c5b8dbd4f0e3cedca21f5343012ecb0e9c56dbd46fa0811750401bda87/chonkie_core-0.10.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7ecabea9f4330e83d4ea15234cc14f5f89d4fa60adcbcbee8e9e41878d4e9f2", size = 372897, upload-time = "2026-03-30T06:17:52.527Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2d/b9/26273c62d4c8dc1d463f10bd31db2aecf96a8d5e0af09e02d58149ef0af2/chonkie_core-0.10.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14f2ae30ca7505de5e36ed0ba7af3644fb6d27cb1e38723dcb2ebd4b22b3e2ff", size = 391219, upload-time = "2026-03-30T06:17:53.705Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e7/16/0f2b690c04cb29e6d8a959eddef40743d405dc4da442f53cbe24f971b8c3/chonkie_core-0.10.1-cp314-cp314-win_amd64.whl", hash = "sha256:13e021b4fae2f2dda227ff349ec6eafe58bf308fd28bc65cdc840d31926f9e59", size = 229014, upload-time = "2026-03-30T06:17:54.774Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/2a/891bc6425a0b30687e58356cdaaa2cd5c58ace46f039e1c8588d246128ed/chonkie_core-0.10.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fb6a0efbd2fefe793a9a72db849c380366a46e6ef4b7bff0b0fe7a065d2bf68", size = 371819, upload-time = "2026-03-30T06:17:55.831Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/29/00/f2c1382e1a2c8fa89527123174536292da48d37942e5ac455b76cbb1daaa/chonkie_core-0.10.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f028f269eec4330cef7197315f68fb581dd2e6265e4bd11e3a3a4c7e4c38c84", size = 378685, upload-time = "2026-03-30T06:18:02.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/80/2184deed173da611c40ac215624bc7ccc2c4624c78b82d1fd4659060f3be/chonkie_core-0.10.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13c373138a28c72b150001c620fc6ccb3e13506fc610f2c3a74d3052c201640b", size = 396226, upload-time = "2026-03-30T06:18:03.194Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
version = "8.3.1"
|
||||
@@ -777,6 +881,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cobble"
|
||||
version = "0.1.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/54/7a/a507c709be2c96e1bb6102eb7b7f4026c5e5e223ef7d745a17d239e9d844/cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa", size = 3805, upload-time = "2024-06-01T18:11:09.528Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/e1/3714a2f371985215c219c2a70953d38e3eed81ef165aed061d21de0e998b/cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44", size = 3984, upload-time = "2024-06-01T18:11:07.911Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
@@ -1228,6 +1341,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/c3/7f67dea8ccf8fdcb9c99033bbe3e90b9e7395415843accb81428c441be2d/debugpy-1.8.20-py2.py3-none-any.whl", hash = "sha256:5be9bed9ae3be00665a06acaa48f8329d2b9632f15fd09f6a9a8c8d9907e54d7", size = 5337658, upload-time = "2026-01-29T23:04:17.404Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "defusedxml"
|
||||
version = "0.7.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deprecated"
|
||||
version = "1.3.1"
|
||||
@@ -1838,10 +1960,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "hermes-agent"
|
||||
version = "0.9.0"
|
||||
version = "0.10.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "anthropic" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "edge-tts" },
|
||||
{ name = "exa-py" },
|
||||
{ name = "fal-client" },
|
||||
@@ -1851,6 +1974,7 @@ dependencies = [
|
||||
{ name = "jinja2" },
|
||||
{ name = "openai" },
|
||||
{ name = "parallel-web" },
|
||||
{ name = "pathspec" },
|
||||
{ name = "prompt-toolkit" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pyjwt", extra = ["crypto"] },
|
||||
@@ -1871,6 +1995,7 @@ all = [
|
||||
{ name = "aiosqlite", marker = "sys_platform == 'linux'" },
|
||||
{ name = "alibabacloud-dingtalk" },
|
||||
{ name = "asyncpg", marker = "sys_platform == 'linux'" },
|
||||
{ name = "boto3" },
|
||||
{ name = "croniter" },
|
||||
{ name = "daytona" },
|
||||
{ name = "debugpy" },
|
||||
@@ -1893,12 +2018,16 @@ all = [
|
||||
{ name = "pytest-xdist" },
|
||||
{ name = "python-telegram-bot", extra = ["webhooks"] },
|
||||
{ name = "pywinpty", marker = "sys_platform == 'win32'" },
|
||||
{ name = "qrcode" },
|
||||
{ name = "simple-term-menu" },
|
||||
{ name = "slack-bolt" },
|
||||
{ name = "slack-sdk" },
|
||||
{ name = "sounddevice" },
|
||||
{ name = "uvicorn", extra = ["standard"] },
|
||||
]
|
||||
bedrock = [
|
||||
{ name = "boto3" },
|
||||
]
|
||||
cli = [
|
||||
{ name = "simple-term-menu" },
|
||||
]
|
||||
@@ -1918,9 +2047,11 @@ dev = [
|
||||
dingtalk = [
|
||||
{ name = "alibabacloud-dingtalk" },
|
||||
{ name = "dingtalk-stream" },
|
||||
{ name = "qrcode" },
|
||||
]
|
||||
feishu = [
|
||||
{ name = "lark-oapi" },
|
||||
{ name = "qrcode" },
|
||||
]
|
||||
homeassistant = [
|
||||
{ name = "aiohttp" },
|
||||
@@ -1941,6 +2072,7 @@ messaging = [
|
||||
{ name = "aiohttp" },
|
||||
{ name = "discord-py", extra = ["voice"] },
|
||||
{ name = "python-telegram-bot", extra = ["webhooks"] },
|
||||
{ name = "qrcode" },
|
||||
{ name = "slack-bolt" },
|
||||
{ name = "slack-sdk" },
|
||||
]
|
||||
@@ -1950,6 +2082,9 @@ mistral = [
|
||||
modal = [
|
||||
{ name = "modal" },
|
||||
]
|
||||
parsing = [
|
||||
{ name = "markitdown", extra = ["docx", "pdf", "pptx"] },
|
||||
]
|
||||
pty = [
|
||||
{ name = "ptyprocess", marker = "sys_platform != 'win32'" },
|
||||
{ name = "pywinpty", marker = "sys_platform == 'win32'" },
|
||||
@@ -1974,6 +2109,7 @@ termux = [
|
||||
{ name = "honcho-ai" },
|
||||
{ name = "mcp" },
|
||||
{ name = "ptyprocess", marker = "sys_platform != 'win32'" },
|
||||
{ name = "python-telegram-bot", extra = ["webhooks"] },
|
||||
{ name = "pywinpty", marker = "sys_platform == 'win32'" },
|
||||
{ name = "simple-term-menu" },
|
||||
]
|
||||
@@ -1989,6 +2125,10 @@ web = [
|
||||
{ name = "fastapi" },
|
||||
{ name = "uvicorn", extra = ["standard"] },
|
||||
]
|
||||
workspace = [
|
||||
{ name = "chonkie", extra = ["code"] },
|
||||
{ name = "markitdown", extra = ["docx", "pdf", "pptx"] },
|
||||
]
|
||||
yc-bench = [
|
||||
{ name = "yc-bench", marker = "python_full_version >= '3.12'" },
|
||||
]
|
||||
@@ -2003,7 +2143,10 @@ requires-dist = [
|
||||
{ name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = ">=2.0.0" },
|
||||
{ name = "anthropic", specifier = ">=0.39.0,<1" },
|
||||
{ name = "asyncpg", marker = "extra == 'matrix'", specifier = ">=0.29" },
|
||||
{ name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git" },
|
||||
{ name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30" },
|
||||
{ name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.35.0,<2" },
|
||||
{ name = "charset-normalizer", specifier = ">=3.3.0,<4" },
|
||||
{ name = "chonkie", extras = ["code"], marker = "extra == 'workspace'", specifier = ">=1.6.0,<2" },
|
||||
{ name = "croniter", marker = "extra == 'cron'", specifier = ">=6.0.0,<7" },
|
||||
{ name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" },
|
||||
{ name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0,<2" },
|
||||
@@ -2020,6 +2163,7 @@ requires-dist = [
|
||||
{ name = "firecrawl-py", specifier = ">=4.16.0,<5" },
|
||||
{ name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["acp"], marker = "extra == 'termux'" },
|
||||
{ name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["cli"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["cli"], marker = "extra == 'termux'" },
|
||||
{ name = "hermes-agent", extras = ["cron"], marker = "extra == 'all'" },
|
||||
@@ -2037,6 +2181,7 @@ requires-dist = [
|
||||
{ name = "hermes-agent", extras = ["messaging"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["mistral"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["modal"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["parsing"], marker = "extra == 'workspace'" },
{ name = "hermes-agent", extras = ["pty"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["pty"], marker = "extra == 'termux'" },
{ name = "hermes-agent", extras = ["slack"], marker = "extra == 'all'" },
@@ -2049,6 +2194,7 @@ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.5,<4" },
{ name = "lark-oapi", marker = "extra == 'feishu'", specifier = ">=1.5.3,<2" },
{ name = "markdown", marker = "extra == 'matrix'", specifier = ">=3.6,<4" },
{ name = "markitdown", extras = ["pdf", "docx", "pptx"], marker = "extra == 'parsing'", specifier = ">=0.1.0" },
{ name = "mautrix", extras = ["encryption"], marker = "extra == 'matrix'", specifier = ">=0.20,<1" },
{ name = "mcp", marker = "extra == 'dev'", specifier = ">=1.2.0,<2" },
{ name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.2.0,<2" },
@@ -2057,6 +2203,7 @@ requires-dist = [
{ name = "numpy", marker = "extra == 'voice'", specifier = ">=1.24.0,<3" },
{ name = "openai", specifier = ">=2.21.0,<3" },
{ name = "parallel-web", specifier = ">=0.4.2,<1" },
{ name = "pathspec", specifier = ">=0.12.0,<1" },
{ name = "prompt-toolkit", specifier = ">=3.0.52,<4" },
{ name = "ptyprocess", marker = "sys_platform != 'win32' and extra == 'pty'", specifier = ">=0.7.0,<1" },
{ name = "pydantic", specifier = ">=2.12.5,<3" },
@@ -2066,8 +2213,12 @@ requires-dist = [
{ name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.0,<4" },
{ name = "python-dotenv", specifier = ">=1.2.1,<2" },
{ name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = ">=22.6,<23" },
{ name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'termux'", specifier = ">=22.6,<23" },
{ name = "pywinpty", marker = "sys_platform == 'win32' and extra == 'pty'", specifier = ">=2.0.0,<3" },
{ name = "pyyaml", specifier = ">=6.0.2,<7" },
{ name = "qrcode", marker = "extra == 'dingtalk'", specifier = ">=7.0,<8" },
{ name = "qrcode", marker = "extra == 'feishu'", specifier = ">=7.0,<8" },
{ name = "qrcode", marker = "extra == 'messaging'", specifier = ">=7.0,<8" },
{ name = "requests", specifier = ">=2.33.0,<3" },
{ name = "rich", specifier = ">=14.3.3,<15" },
{ name = "simple-term-menu", marker = "extra == 'cli'", specifier = ">=1.0,<2" },
@@ -2077,13 +2228,13 @@ requires-dist = [
{ name = "slack-sdk", marker = "extra == 'slack'", specifier = ">=3.27.0,<4" },
{ name = "sounddevice", marker = "extra == 'voice'", specifier = ">=0.4.6,<1" },
{ name = "tenacity", specifier = ">=9.1.4,<10" },
{ name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git" },
{ name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b" },
{ name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = ">=0.24.0,<1" },
{ name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = ">=0.24.0,<1" },
{ name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" },
{ name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" },
{ name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" },
]
provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"]
provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "bedrock", "workspace", "parsing", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"]
[[package]]
name = "hf-transfer"
@@ -2410,6 +2561,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" },
]
[[package]]
name = "jmespath"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
]
[[package]]
name = "joblib"
version = "1.5.3"
@@ -2624,6 +2784,165 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/78/fd/da11826dda0d332e360b9ead6c0c992d612ecb85b00df494823843cfcda3/litellm-1.81.15-py3-none-any.whl", hash = "sha256:2fa253658702509ce09fe0e172e5a47baaadf697fb0f784c7fd4ff665ae76ae1", size = 14682123, upload-time = "2026-02-24T06:52:48.084Z" },
]
[[package]]
name = "lxml"
version = "6.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/28/30/9abc9e34c657c33834eaf6cd02124c61bdf5944d802aa48e69be8da3585d/lxml-6.1.0.tar.gz", hash = "sha256:bfd57d8008c4965709a919c3e9a98f76c2c7cb319086b3d26858250620023b13", size = 4197006, upload-time = "2026-04-18T04:32:51.613Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5e/5d/3bccad330292946f97962df9d5f2d3ae129cce6e212732a781e856b91e07/lxml-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cec05be8c876f92a5aa07b01d60bbb4d11cfbdd654cad0561c0d7b5c043a61b9", size = 8526232, upload-time = "2026-04-18T04:27:40.389Z" },
{ url = "https://files.pythonhosted.org/packages/a7/51/adc8826570a112f83bb4ddb3a2ab510bbc2ccd62c1b9fe1f34fae2d90b57/lxml-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9c03e048b6ce8e77b09c734e931584894ecd58d08296804ca2d0b184c933ce50", size = 4595448, upload-time = "2026-04-18T04:27:44.208Z" },
{ url = "https://files.pythonhosted.org/packages/54/84/5a9ec07cbe1d2334a6465f863b949a520d2699a755738986dcd3b6b89e3f/lxml-6.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:942454ff253da14218f972b23dc72fa4edf6c943f37edd19cd697618b626fac5", size = 4923771, upload-time = "2026-04-18T04:32:17.402Z" },
{ url = "https://files.pythonhosted.org/packages/a7/23/851cfa33b6b38adb628e45ad51fb27105fa34b2b3ba9d1d4aa7a9428dfe0/lxml-6.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d036ee7b99d5148072ac7c9b847193decdfeac633db350363f7bce4fff108f0e", size = 5068101, upload-time = "2026-04-18T04:32:21.437Z" },
{ url = "https://files.pythonhosted.org/packages/b0/38/41bf99c2023c6b79916ba057d83e9db21d642f473cac210201222882d38b/lxml-6.1.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ae5d8d5427f3cc317e7950f2da7ad276df0cfa37b8de2f5658959e618ea8512", size = 5002573, upload-time = "2026-04-18T04:32:25.373Z" },
{ url = "https://files.pythonhosted.org/packages/c2/20/053aa10bdc39747e1e923ce2d45413075e84f70a136045bb09e5eaca41d3/lxml-6.1.0-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:363e47283bde87051b821826e71dde47f107e08614e1aa312ba0c5711e77738c", size = 5202816, upload-time = "2026-04-18T04:32:29.393Z" },
{ url = "https://files.pythonhosted.org/packages/9a/da/bc710fad8bf04b93baee752c192eaa2210cd3a84f969d0be7830fea55802/lxml-6.1.0-cp311-cp311-manylinux_2_28_i686.whl", hash = "sha256:f504d861d9f2a8f94020130adac88d66de93841707a23a86244263d1e54682f5", size = 5329999, upload-time = "2026-04-18T04:32:34.019Z" },
{ url = "https://files.pythonhosted.org/packages/b3/cb/bf035dedbdf7fab49411aa52e4236f3445e98d38647d85419e6c0d2806b9/lxml-6.1.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:23a5dc68e08ed13331d61815c08f260f46b4a60fdd1640bbeb82cf89a9d90289", size = 4659643, upload-time = "2026-04-18T04:32:37.932Z" },
{ url = "https://files.pythonhosted.org/packages/5c/4f/22be31f33727a5e4c7b01b0a874503026e50329b259d3587e0b923cf964b/lxml-6.1.0-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f15401d8d3dbf239e23c818afc10c7207f7b95f9a307e092122b6f86dd43209a", size = 5265963, upload-time = "2026-04-18T04:32:41.881Z" },
{ url = "https://files.pythonhosted.org/packages/c8/2b/d44d0e5c79226017f4ab8c87a802ebe4f89f97e6585a8e4166dffcdd7b6e/lxml-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fcf3da95e93349e0647d48d4b36a12783105bcc74cb0c416952f9988410846a3", size = 5045444, upload-time = "2026-04-18T04:32:44.512Z" },
{ url = "https://files.pythonhosted.org/packages/d3/c3/3f034fec1594c331a6dbf9491238fdcc9d66f68cc529e109ec75b97197e1/lxml-6.1.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:0d082495c5fcf426e425a6e28daaba1fcb6d8f854a4ff01effb1f1f381203eb9", size = 4712703, upload-time = "2026-04-18T04:32:47.16Z" },
{ url = "https://files.pythonhosted.org/packages/12/16/0b83fccc158218aca75a7aa33e97441df737950734246b9fffa39301603d/lxml-6.1.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:e3c4f84b24a1fcba435157d111c4b755099c6ff00a3daee1ad281817de75ed11", size = 5252745, upload-time = "2026-04-18T04:32:50.427Z" },
{ url = "https://files.pythonhosted.org/packages/dd/ee/12e6c1b39a77666c02eaa77f94a870aaf63c4ac3a497b2d52319448b01c6/lxml-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:976a6b39b1b13e8c354ad8d3f261f3a4ac6609518af91bdb5094760a08f132c4", size = 5226822, upload-time = "2026-04-18T04:32:53.437Z" },
{ url = "https://files.pythonhosted.org/packages/34/20/c7852904858b4723af01d2fc14b5d38ff57cb92f01934a127ebd9a9e51aa/lxml-6.1.0-cp311-cp311-win32.whl", hash = "sha256:857efde87d365706590847b916baff69c0bc9252dc5af030e378c9800c0b10e3", size = 3594026, upload-time = "2026-04-18T04:27:31.903Z" },
{ url = "https://files.pythonhosted.org/packages/02/05/d60c732b56da5085175c07c74b2df4e6d181b0c9a61e1691474f06ef4b39/lxml-6.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:183bfb45a493081943be7ea2b5adfc2b611e1cf377cefa8b8a8be404f45ef9a7", size = 4025114, upload-time = "2026-04-18T04:27:34.077Z" },
{ url = "https://files.pythonhosted.org/packages/c2/df/c84dcc175fd690823436d15b41cb920cd5ba5e14cd8bfb00949d5903b320/lxml-6.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:19f4164243fc206d12ed3d866e80e74f5bc3627966520da1a5f97e42c32a3f39", size = 3667742, upload-time = "2026-04-18T04:27:38.45Z" },
{ url = "https://files.pythonhosted.org/packages/d2/d4/9326838b59dc36dfae42eec9656b97520f9997eee1de47b8316aaeed169c/lxml-6.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d2f17a16cd8751e8eb233a7e41aecdf8e511712e00088bf9be455f604cd0d28d", size = 8570663, upload-time = "2026-04-18T04:27:48.253Z" },
{ url = "https://files.pythonhosted.org/packages/d8/a4/053745ce1f8303ccbb788b86c0db3a91b973675cefc42566a188637b7c40/lxml-6.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f0cea5b1d3e6e77d71bd2b9972eb2446221a69dc52bb0b9c3c6f6e5700592d93", size = 4624024, upload-time = "2026-04-18T04:27:52.594Z" },
{ url = "https://files.pythonhosted.org/packages/90/97/a517944b20f8fd0932ad2109482bee4e29fe721416387a363306667941f6/lxml-6.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc46da94826188ed45cb53bd8e3fc076ae22675aea2087843d4735627f867c6d", size = 4930895, upload-time = "2026-04-18T04:32:56.29Z" },
{ url = "https://files.pythonhosted.org/packages/94/7c/e08a970727d556caa040a44773c7b7e3ad0f0d73dedc863543e9a8b931f2/lxml-6.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9147d8e386ec3b82c3b15d88927f734f565b0aaadef7def562b853adca45784a", size = 5093820, upload-time = "2026-04-18T04:32:58.94Z" },
{ url = "https://files.pythonhosted.org/packages/88/ee/2a5c2aa2c32016a226ca25d3e1056a8102ea6e1fe308bf50213586635400/lxml-6.1.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5715e0e28736a070f3f34a7ccc09e2fdcba0e3060abbcf61a1a5718ff6d6b105", size = 5005790, upload-time = "2026-04-18T04:33:01.272Z" },
{ url = "https://files.pythonhosted.org/packages/e3/38/a0db9be8f38ad6043ab9429487c128dd1d30f07956ef43040402f8da49e8/lxml-6.1.0-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4937460dc5df0cdd2f06a86c285c28afda06aefa3af949f9477d3e8df430c485", size = 5630827, upload-time = "2026-04-18T04:33:04.036Z" },
{ url = "https://files.pythonhosted.org/packages/31/ba/3c13d3fc24b7cacf675f808a3a1baabf43a30d0cd24c98f94548e9aa58eb/lxml-6.1.0-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc783ee3147e60a25aa0445ea82b3e8aabb83b240f2b95d32cb75587ff781814", size = 5240445, upload-time = "2026-04-18T04:33:06.87Z" },
{ url = "https://files.pythonhosted.org/packages/55/ba/eeef4ccba09b2212fe239f46c1692a98db1878e0872ae320756488878a94/lxml-6.1.0-cp312-cp312-manylinux_2_28_i686.whl", hash = "sha256:40d9189f80075f2e1f88db21ef815a2b17b28adf8e50aaf5c789bfe737027f32", size = 5350121, upload-time = "2026-04-18T04:33:09.365Z" },
{ url = "https://files.pythonhosted.org/packages/7e/01/1da87c7b587c38d0cbe77a01aae3b9c1c49ed47d76918ef3db8fc151b1ca/lxml-6.1.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:05b9b8787e35bec69e68daf4952b2e6dfcfb0db7ecf1a06f8cdfbbac4eb71aad", size = 4694949, upload-time = "2026-04-18T04:33:11.628Z" },
{ url = "https://files.pythonhosted.org/packages/a1/88/7db0fe66d5aaf128443ee1623dec3db1576f3e4c17751ec0ef5866468590/lxml-6.1.0-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0f0f08beb0182e3e9a86fae124b3c47a7b41b7b69b225e1377db983802404e54", size = 5243901, upload-time = "2026-04-18T04:33:13.95Z" },
{ url = "https://files.pythonhosted.org/packages/00/a8/1346726af7d1f6fca1f11223ba34001462b0a3660416986d37641708d57c/lxml-6.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73becf6d8c81d4c76b1014dbd3584cb26d904492dcf73ca85dc8bff08dcd6d2d", size = 5048054, upload-time = "2026-04-18T04:33:16.965Z" },
{ url = "https://files.pythonhosted.org/packages/2e/b7/85057012f035d1a0c87e02f8c723ca3c3e6e0728bcf4cb62080b21b1c1e3/lxml-6.1.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1ae225f66e5938f4fa29d37e009a3bb3b13032ac57eb4eb42afa44f6e4054e69", size = 4777324, upload-time = "2026-04-18T04:33:19.832Z" },
{ url = "https://files.pythonhosted.org/packages/75/6c/ad2f94a91073ef570f33718040e8e160d5fb93331cf1ab3ca1323f939e2d/lxml-6.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:690022c7fae793b0489aa68a658822cea83e0d5933781811cabbf5ea3bcfe73d", size = 5645702, upload-time = "2026-04-18T04:33:22.436Z" },
{ url = "https://files.pythonhosted.org/packages/3b/89/0bb6c0bd549c19004c60eea9dc554dd78fd647b72314ef25d460e0d208c6/lxml-6.1.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:63aeafc26aac0be8aff14af7871249e87ea1319be92090bfd632ec68e03b16a5", size = 5232901, upload-time = "2026-04-18T04:33:26.21Z" },
{ url = "https://files.pythonhosted.org/packages/a1/d9/d609a11fb567da9399f525193e2b49847b5a409cdebe737f06a8b7126bdc/lxml-6.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:264c605ab9c0e4aa1a679636f4582c4d3313700009fac3ec9c3412ed0d8f3e1d", size = 5261333, upload-time = "2026-04-18T04:33:28.984Z" },
{ url = "https://files.pythonhosted.org/packages/a6/3a/ac3f99ec8ac93089e7dd556f279e0d14c24de0a74a507e143a2e4b496e7c/lxml-6.1.0-cp312-cp312-win32.whl", hash = "sha256:56971379bc5ee8037c5a0f09fa88f66cdb7d37c3e38af3e45cf539f41131ac1f", size = 3596289, upload-time = "2026-04-18T04:27:42.819Z" },
{ url = "https://files.pythonhosted.org/packages/f2/a7/0a915557538593cb1bbeedcd40e13c7a261822c26fecbbdb71dad0c2f540/lxml-6.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:bba078de0031c219e5dd06cf3e6bf8fb8e6e64a77819b358f53bb132e3e03366", size = 3997059, upload-time = "2026-04-18T04:27:46.764Z" },
{ url = "https://files.pythonhosted.org/packages/92/96/a5dc078cf0126fbfbc35611d77ecd5da80054b5893e28fb213a5613b9e1d/lxml-6.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:c3592631e652afa34999a088f98ba7dfc7d6aff0d535c410bea77a71743f3819", size = 3659552, upload-time = "2026-04-18T04:27:51.133Z" },
{ url = "https://files.pythonhosted.org/packages/08/03/69347590f1cf4a6d5a4944bb6099e6d37f334784f16062234e1f892fdb1d/lxml-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a0092f2b107b69601adf562a57c956fbb596e05e3e6651cabd3054113b007e45", size = 8559689, upload-time = "2026-04-18T04:31:57.785Z" },
{ url = "https://files.pythonhosted.org/packages/3f/58/25e00bb40b185c974cfe156c110474d9a8a8390d5f7c92a4e328189bb60e/lxml-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fc7140d7a7386e6b545d41b7358f4d02b656d4053f5fa6859f92f4b9c2572c4d", size = 4617892, upload-time = "2026-04-18T04:32:01.78Z" },
{ url = "https://files.pythonhosted.org/packages/f5/54/92ad98a94ac318dc4f97aaac22ff8d1b94212b2ae8af5b6e9b354bf825f7/lxml-6.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:419c58fc92cc3a2c3fa5f78c63dbf5da70c1fa9c1b25f25727ecee89a96c7de2", size = 4923489, upload-time = "2026-04-18T04:33:31.401Z" },
{ url = "https://files.pythonhosted.org/packages/15/3b/a20aecfab42bdf4f9b390590d345857ad3ffd7c51988d1c89c53a0c73faf/lxml-6.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:37fabd1452852636cf38ecdcc9dd5ca4bba7a35d6c53fa09725deeb894a87491", size = 5082162, upload-time = "2026-04-18T04:33:34.262Z" },
{ url = "https://files.pythonhosted.org/packages/45/26/2cdb3d281ac1bd175603e290cbe4bad6eff127c0f8de90bafd6f8548f0fd/lxml-6.1.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2853c8b2170cc6cd54a6b4d50d2c1a8a7aeca201f23804b4898525c7a152cfc", size = 4993247, upload-time = "2026-04-18T04:33:36.674Z" },
{ url = "https://files.pythonhosted.org/packages/f6/05/d735aef963740022a08185c84821f689fc903acb3d50326e6b1e9886cc22/lxml-6.1.0-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8e369cbd690e788c8d15e56222d91a09c6a417f49cbc543040cba0fe2e25a79e", size = 5613042, upload-time = "2026-04-18T04:33:39.205Z" },
{ url = "https://files.pythonhosted.org/packages/ee/b8/ead7c10efff731738c72e59ed6eb5791854879fbed7ae98781a12006263a/lxml-6.1.0-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e69aa6805905807186eb00e66c6d97a935c928275182eb02ee40ba00da9623b2", size = 5228304, upload-time = "2026-04-18T04:33:41.647Z" },
{ url = "https://files.pythonhosted.org/packages/6b/10/e9842d2ec322ea65f0a7270aa0315a53abed06058b88ef1b027f620e7a5f/lxml-6.1.0-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:4bd1bdb8a9e0e2dd229de19b5f8aebac80e916921b4b2c6ef8a52bc131d0c1f9", size = 5341578, upload-time = "2026-04-18T04:33:44.596Z" },
{ url = "https://files.pythonhosted.org/packages/89/54/40d9403d7c2775fa7301d3ddd3464689bfe9ba71acc17dfff777071b4fdc/lxml-6.1.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:cbd7b79cdcb4986ad78a2662625882747f09db5e4cd7b2ae178a88c9c51b3dfe", size = 4700209, upload-time = "2026-04-18T04:33:47.552Z" },
{ url = "https://files.pythonhosted.org/packages/85/b2/bbdcc2cf45dfc7dfffef4fd97e5c47b15919b6a365247d95d6f684ef5e82/lxml-6.1.0-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:43e4d297f11080ec9d64a4b1ad7ac02b4484c9f0e2179d9c4ef78e886e747b88", size = 5232365, upload-time = "2026-04-18T04:33:50.249Z" },
{ url = "https://files.pythonhosted.org/packages/48/5a/b06875665e53aaba7127611a7bed3b7b9658e20b22bc2dd217a0b7ab0091/lxml-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cc16682cc987a3da00aa56a3aa3075b08edb10d9b1e476938cfdbee8f3b67181", size = 5043654, upload-time = "2026-04-18T04:33:52.71Z" },
{ url = "https://files.pythonhosted.org/packages/e9/9c/e71a069d09641c1a7abeb30e693f828c7c90a41cbe3d650b2d734d876f85/lxml-6.1.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d6d8efe71429635f0559579092bb5e60560d7b9115ee38c4adbea35632e7fa24", size = 4769326, upload-time = "2026-04-18T04:33:55.244Z" },
{ url = "https://files.pythonhosted.org/packages/cc/06/7a9cd84b3d4ed79adf35f874750abb697dec0b4a81a836037b36e47c091a/lxml-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7e39ab3a28af7784e206d8606ec0e4bcad0190f63a492bca95e94e5a4aef7f6e", size = 5635879, upload-time = "2026-04-18T04:33:58.509Z" },
{ url = "https://files.pythonhosted.org/packages/cc/f0/9d57916befc1e54c451712c7ee48e9e74e80ae4d03bdce49914e0aee42cd/lxml-6.1.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:9eb667bf50856c4a58145f8ca2d5e5be160191e79eb9e30855a476191b3c3495", size = 5224048, upload-time = "2026-04-18T04:34:00.943Z" },
{ url = "https://files.pythonhosted.org/packages/99/75/90c4eefda0c08c92221fe0753db2d6699a4c628f76ff4465ec20dea84cc1/lxml-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7f4a77d6f7edf9230cee3e1f7f6764722a41604ee5681844f18db9a81ea0ec33", size = 5250241, upload-time = "2026-04-18T04:34:03.365Z" },
{ url = "https://files.pythonhosted.org/packages/5e/73/16596f7e4e38fa33084b9ccbccc22a15f82a290a055126f2c1541236d2ff/lxml-6.1.0-cp313-cp313-win32.whl", hash = "sha256:28902146ffbe5222df411c5d19e5352490122e14447e98cd118907ee3fd6ee62", size = 3596938, upload-time = "2026-04-18T04:31:56.206Z" },
{ url = "https://files.pythonhosted.org/packages/8e/63/981401c5680c1eb30893f00a19641ac80db5d1e7086c62cb4b13ed813038/lxml-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:4a1503c56e4e2b38dc76f2f2da7bae69670c0f1933e27cfa34b2fa5876410b16", size = 3995728, upload-time = "2026-04-18T04:31:58.763Z" },
{ url = "https://files.pythonhosted.org/packages/e7/e8/c358a38ac3e541d16a1b527e4e9cb78c0419b0506a070ace11777e5e8404/lxml-6.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:e0af85773850417d994d019741239b901b22c6680206f46a34766926e466141d", size = 3658372, upload-time = "2026-04-18T04:32:03.629Z" },
{ url = "https://files.pythonhosted.org/packages/eb/45/cee4cf203ef0bab5c52afc118da61d6b460c928f2893d40023cfa27e0b80/lxml-6.1.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:ab863fd37458fed6456525f297d21239d987800c46e67da5ef04fc6b3dd93ac8", size = 8576713, upload-time = "2026-04-18T04:32:06.831Z" },
{ url = "https://files.pythonhosted.org/packages/8a/a7/eda05babeb7e046839204eaf254cd4d7c9130ce2bbf0d9e90ea41af5654d/lxml-6.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:6fd8b1df8254ff4fd93fd31da1fc15770bde23ac045be9bb1f87425702f61cc9", size = 4623874, upload-time = "2026-04-18T04:32:10.755Z" },
{ url = "https://files.pythonhosted.org/packages/e7/e9/db5846de9b436b91890a62f29d80cd849ea17948a49bf532d5278ee69a9e/lxml-6.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:47024feaae386a92a146af0d2aeed65229bf6fff738e6a11dda6b0015fb8fd03", size = 4949535, upload-time = "2026-04-18T04:34:06.657Z" },
{ url = "https://files.pythonhosted.org/packages/5a/ba/0d3593373dcae1d68f40dc3c41a5a92f2544e68115eb2f62319a4c2a6500/lxml-6.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3f00972f84450204cd5d93a5395965e348956aaceaadec693a22ec743f8ae3eb", size = 5086881, upload-time = "2026-04-18T04:34:09.556Z" },
{ url = "https://files.pythonhosted.org/packages/43/76/759a7484539ad1af0d125a9afe9c3fb5f82a8779fd1f5f56319d9e4ea2fd/lxml-6.1.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97faa0860e13b05b15a51fb4986421ef7a30f0b3334061c416e0981e9450ca4c", size = 5031305, upload-time = "2026-04-18T04:34:12.336Z" },
{ url = "https://files.pythonhosted.org/packages/dc/b9/c1f0daf981a11e47636126901fd4ab82429e18c57aeb0fc3ad2940b42d8b/lxml-6.1.0-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:972a6451204798675407beaad97b868d0c733d9a74dafefc63120b81b8c2de28", size = 5647522, upload-time = "2026-04-18T04:34:14.89Z" },
{ url = "https://files.pythonhosted.org/packages/31/e6/1f533dcd205275363d9ba3511bcec52fa2df86abf8abe6a5f2c599f0dc31/lxml-6.1.0-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fe022f20bc4569ec66b63b3fb275a3d628d9d32da6326b2982584104db6d3086", size = 5239310, upload-time = "2026-04-18T04:34:17.652Z" },
{ url = "https://files.pythonhosted.org/packages/c3/8c/4175fb709c78a6e315ed814ed33be3defd8b8721067e70419a6cf6f971da/lxml-6.1.0-cp314-cp314-manylinux_2_28_i686.whl", hash = "sha256:75c4c7c619a744f972f4451bf5adf6d0fb00992a1ffc9fd78e13b0bc817cc99f", size = 5350799, upload-time = "2026-04-18T04:34:20.529Z" },
{ url = "https://files.pythonhosted.org/packages/fd/77/6ffdebc5994975f0dde4acb59761902bd9d9bb84422b9a0bd239a7da9ca8/lxml-6.1.0-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:3648f20d25102a22b6061c688beb3a805099ea4beb0a01ce62975d926944d292", size = 4697693, upload-time = "2026-04-18T04:34:23.541Z" },
{ url = "https://files.pythonhosted.org/packages/f8/f1/565f36bd5c73294602d48e04d23f81ff4c8736be6ba5e1d1ec670ac9be80/lxml-6.1.0-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77b9f99b17cbf14026d1e618035077060fc7195dd940d025149f3e2e830fbfcb", size = 5250708, upload-time = "2026-04-18T04:34:26.001Z" },
{ url = "https://files.pythonhosted.org/packages/5a/11/a68ab9dd18c5c499404deb4005f4bc4e0e88e5b72cd755ad96efec81d18d/lxml-6.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:32662519149fd7a9db354175aa5e417d83485a8039b8aaa62f873ceee7ea4cad", size = 5084737, upload-time = "2026-04-18T04:34:28.32Z" },
{ url = "https://files.pythonhosted.org/packages/ab/78/e8f41e2c74f4af564e6a0348aea69fb6daaefa64bc071ef469823d22cc18/lxml-6.1.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:73d658216fc173cf2c939e90e07b941c5e12736b0bf6a99e7af95459cfe8eabb", size = 4737817, upload-time = "2026-04-18T04:34:30.784Z" },
{ url = "https://files.pythonhosted.org/packages/06/2d/aa4e117aa2ce2f3b35d9ff246be74a2f8e853baba5d2a92c64744474603a/lxml-6.1.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ac4db068889f8772a4a698c5980ec302771bb545e10c4b095d4c8be26749616f", size = 5670753, upload-time = "2026-04-18T04:34:33.675Z" },
{ url = "https://files.pythonhosted.org/packages/08/f5/dd745d50c0409031dbfcc4881740542a01e54d6f0110bd420fa7782110b8/lxml-6.1.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:45e9dfbd1b661eb64ba0d4dbe762bd210c42d86dd1e5bd2bdf89d634231beb43", size = 5238071, upload-time = "2026-04-18T04:34:36.12Z" },
{ url = "https://files.pythonhosted.org/packages/3e/74/ad424f36d0340a904665867dab310a3f1f4c96ff4039698de83b77f44c1f/lxml-6.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:89e8d73d09ac696a5ba42ec69787913d53284f12092f651506779314f10ba585", size = 5264319, upload-time = "2026-04-18T04:34:39.035Z" },
{ url = "https://files.pythonhosted.org/packages/53/36/a15d8b3514ec889bfd6aa3609107fcb6c9189f8dc347f1c0b81eded8d87c/lxml-6.1.0-cp314-cp314-win32.whl", hash = "sha256:ebe33f4ec1b2de38ceb225a1749a2965855bffeef435ba93cd2d5d540783bf2f", size = 3657139, upload-time = "2026-04-18T04:32:20.006Z" },
{ url = "https://files.pythonhosted.org/packages/1a/a4/263ebb0710851a3c6c937180a9a86df1206fdfe53cc43005aa2237fd7736/lxml-6.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:398443df51c538bd578529aa7e5f7afc6c292644174b47961f3bf87fe5741120", size = 4064195, upload-time = "2026-04-18T04:32:23.876Z" },
{ url = "https://files.pythonhosted.org/packages/80/68/2000f29d323b6c286de077ad20b429fc52272e44eae6d295467043e56012/lxml-6.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:8c8984e1d8c4b3949e419158fda14d921ff703a9ed8a47236c6eb7a2b6cb4946", size = 3741870, upload-time = "2026-04-18T04:32:27.922Z" },
{ url = "https://files.pythonhosted.org/packages/30/e9/21383c7c8d43799f0da90224c0d7c921870d476ec9b3e01e1b2c0b8237c5/lxml-6.1.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:1081dd10bc6fa437db2500e13993abf7cc30716d0a2f40e65abb935f02ec559c", size = 8827548, upload-time = "2026-04-18T04:32:15.094Z" },
{ url = "https://files.pythonhosted.org/packages/a5/01/c6bc11cd587030dd4f719f65c5657960649fe3e19196c844c75bf32cd0d6/lxml-6.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:dabecc48db5f42ba348d1f5d5afdc54c6c4cc758e676926c7cd327045749517d", size = 4735866, upload-time = "2026-04-18T04:32:18.924Z" },
{ url = "https://files.pythonhosted.org/packages/f3/01/757132fff5f4acf25463b5298f1a46099f3a94480b806547b29ce5e385de/lxml-6.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e3dd5fe19c9e0ac818a9c7f132a5e43c1339ec1cbbfecb1a938bd3a47875b7c9", size = 4969476, upload-time = "2026-04-18T04:34:41.889Z" },
{ url = "https://files.pythonhosted.org/packages/fd/fb/1bc8b9d27ed64be7c8903db6c89e74dc8c2cd9ec630a7462e4654316dc5b/lxml-6.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9e7b0a4ca6dcc007a4cef00a761bba2dea959de4bd2df98f926b33c92ca5dfb9", size = 5103719, upload-time = "2026-04-18T04:34:44.797Z" },
{ url = "https://files.pythonhosted.org/packages/d5/e7/5bf82fa28133536a54601aae633b14988e89ed61d4c1eb6b899b023233aa/lxml-6.1.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d27bbe326c6b539c64b42638b18bc6003a8d88f76213a97ac9ed4f885efeab7", size = 5027890, upload-time = "2026-04-18T04:34:47.634Z" },
{ url = "https://files.pythonhosted.org/packages/2d/20/e048db5d4b4ea0366648aa595f26bb764b2670903fc585b87436d0a5032c/lxml-6.1.0-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4e425db0c5445ef0ad56b0eec54f89b88b2d884656e536a90b2f52aecb4ca86", size = 5596008, upload-time = "2026-04-18T04:34:51.503Z" },
{ url = "https://files.pythonhosted.org/packages/9a/c2/d10807bc8da4824b39e5bd01b5d05c077b6fd01bd91584167edf6b269d22/lxml-6.1.0-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4b89b098105b8599dc57adac95d1813409ac476d3c948a498775d3d0c6124bfb", size = 5224451, upload-time = "2026-04-18T04:34:54.263Z" },
{ url = "https://files.pythonhosted.org/packages/3c/15/2ebea45bea427e7f0057e9ce7b2d62c5aba20c6b001cca89ed0aadb3ad41/lxml-6.1.0-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:c4a699432846df86cc3de502ee85f445ebad748a1c6021d445f3e514d2cd4b1c", size = 5312135, upload-time = "2026-04-18T04:34:56.818Z" },
{ url = "https://files.pythonhosted.org/packages/31/e2/87eeae151b0be2a308d49a7ec444ff3eb192b14251e62addb29d0bf3778f/lxml-6.1.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:30e7b2ed63b6c8e97cca8af048589a788ab5c9c905f36d9cf1c2bb549f450d2f", size = 4639126, upload-time = "2026-04-18T04:34:59.704Z" },
{ url = "https://files.pythonhosted.org/packages/a3/51/8a3f6a20902ad604dd746ec7b4000311b240d389dac5e9d95adefd349e0c/lxml-6.1.0-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:022981127642fe19866d2907d76241bb07ed21749601f727d5d5dd1ce5d1b773", size = 5232579, upload-time = "2026-04-18T04:35:02.658Z" },
{ url = "https://files.pythonhosted.org/packages/6d/d2/650d619bdbe048d2c3f2c31edb00e35670a5e2d65b4fe3b61bce37b19121/lxml-6.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:23cad0cc86046d4222f7f418910e46b89971c5a45d3c8abfad0f64b7b05e4a9b", size = 5084206, upload-time = "2026-04-18T04:35:05.175Z" },
{ url = "https://files.pythonhosted.org/packages/dd/8a/672ca1a3cbeabd1f511ca275a916c0514b747f4b85bdaae103b8fa92f307/lxml-6.1.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:21c3302068f50d1e8728c67c87ba92aa87043abee517aa2576cca1855326b405", size = 4758906, upload-time = "2026-04-18T04:35:08.098Z" },
{ url = "https://files.pythonhosted.org/packages/be/f1/ef4b691da85c916cb2feb1eec7414f678162798ac85e042fa164419ac05c/lxml-6.1.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:be10838781cb3be19251e276910cd508fe127e27c3242e50521521a0f3781690", size = 5620553, upload-time = "2026-04-18T04:35:11.23Z" },
{ url = "https://files.pythonhosted.org/packages/59/17/94e81def74107809755ac2782fdad4404420f1c92ca83433d117a6d5acf0/lxml-6.1.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2173a7bffe97667bbf0767f8a99e587740a8c56fdf3befac4b09cb29a80276fd", size = 5229458, upload-time = "2026-04-18T04:35:14.254Z" },
{ url = "https://files.pythonhosted.org/packages/21/55/c4be91b0f830a871fc1b0d730943d56013b683d4671d5198260e2eae722b/lxml-6.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c6854e9cf99c84beb004eecd7d3a3868ef1109bf2b1df92d7bc11e96a36c2180", size = 5247861, upload-time = "2026-04-18T04:35:17.006Z" },
{ url = "https://files.pythonhosted.org/packages/c2/ca/77123e4d77df3cb1e968ade7b1f808f5d3a5c1c96b18a33895397de292c1/lxml-6.1.0-cp314-cp314t-win32.whl", hash = "sha256:00750d63ef0031a05331b9223463b1c7c02b9004cef2346a5b2877f0f9494dd2", size = 3897377, upload-time = "2026-04-18T04:32:07.656Z" },
{ url = "https://files.pythonhosted.org/packages/64/ce/3554833989d074267c063209bae8b09815e5656456a2d332b947806b05ff/lxml-6.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:80410c3a7e3c617af04de17caa9f9f20adaa817093293d69eae7d7d0522836f5", size = 4392701, upload-time = "2026-04-18T04:32:12.113Z" },
{ url = "https://files.pythonhosted.org/packages/2b/a0/9b916c68c0e57752c07f8f64b30138d9d4059dbeb27b90274dedbea128ff/lxml-6.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:26dd9f57ee3bd41e7d35b4c98a2ffd89ed11591649f421f0ec19f67d50ec67ac", size = 3817120, upload-time = "2026-04-18T04:32:15.803Z" },
{ url = "https://files.pythonhosted.org/packages/f2/88/55143966481409b1740a3ac669e611055f49efd68087a5ce41582325db3e/lxml-6.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:546b66c0dd1bb8d9fa89d7123e5fa19a8aff3a1f2141eb22df96112afb17b842", size = 3930134, upload-time = "2026-04-18T04:32:35.008Z" },
{ url = "https://files.pythonhosted.org/packages/b5/97/28b985c2983938d3cb696dd5501423afb90a8c3e869ef5d3c62569282c0f/lxml-6.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5cfa1a34df366d9dc0d5eaf420f4cf2bb1e1bebe1066d1c2fc28c179f8a4004c", size = 4210749, upload-time = "2026-04-18T04:36:03.626Z" },
{ url = "https://files.pythonhosted.org/packages/29/67/dfab2b7d58214921935ccea7ce9b3df9b7d46f305d12f0f532ac7cf6b804/lxml-6.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db88156fcf544cdbf0d95588051515cfdfd4c876fc66444eb98bceb5d6db76de", size = 4318463, upload-time = "2026-04-18T04:36:06.309Z" },
{ url = "https://files.pythonhosted.org/packages/32/a2/4ac7eb32a4d997dd352c32c32399aae27b3f268d440e6f9cfa405b575d2f/lxml-6.1.0-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07f98f5496f96bf724b1e3c933c107f0cbf2745db18c03d2e13a291c3afd2635", size = 4251124, upload-time = "2026-04-18T04:36:09.056Z" },
{ url = "https://files.pythonhosted.org/packages/33/ef/d6abd850bb4822f9b720cfe36b547a558e694881010ff7d012191e8769c6/lxml-6.1.0-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4642e04449a1e164b5ff71ffd901ddb772dfabf5c9adf1b7be5dffe1212bc037", size = 4401758, upload-time = "2026-04-18T04:36:11.803Z" },
{ url = "https://files.pythonhosted.org/packages/40/44/3ee09a5b60cb44c4f2fbc1c9015cfd6ff5afc08f991cab295d3024dcbf2d/lxml-6.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7da13bb6fbadfafb474e0226a30570a3445cfd47c86296f2446dafbd77079ace", size = 3508860, upload-time = "2026-04-18T04:32:48.619Z" },
]
[[package]]
name = "magika"
version = "0.6.2"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14' and sys_platform == 'win32'",
"python_full_version == '3.13.*' and sys_platform == 'win32'",
"python_full_version == '3.12.*' and sys_platform == 'win32'",
"python_full_version < '3.12' and sys_platform == 'win32'",
]
dependencies = [
{ name = "click", marker = "sys_platform == 'win32'" },
{ name = "numpy", marker = "sys_platform == 'win32'" },
{ name = "onnxruntime", marker = "sys_platform == 'win32'" },
{ name = "python-dotenv", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/8fdd991142ad3e037179a494b153f463024e5a211ef3ad948b955c26b4de/magika-0.6.2.tar.gz", hash = "sha256:37eb6ae8020f6e68f231bc06052c0a0cbe8e6fa27492db345e8dc867dbceb067", size = 3036634, upload-time = "2025-05-02T14:54:18.88Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/07/4f7748f34279f2852068256992377474f9700b6fbad6735d6be58605178f/magika-0.6.2-py3-none-any.whl", hash = "sha256:5ef72fbc07723029b3684ef81454bc224ac5f60986aa0fc5a28f4456eebcb5b2", size = 2967609, upload-time = "2025-05-02T14:54:09.696Z" },
{ url = "https://files.pythonhosted.org/packages/b0/1f/28e412d0ccedc068fbccdae6a6233faaa97ec3e5e2ffd242e49655b10064/magika-0.6.2-py3-none-win_amd64.whl", hash = "sha256:711f427a633e0182737dcc2074748004842f870643585813503ff2553b973b9f", size = 12385740, upload-time = "2025-05-02T14:54:14.096Z" },
]
[[package]]
name = "magika"
version = "0.6.3"
source = { registry = "https://pypi.org/simple" }
resolution-markers = [
"python_full_version >= '3.14' and sys_platform != 'win32'",
"python_full_version == '3.13.*' and sys_platform != 'win32'",
"python_full_version == '3.12.*' and sys_platform != 'win32'",
"python_full_version < '3.12' and sys_platform != 'win32'",
]
dependencies = [
{ name = "click", marker = "sys_platform != 'win32'" },
{ name = "numpy", marker = "sys_platform != 'win32'" },
{ name = "onnxruntime", marker = "sys_platform != 'win32'" },
{ name = "python-dotenv", marker = "sys_platform != 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/f3/3d1dcdd7b9c41d589f5cff252d32ed91cdf86ba84391cfc81d9d8773571d/magika-0.6.3.tar.gz", hash = "sha256:7cc52aa7359af861957043e2bf7265ed4741067251c104532765cd668c0c0cb1", size = 3042784, upload-time = "2025-10-30T15:22:34.499Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a2/e4/35c323beb3280482c94299d61626116856ac2d4ec16ecef50afc4fdd4291/magika-0.6.3-py3-none-any.whl", hash = "sha256:eda443d08006ee495e02083b32e51b98cb3696ab595a7d13900d8e2ef506ec9d", size = 2969474, upload-time = "2025-10-30T15:22:25.298Z" },
{ url = "https://files.pythonhosted.org/packages/25/8f/132b0d7cd51c02c39fd52658a5896276c30c8cc2fd453270b19db8c40f7e/magika-0.6.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:86901e64b05dde5faff408c9b8245495b2e1fd4c226e3393d3d2a3fee65c504b", size = 13358841, upload-time = "2025-10-30T15:22:27.413Z" },
{ url = "https://files.pythonhosted.org/packages/c4/03/5ed859be502903a68b7b393b17ae0283bf34195cfcca79ce2dc25b9290e7/magika-0.6.3-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:3d9661eedbdf445ac9567e97e7ceefb93545d77a6a32858139ea966b5806fb64", size = 15367335, upload-time = "2025-10-30T15:22:29.907Z" },
]
[[package]]
name = "mammoth"
version = "1.11.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cobble" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ed/3c/a58418d2af00f2da60d4a51e18cd0311307b72d48d2fffec36a97b4a5e44/mammoth-1.11.0.tar.gz", hash = "sha256:a0f59e442f34d5b6447f4b0999306cbf3e67aaabfa8cb516f878fb1456744637", size = 53142, upload-time = "2025-09-19T10:35:20.373Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ca/54/2e39566a131b13f6d8d193f974cb6a34e81bb7cc2fa6f7e03de067b36588/mammoth-1.11.0-py2.py3-none-any.whl", hash = "sha256:c077ab0d450bd7c0c6ecd529a23bf7e0fa8190c929e28998308ff4eada3f063b", size = 54752, upload-time = "2025-09-19T10:35:18.699Z" },
]
[[package]]
name = "markdown"
version = "3.10.2"
@@ -2645,6 +2964,50 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]]
name = "markdownify"
version = "1.2.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3f/bc/c8c8eea5335341306b0fa7e1cb33c5e1c8d24ef70ddd684da65f41c49c92/markdownify-1.2.2.tar.gz", hash = "sha256:b274f1b5943180b031b699b199cbaeb1e2ac938b75851849a31fd0c3d6603d09", size = 18816, upload-time = "2025-11-16T19:21:18.565Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/43/ce/f1e3e9d959db134cedf06825fae8d5b294bd368aacdd0831a3975b7c4d55/markdownify-1.2.2-py3-none-any.whl", hash = "sha256:3f02d3cc52714084d6e589f70397b6fc9f2f3a8531481bf35e8cc39f975e186a", size = 15724, upload-time = "2025-11-16T19:21:17.622Z" },
]
[[package]]
name = "markitdown"
version = "0.1.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "charset-normalizer" },
{ name = "defusedxml" },
{ name = "magika", version = "0.6.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'win32'" },
{ name = "magika", version = "0.6.3", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'win32'" },
{ name = "markdownify" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/83/93/3b93c291c99d09f64f7535ba74c1c6a3507cf49cffd38983a55de6f834b6/markitdown-0.1.5.tar.gz", hash = "sha256:4c956ff1528bf15e1814542035ec96e989206d19d311bb799f4df973ecafc31a", size = 45099, upload-time = "2026-02-20T19:45:23.886Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b1/8b/fd7e042455a829a1ede0bc8e9e3061aa6c7c4cf745385526ef62ff1b5a5b/markitdown-0.1.5-py3-none-any.whl", hash = "sha256:5180a9a841e20fc01c2c09dbc5d039638429bbebcdc2af1b2615c3c427840434", size = 63402, upload-time = "2026-02-20T19:45:27.195Z" },
]
[package.optional-dependencies]
docx = [
{ name = "lxml" },
{ name = "mammoth" },
]
pdf = [
{ name = "pdfminer-six" },
{ name = "pdfplumber" },
]
pptx = [
{ name = "python-pptx" },
]
[[package]]
name = "markupsafe"
version = "3.0.3"
@@ -3536,6 +3899,42 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/3e/2218fa29637781b8e7ac35a928108ff2614ddd40879389d3af2caa725af5/parallel_web-0.4.2-py3-none-any.whl", hash = "sha256:aa3a4a9aecc08972c5ce9303271d4917903373dff4dd277d9a3e30f9cff53346", size = 144012, upload-time = "2026-03-09T22:24:33.979Z" },
]
[[package]]
name = "pathspec"
version = "0.12.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
]
[[package]]
name = "pdfminer-six"
version = "20251230"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "charset-normalizer" },
{ name = "cryptography" },
]
sdist = { url = "https://files.pythonhosted.org/packages/46/9a/d79d8fa6d47a0338846bb558b39b9963b8eb2dfedec61867c138c1b17eeb/pdfminer_six-20251230.tar.gz", hash = "sha256:e8f68a14c57e00c2d7276d26519ea64be1b48f91db1cdc776faa80528ca06c1e", size = 8511285, upload-time = "2025-12-30T15:49:13.104Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/65/d7/b288ea32deb752a09aab73c75e1e7572ab2a2b56c3124a5d1eb24c62ceb3/pdfminer_six-20251230-py3-none-any.whl", hash = "sha256:9ff2e3466a7dfc6de6fd779478850b6b7c2d9e9405aa2a5869376a822771f485", size = 6591909, upload-time = "2025-12-30T15:49:10.76Z" },
]
[[package]]
name = "pdfplumber"
version = "0.11.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pdfminer-six" },
{ name = "pillow" },
{ name = "pypdfium2" },
]
sdist = { url = "https://files.pythonhosted.org/packages/38/37/9ca3519e92a8434eb93be570b131476cc0a4e840bb39c62ddb7813a39d53/pdfplumber-0.11.9.tar.gz", hash = "sha256:481224b678b2bbdbf376e2c39bf914144eef7c3d301b4a28eebf0f7f6109d6dc", size = 102768, upload-time = "2026-01-05T08:10:29.072Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/c8/cdbc975f5b634e249cfa6597e37c50f3078412474f21c015e508bfbfe3c3/pdfplumber-0.11.9-py3-none-any.whl", hash = "sha256:33ec5580959ba524e9100138746e090879504c42955df1b8a997604dd326c443", size = 60045, upload-time = "2026-01-05T08:10:27.512Z" },
]
[[package]]
name = "pillow"
version = "12.1.1"
@@ -4109,6 +4508,44 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
]
[[package]]
name = "pypdfium2"
version = "5.7.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/76/19aacfff78d328a700ca34b5b1dff891e587aac2fd6b928b035ed366cc37/pypdfium2-5.7.0.tar.gz", hash = "sha256:9febb09f532555485f064c1f6442f46d31e27be5981359cb06b5826695906a06", size = 265935, upload-time = "2026-04-08T19:58:16.831Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/a5/7e6d9532e7753a1dc439412b38dda5943c692d3ab3f1e01826f9b5527c67/pypdfium2-5.7.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:9e815e75498a03a3049baf68ff00b90459bead0d9eee65b1860142529faba81d", size = 3343748, upload-time = "2026-04-08T19:57:40.293Z" },
{ url = "https://files.pythonhosted.org/packages/d3/ea/9d4a0b41f86d342dfb6529c31789e70d1123cc6521b29979e02ec2b267b6/pypdfium2-5.7.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:405bb3c6d0e7a5a32e98eb45a3343da1ad847d6d6eef77bf6f285652a250e0b7", size = 2805480, upload-time = "2026-04-08T19:57:42.109Z" },
{ url = "https://files.pythonhosted.org/packages/34/dc/ce1c8e94082a84d1669606f90c4f694acbdcabd359d92db7302d16b5938b/pypdfium2-5.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:609b34d91871c185f399b1a503513c03a9de83597f55404de00c3d31a8037544", size = 3420156, upload-time = "2026-04-08T19:57:43.672Z" },
{ url = "https://files.pythonhosted.org/packages/51/84/6d859ce82a3723ba7cd70d88ad87eca3cb40553c68db182976fd2b0febe1/pypdfium2-5.7.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:6ae6c6bba0cde30c9293c3f525778c229466de7782e8f7d99e7c2a1b8f9c7a6f", size = 3601560, upload-time = "2026-04-08T19:57:45.148Z" },
{ url = "https://files.pythonhosted.org/packages/66/0c/8bc2258d1e7ba971d05241a049cd3100c75df6bcf930423de7d0c6265a30/pypdfium2-5.7.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b518d78211cb2912139d10d7f4e39669231eb155e8258159e3413e9e5e4baef", size = 3588134, upload-time = "2026-04-08T19:57:47.379Z" },
{ url = "https://files.pythonhosted.org/packages/b5/f7/3248cc569a92ff25f1fe0a4a1790807e6e05df60563e39e74c9b723d5620/pypdfium2-5.7.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aaa8e7681ebcaa042ac8adc152521fd5f16a4ceee1e9b9b582e148519528aa9", size = 3323100, upload-time = "2026-04-08T19:57:49.243Z" },
{ url = "https://files.pythonhosted.org/packages/0d/ee/6f004509df77ce963ed5a0f2e090ea0c43036e49cc72c321ce90f3d328bf/pypdfium2-5.7.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8d2284f799adbae755b66ce1a579834e487337d89bbb34ee749ecfa68322425", size = 3719217, upload-time = "2026-04-08T19:57:50.708Z" },
{ url = "https://files.pythonhosted.org/packages/ae/f0/bb61601aa1c2990d4a5d194440281941781250f6a438813a13fe20eb95cf/pypdfium2-5.7.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08e9e9576eefbc085ba9a63feede4bcaf93d9fa0d9b17cb549aba6f065a8750e", size = 4147676, upload-time = "2026-04-08T19:57:52.292Z" },
{ url = "https://files.pythonhosted.org/packages/bd/27/a119e0519049afcfca51e9834b67949ffaba5b9afe7e74ed04d6c39b0285/pypdfium2-5.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ace647320bae562903097977b83449f91d30e045dd19ce62939d3100869f180", size = 3635469, upload-time = "2026-04-08T19:57:53.948Z" },
{ url = "https://files.pythonhosted.org/packages/70/0b/4bcb67b039f057aca01ddbe692ae7666b630ad42b91a3aca3cb4d4f01222/pypdfium2-5.7.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7bb7555fe613cd76fff871a12299f902b80443f90b49e2001338718c758f6f4", size = 3091818, upload-time = "2026-04-08T19:57:55.471Z" },
{ url = "https://files.pythonhosted.org/packages/a6/c9/31490ab7cecaf433195683ff5c750f4111c7347f1fef9131d3d8704618eb/pypdfium2-5.7.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7c0ef5ae35d40daa1883f3993b3b7ecf3fb06993bcc46651e28cf058d9da992", size = 2959579, upload-time = "2026-04-08T19:57:57.238Z" },
{ url = "https://files.pythonhosted.org/packages/f9/1e/bf5fe52f007130c0b1b38786ef82c98b4ac06f77e7ca001a17cda6ce76b6/pypdfium2-5.7.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:423c749e8cab22ddaf833041498ec5ad477c1c2abbff0a8ec00b99663c284592", size = 4126033, upload-time = "2026-04-08T19:57:59.111Z" },
{ url = "https://files.pythonhosted.org/packages/18/7d/46dcebf4eb9ccf9b5fafe79702c31863b4c127e9c3140c0f335c375d3818/pypdfium2-5.7.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f48f453f848a90ec7786bcc84a4c0ee42eb84c2d8af3ca9004f7c18648939838", size = 3742063, upload-time = "2026-04-08T19:58:00.643Z" },
{ url = "https://files.pythonhosted.org/packages/4d/29/cfec37942f13a1dfe3ab059cf8d130609143d33ca1dd554b017a30bffe97/pypdfium2-5.7.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e84bfa61f0243ed4b33bfe2492946ba761007b7feb5e7e0a086c635436d47906", size = 4332177, upload-time = "2026-04-08T19:58:02.425Z" },
{ url = "https://files.pythonhosted.org/packages/3f/da/07812153eff746bbc548d50129ada699765036674ff94065d538015c9556/pypdfium2-5.7.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:e3f4d7f4473b5ef762560cd5971cad3b51a77da3a25af479ef5aae4611709bb8", size = 4370704, upload-time = "2026-04-08T19:58:04.379Z" },
{ url = "https://files.pythonhosted.org/packages/9b/df/07a6a038ccb6fae6a1a06708c98d00aa03f2ca720b02cd3b75248dc5da70/pypdfium2-5.7.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:9e0b6c9be8c92b63ce0a00a94f6635eec22831e253811d6692824a1244e21780", size = 3924428, upload-time = "2026-04-08T19:58:06.406Z" },
{ url = "https://files.pythonhosted.org/packages/b4/a8/70ce4f997fef4186098c032fb3dd2c39193027a92a23b5a94d7a4c85e068/pypdfium2-5.7.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:3e4974a8545f726fc97a7443507713007e177f22058cd1ca0b28cb0e8e2d7dc2", size = 4264817, upload-time = "2026-04-08T19:58:08.003Z" },
{ url = "https://files.pythonhosted.org/packages/02/42/03779e61ca40120f87839b4693899c72031b7a9e23676dcd8914d92e460c/pypdfium2-5.7.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2fe12d57a0b413d42bdba435a608b2435a921a5f6a9d78fd8091b6266b63901a", size = 4175393, upload-time = "2026-04-08T19:58:09.858Z" },
{ url = "https://files.pythonhosted.org/packages/ee/f1/19bea36b354f2407c6ffdc60ad8564d95eb515badec457043ff57ad636f0/pypdfium2-5.7.0-py3-none-win32.whl", hash = "sha256:23958aec5c28c52e71f183a647fcc9fcec96ef703cc60a3ade44e55f4701678f", size = 3606308, upload-time = "2026-04-08T19:58:11.672Z" },
{ url = "https://files.pythonhosted.org/packages/70/aa/fb333c1912a019de26e2395afd3dbef09e8118a59d70f1e5886fc90aa565/pypdfium2-5.7.0-py3-none-win_amd64.whl", hash = "sha256:a33d2c190042ae09c5512f599a540f88b07be956f18c4bb49c027e8c5118ce44", size = 3726429, upload-time = "2026-04-08T19:58:13.374Z" },
{ url = "https://files.pythonhosted.org/packages/86/cf/6d4bc1ae4466a1f223abfe27210dce218da307e921961cd687f6e5a795a0/pypdfium2-5.7.0-py3-none-win_arm64.whl", hash = "sha256:8233fd06b0b8c22a5ea0bccbd7c4f73d6e9d0388040ea51909a5b2b1f63157e8", size = 3519317, upload-time = "2026-04-08T19:58:15.261Z" },
]
[[package]]
name = "pypng"
version = "0.20220715.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/93/cd/112f092ec27cca83e0516de0a3368dbd9128c187fb6b52aaaa7cde39c96d/pypng-0.20220715.0.tar.gz", hash = "sha256:739c433ba96f078315de54c0db975aee537cbc3e1d0ae4ed9aab0ca1e427e2c1", size = 128992, upload-time = "2022-07-15T14:11:05.301Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/b9/3766cc361d93edb2ce81e2e1f87dd98f314d7d513877a342d31b30741680/pypng-0.20220715.0-py3-none-any.whl", hash = "sha256:4a43e969b8f5aaafb2a415536c1a8ec7e341cd6a3f957fd5b5f32a4cfeed902c", size = 58057, upload-time = "2022-07-15T14:11:03.713Z" },
]
[[package]]
name = "pytest"
version = "9.0.2"
@@ -4198,6 +4635,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/93/f6729f10149305262194774d6c8b438c0b084740cf239f48ab97b4df02fa/python_olm-3.2.16-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a5e68a2f4b5a2bfa5fdb5dbfa22396a551730df6c4a572235acaa96e997d3f", size = 297000, upload-time = "2023-11-28T19:25:31.045Z" },
]
[[package]]
name = "python-pptx"
version = "1.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "lxml" },
{ name = "pillow" },
{ name = "typing-extensions" },
{ name = "xlsxwriter" },
]
sdist = { url = "https://files.pythonhosted.org/packages/52/a9/0c0db8d37b2b8a645666f7fd8accea4c6224e013c42b1d5c17c93590cd06/python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095", size = 10109297, upload-time = "2024-08-07T17:33:37.772Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" },
]
[[package]]
name = "python-telegram-bot"
version = "22.6"
|
||||
@@ -4311,6 +4763,20 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
]

[[package]]
name = "qrcode"
version = "7.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "colorama", marker = "sys_platform == 'win32'" },
    { name = "pypng" },
    { name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/30/35/ad6d4c5a547fe9a5baf85a9edbafff93fc6394b014fab30595877305fa59/qrcode-7.4.2.tar.gz", hash = "sha256:9dd969454827e127dbd93696b20747239e6d540e082937c90f14ac95b30f5845", size = 535974, upload-time = "2023-02-05T22:11:46.548Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/24/79/aaf0c1c7214f2632badb2771d770b1500d3d7cbdf2590ae62e721ec50584/qrcode-7.4.2-py3-none-any.whl", hash = "sha256:581dca7a029bcb2deef5d01068e39093e80ef00b4a61098a2182eac59d01643a", size = 46197, upload-time = "2023-02-05T22:11:43.4Z" },
]

[[package]]
name = "referencing"
version = "0.37.0"
@@ -4577,6 +5043,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" },
]

[[package]]
name = "s3transfer"
version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "botocore" },
]
sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" },
]

[[package]]
name = "safetensors"
version = "0.7.0"
@@ -4712,6 +5190,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/4e/39/a61d4b83a7746b70d23d9173be688c0c6bfc7173772344b7442c2c155497/sounddevice-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3861901ddd8230d2e0e8ae62ac320cdd4c688d81df89da036dcb812f757bb3e6", size = 317115, upload-time = "2026-01-23T18:36:42.235Z" },
]

[[package]]
name = "soupsieve"
version = "2.8.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" },
]

[[package]]
name = "sqlalchemy"
version = "2.0.48"
@@ -4927,8 +5414,8 @@ wheels = [

[[package]]
name = "tinker"
-version = "0.16.1"
-source = { git = "https://github.com/thinking-machines-lab/tinker.git#07bd3c2dd3cd4398ac1c26f0ec0deccbf3c1f913" }
+version = "0.18.0"
+source = { git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b#30517b667f18a3dfb7ef33fb56cf686d5820ba2b" }
dependencies = [
    { name = "anyio" },
    { name = "click" },
@@ -5026,6 +5513,56 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" },
]

[[package]]
name = "tree-sitter"
version = "0.25.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/7c/22/88a1e00b906d26fa8a075dd19c6c3116997cb884bf1b3c023deb065a344d/tree_sitter-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ca72d841215b6573ed0655b3a5cd1133f9b69a6fa561aecad40dca9029d75b", size = 146752, upload-time = "2025-09-25T17:37:24.775Z" },
    { url = "https://files.pythonhosted.org/packages/57/1c/22cc14f3910017b7a76d7358df5cd315a84fe0c7f6f7b443b49db2e2790d/tree_sitter-0.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc0351cfe5022cec5a77645f647f92a936b38850346ed3f6d6babfbeeeca4d26", size = 137765, upload-time = "2025-09-25T17:37:26.103Z" },
    { url = "https://files.pythonhosted.org/packages/1c/0c/d0de46ded7d5b34631e0f630d9866dab22d3183195bf0f3b81de406d6622/tree_sitter-0.25.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1799609636c0193e16c38f366bda5af15b1ce476df79ddaae7dd274df9e44266", size = 604643, upload-time = "2025-09-25T17:37:27.398Z" },
    { url = "https://files.pythonhosted.org/packages/34/38/b735a58c1c2f60a168a678ca27b4c1a9df725d0bf2d1a8a1c571c033111e/tree_sitter-0.25.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e65ae456ad0d210ee71a89ee112ac7e72e6c2e5aac1b95846ecc7afa68a194c", size = 632229, upload-time = "2025-09-25T17:37:28.463Z" },
    { url = "https://files.pythonhosted.org/packages/32/f6/cda1e1e6cbff5e28d8433578e2556d7ba0b0209d95a796128155b97e7693/tree_sitter-0.25.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:49ee3c348caa459244ec437ccc7ff3831f35977d143f65311572b8ba0a5f265f", size = 629861, upload-time = "2025-09-25T17:37:29.593Z" },
    { url = "https://files.pythonhosted.org/packages/f9/19/427e5943b276a0dd74c2a1f1d7a7393443f13d1ee47dedb3f8127903c080/tree_sitter-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:56ac6602c7d09c2c507c55e58dc7026b8988e0475bd0002f8a386cce5e8e8adc", size = 127304, upload-time = "2025-09-25T17:37:30.549Z" },
    { url = "https://files.pythonhosted.org/packages/eb/d9/eef856dc15f784d85d1397a17f3ee0f82df7778efce9e1961203abfe376a/tree_sitter-0.25.2-cp311-cp311-win_arm64.whl", hash = "sha256:b3d11a3a3ac89bb8a2543d75597f905a9926f9c806f40fcca8242922d1cc6ad5", size = 113990, upload-time = "2025-09-25T17:37:31.852Z" },
    { url = "https://files.pythonhosted.org/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941, upload-time = "2025-09-25T17:37:34.813Z" },
    { url = "https://files.pythonhosted.org/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699, upload-time = "2025-09-25T17:37:36.349Z" },
    { url = "https://files.pythonhosted.org/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125, upload-time = "2025-09-25T17:37:37.725Z" },
    { url = "https://files.pythonhosted.org/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418, upload-time = "2025-09-25T17:37:38.922Z" },
    { url = "https://files.pythonhosted.org/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250, upload-time = "2025-09-25T17:37:40.039Z" },
    { url = "https://files.pythonhosted.org/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156, upload-time = "2025-09-25T17:37:41.132Z" },
    { url = "https://files.pythonhosted.org/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984, upload-time = "2025-09-25T17:37:42.074Z" },
    { url = "https://files.pythonhosted.org/packages/8c/67/67492014ce32729b63d7ef318a19f9cfedd855d677de5773476caf771e96/tree_sitter-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0628671f0de69bb279558ef6b640bcfc97864fe0026d840f872728a86cd6b6cd", size = 146926, upload-time = "2025-09-25T17:37:43.041Z" },
    { url = "https://files.pythonhosted.org/packages/4e/9c/a278b15e6b263e86c5e301c82a60923fa7c59d44f78d7a110a89a413e640/tree_sitter-0.25.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f5ddcd3e291a749b62521f71fc953f66f5fd9743973fd6dd962b092773569601", size = 137712, upload-time = "2025-09-25T17:37:44.039Z" },
    { url = "https://files.pythonhosted.org/packages/54/9a/423bba15d2bf6473ba67846ba5244b988cd97a4b1ea2b146822162256794/tree_sitter-0.25.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd88fbb0f6c3a0f28f0a68d72df88e9755cf5215bae146f5a1bdc8362b772053", size = 607873, upload-time = "2025-09-25T17:37:45.477Z" },
    { url = "https://files.pythonhosted.org/packages/ed/4c/b430d2cb43f8badfb3a3fa9d6cd7c8247698187b5674008c9d67b2a90c8e/tree_sitter-0.25.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b878e296e63661c8e124177cc3084b041ba3f5936b43076d57c487822426f614", size = 636313, upload-time = "2025-09-25T17:37:46.68Z" },
    { url = "https://files.pythonhosted.org/packages/9d/27/5f97098dbba807331d666a0997662e82d066e84b17d92efab575d283822f/tree_sitter-0.25.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d77605e0d353ba3fe5627e5490f0fbfe44141bafa4478d88ef7954a61a848dae", size = 631370, upload-time = "2025-09-25T17:37:47.993Z" },
    { url = "https://files.pythonhosted.org/packages/d4/3c/87caaed663fabc35e18dc704cd0e9800a0ee2f22bd18b9cbe7c10799895d/tree_sitter-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:463c032bd02052d934daa5f45d183e0521ceb783c2548501cf034b0beba92c9b", size = 127157, upload-time = "2025-09-25T17:37:48.967Z" },
    { url = "https://files.pythonhosted.org/packages/d5/23/f8467b408b7988aff4ea40946a4bd1a2c1a73d17156a9d039bbaff1e2ceb/tree_sitter-0.25.2-cp313-cp313-win_arm64.whl", hash = "sha256:b3f63a1796886249bd22c559a5944d64d05d43f2be72961624278eff0dcc5cb8", size = 113975, upload-time = "2025-09-25T17:37:49.922Z" },
    { url = "https://files.pythonhosted.org/packages/07/e3/d9526ba71dfbbe4eba5e51d89432b4b333a49a1e70712aa5590cd22fc74f/tree_sitter-0.25.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:65d3c931013ea798b502782acab986bbf47ba2c452610ab0776cf4a8ef150fc0", size = 146776, upload-time = "2025-09-25T17:37:50.898Z" },
    { url = "https://files.pythonhosted.org/packages/42/97/4bd4ad97f85a23011dd8a535534bb1035c4e0bac1234d58f438e15cff51f/tree_sitter-0.25.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bda059af9d621918efb813b22fb06b3fe00c3e94079c6143fcb2c565eb44cb87", size = 137732, upload-time = "2025-09-25T17:37:51.877Z" },
    { url = "https://files.pythonhosted.org/packages/b6/19/1e968aa0b1b567988ed522f836498a6a9529a74aab15f09dd9ac1e41f505/tree_sitter-0.25.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eac4e8e4c7060c75f395feec46421eb61212cb73998dbe004b7384724f3682ab", size = 609456, upload-time = "2025-09-25T17:37:52.925Z" },
    { url = "https://files.pythonhosted.org/packages/48/b6/cf08f4f20f4c9094006ef8828555484e842fc468827ad6e56011ab668dbd/tree_sitter-0.25.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:260586381b23be33b6191a07cea3d44ecbd6c01aa4c6b027a0439145fcbc3358", size = 636772, upload-time = "2025-09-25T17:37:54.647Z" },
    { url = "https://files.pythonhosted.org/packages/57/e2/d42d55bf56360987c32bc7b16adb06744e425670b823fb8a5786a1cea991/tree_sitter-0.25.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7d2ee1acbacebe50ba0f85fff1bc05e65d877958f00880f49f9b2af38dce1af0", size = 631522, upload-time = "2025-09-25T17:37:55.833Z" },
    { url = "https://files.pythonhosted.org/packages/03/87/af9604ebe275a9345d88c3ace0cf2a1341aa3f8ef49dd9fc11662132df8a/tree_sitter-0.25.2-cp314-cp314-win_amd64.whl", hash = "sha256:4973b718fcadfb04e59e746abfbb0288694159c6aeecd2add59320c03368c721", size = 130864, upload-time = "2025-09-25T17:37:57.453Z" },
    { url = "https://files.pythonhosted.org/packages/a6/6e/e64621037357acb83d912276ffd30a859ef117f9c680f2e3cb955f47c680/tree_sitter-0.25.2-cp314-cp314-win_arm64.whl", hash = "sha256:b8d4429954a3beb3e844e2872610d2a4800ba4eb42bb1990c6a4b1949b18459f", size = 117470, upload-time = "2025-09-25T17:37:58.431Z" },
]

[[package]]
name = "tree-sitter-language-pack"
version = "1.6.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "tree-sitter" },
]
wheels = [
    { url = "https://files.pythonhosted.org/packages/09/bd/ac34ab0ee92b2d27802754c575965e921490ce11b5357bf89f74a78e8309/tree_sitter_language_pack-1.6.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:f5998cfee5735a8e7e691f577062ff7eb3a7ea405ae5654c9cecaa4a1e6c81b0", size = 2241997, upload-time = "2026-04-18T07:04:36.042Z" },
    { url = "https://files.pythonhosted.org/packages/a1/e0/b997b8c3e0886288a47890e6313c3a7e74ea8192e2d141b3eab64d59a276/tree_sitter_language_pack-1.6.2-cp310-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:8ce814ede4e295f3419ba179b523889c52cc3a998ac085356a470e776596c026", size = 2419565, upload-time = "2026-04-18T07:04:37.67Z" },
    { url = "https://files.pythonhosted.org/packages/fa/a4/629e6983a93fbb52dc50af495ec0431565c6477eea4680d4298238e9831e/tree_sitter_language_pack-1.6.2-cp310-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2305df7835c1cb3d34b71450b79d135878bc25ea5d02d9984cee864607a4ad60", size = 2555465, upload-time = "2026-04-18T07:04:39.57Z" },
    { url = "https://files.pythonhosted.org/packages/b5/9c/0f486ca7344f6f3345441e8516b464214c7c5a0f3775d11fda1368901c38/tree_sitter_language_pack-1.6.2-cp310-abi3-win_amd64.whl", hash = "sha256:08351222b43c3a73665571eaa440366add2093a2492bb35f032fb7a31945e720", size = 2351156, upload-time = "2026-04-18T07:04:41.377Z" },
]

[[package]]
name = "typer"
version = "0.24.1"
@@ -5437,6 +5974,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" },
]

[[package]]
name = "xlsxwriter"
version = "3.2.9"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/46/2c/c06ef49dc36e7954e55b802a8b231770d286a9758b3d936bd1e04ce5ba88/xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c", size = 215940, upload-time = "2025-09-16T00:16:21.63Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/3a/0c/3662f4a66880196a590b202f0db82d919dd2f89e99a27fadef91c4a33d41/xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3", size = 175315, upload-time = "2025-09-16T00:16:20.108Z" },
]

[[package]]
name = "xxhash"
version = "3.6.0"
@@ -5653,7 +6199,7 @@ wheels = [

[[package]]
name = "yc-bench"
version = "0.1.0"
-source = { git = "https://github.com/collinear-ai/yc-bench.git#0c53c98f01a431db2e391482bc46013045854ab2" }
+source = { git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c#bfb0c88062450f46341bd9a5298903fc2e952a5c" }
dependencies = [
    { name = "litellm", marker = "python_full_version >= '3.12'" },
    { name = "matplotlib", marker = "python_full_version >= '3.12'" },

46 workspace/__init__.py Normal file
@@ -0,0 +1,46 @@
"""Workspace indexing and search.

Public API:
    get_indexer(config) -> BaseIndexer
    load_workspace_config() -> WorkspaceConfig
"""

import logging

from workspace.base import BaseIndexer
from workspace.config import WorkspaceConfig, load_workspace_config
from workspace.default import DefaultIndexer
from workspace.types import IndexingError, IndexSummary, SearchResult

log = logging.getLogger(__name__)


def get_indexer(config: WorkspaceConfig | None = None) -> BaseIndexer:
    if config is None:
        config = load_workspace_config()
    if config.indexer == "default":
        return DefaultIndexer(config)
    try:
        from plugins.workspace import load_workspace_indexer

        cls = load_workspace_indexer(config.indexer)
    except ImportError:
        cls = None
    if cls is None:
        log.warning(
            "Indexer plugin '%s' not found, falling back to default", config.indexer
        )
        return DefaultIndexer(config)
    return cls(config)


__all__ = [
    "BaseIndexer",
    "DefaultIndexer",
    "WorkspaceConfig",
    "load_workspace_config",
    "get_indexer",
    "IndexingError",
    "IndexSummary",
    "SearchResult",
]
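A minimal usage sketch for this public API (the query string is illustrative; result fields match the SearchResult attributes used elsewhere in this diff):

# sketch: index then search through the public API
from workspace import get_indexer, load_workspace_config

config = load_workspace_config()   # reads the main hermes config.yaml
indexer = get_indexer(config)      # DefaultIndexer unless a plugin is configured
summary = indexer.index()          # full pass; returns an IndexSummary
hits = indexer.search("chunk overlap", limit=5)
for h in hits:
    print(h.path, h.line_start, h.line_end, h.score)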
44 workspace/base.py Normal file
@@ -0,0 +1,44 @@
# workspace/base.py
"""BaseIndexer ABC — the plugin contract for workspace backends.

Implementations must define __init__(config), index(), and search().
status() is optional (default returns empty dict).
"""

from abc import ABC, abstractmethod
from typing import Callable

from workspace.config import WorkspaceConfig
from workspace.types import IndexSummary, SearchResult

ProgressCallback = Callable[[int, int, str], None]


class BaseIndexer(ABC):
    @abstractmethod
    def __init__(self, config: WorkspaceConfig) -> None: ...

    @abstractmethod
    def index(self, *, progress: ProgressCallback | None = None) -> IndexSummary: ...

    @abstractmethod
    def search(
        self,
        query: str,
        *,
        limit: int = 20,
        path_prefix: str | None = None,
        file_glob: str | None = None,
    ) -> list[SearchResult]: ...

    def status(self) -> dict:
        return {}

    def list_files(self) -> list[dict]:
        return []

    def retrieve(self, path: str) -> list[SearchResult]:
        return []

    def delete(self, path: str) -> bool:
        return False
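A sketch of what a third-party backend satisfying this contract could look like. The class name and its no-op internals are hypothetical; only the method signatures and the IndexSummary fields come from code in this diff:

class GrepIndexer(BaseIndexer):
    """Toy backend: builds no index, scans at query time."""

    def __init__(self, config: WorkspaceConfig) -> None:
        self._config = config

    def index(self, *, progress: ProgressCallback | None = None) -> IndexSummary:
        # Nothing to build — report an empty summary.
        return IndexSummary(
            files_indexed=0, files_skipped=0, files_pruned=0, files_errored=0,
            chunks_created=0, duration_seconds=0.0, errors=[], errors_truncated=False,
        )

    def search(
        self, query: str, *, limit: int = 20,
        path_prefix: str | None = None, file_glob: str | None = None,
    ) -> list[SearchResult]:
        return []  # a real implementation would scan and rank here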
371 workspace/commands.py Normal file
@@ -0,0 +1,371 @@
"""CLI commands for workspace management.

hermes workspace roots list/add/remove
hermes workspace index
hermes workspace search <query> [--path] [--glob] [--limit]
hermes workspace status
hermes workspace list
hermes workspace retrieve <path>
hermes workspace delete <path>

All commands output JSON by default (agent-first). Use --human for Rich output.
"""

import json
import sys
from argparse import Namespace
from pathlib import Path
from typing import Any


def workspace_command(args: Namespace) -> None:
    action = getattr(args, "workspace_action", None)
    if action is None:
        msg = "No workspace subcommand. Use: roots, index, search, status, list, retrieve, delete"
        print(json.dumps({"error": msg}))
        sys.exit(1)

    human = getattr(args, "human", False)

    try:
        if action == "roots":
            _handle_roots(args, human)
        elif action == "index":
            _handle_index(args, human)
        elif action == "search":
            _handle_search(args, human)
        elif action == "status":
            _handle_status(args, human)
        elif action == "list":
            _handle_list(args, human)
        elif action == "retrieve":
            _handle_retrieve(args, human)
        elif action == "delete":
            _handle_delete(args, human)
        else:
            print(json.dumps({"error": f"Unknown workspace action: {action}"}))
            sys.exit(1)
    except SystemExit:
        raise
    except Exception as exc:
        _fatal(exc, human)


def _handle_roots(args: Namespace, human: bool) -> None:
    roots_action = getattr(args, "roots_action", "list")

    from workspace.config import load_workspace_config

    if roots_action == "list":
        config = load_workspace_config()
        roots = [
            {"path": r.path, "recursive": r.recursive}
            for r in config.knowledgebase.roots
        ]
        if human:
            _print_human_roots(roots)
        else:
            print(json.dumps(roots, indent=2))

    elif roots_action == "add":
        path = str(Path(args.path).expanduser().resolve())
        recursive = getattr(args, "recursive", False)
        _add_root(path, recursive)
        result = {"added": {"path": path, "recursive": recursive}}
        if human:
            print(f"Added workspace root: {path} (recursive={recursive})")
        else:
            print(json.dumps(result, indent=2))

    elif roots_action == "remove":
        path = str(Path(args.path).expanduser().resolve())
        _remove_root(path)
        result = {"removed": path}
        if human:
            print(f"Removed workspace root: {path}")
        else:
            print(json.dumps(result, indent=2))


def _handle_index(args: Namespace, human: bool) -> None:
    from workspace import get_indexer
    from workspace.config import load_workspace_config

    config = load_workspace_config()

    if not config.enabled:
        _error("Workspace is disabled (workspace.enabled = false)")
        return

    progress_fn = None
    if human:
        try:
            from rich.progress import (
                BarColumn,
                MofNCompleteColumn,
                Progress,
                SpinnerColumn,
                TextColumn,
            )

            progress_ctx = Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                transient=True,
            )
            progress_ctx.start()
            task_id = progress_ctx.add_task("Indexing...", total=None)

            def _rich_progress(current: int, total: int, path: str) -> None:
                desc = f"[{current}/{total}] {Path(path).name}"
                progress_ctx.update(
                    task_id,
                    total=total,
                    completed=current,
                    description=desc,
                )

            progress_fn = _rich_progress
        except ImportError:

            def _simple_progress(current: int, total: int, path: str) -> None:
                print(f" [{current}/{total}] {Path(path).name}", file=sys.stderr)

            progress_fn = _simple_progress
    else:

        def _stderr_progress(current: int, total: int, path: str) -> None:
            print(f"Indexing [{current}/{total}] {Path(path).name}", file=sys.stderr)

        progress_fn = _stderr_progress

    try:
        indexer = get_indexer(config)
        summary = indexer.index(progress=progress_fn)
    finally:
        if human:
            try:
                progress_ctx.stop()  # type: ignore[possibly-undefined]
            except Exception:
                pass

    if human:
        print(
            f"\nIndexed {summary.files_indexed} files "
            f"({summary.chunks_created} chunks), "
            f"skipped {summary.files_skipped}, "
            f"errored {summary.files_errored}, "
            f"pruned {summary.files_pruned} stale. "
            f"Took {summary.duration_seconds:.1f}s."
        )
        if summary.errors:
            print("\nErrors:")
            for err in summary.errors:
                print(f" [{err.stage}] {err.path}: {err.message}")
            if summary.errors_truncated:
                print(f" ... and {summary.files_errored - len(summary.errors)} more")
    else:
        print(json.dumps(summary.to_dict(), indent=2))


def _handle_search(args: Namespace, human: bool) -> None:
    from workspace import get_indexer
    from workspace.config import load_workspace_config
    from workspace.constants import resolve_path_prefix

    config = load_workspace_config()
    if not config.enabled:
        _error("Workspace is disabled (workspace.enabled = false)")
        return

    query = args.query
    limit = getattr(args, "limit", None)
    raw_path = getattr(args, "path", None)
    path_prefix = resolve_path_prefix(raw_path)
    file_glob = getattr(args, "glob", None)

    indexer = get_indexer(config)
    results = indexer.search(
        query,
        limit=limit or config.knowledgebase.search.default_limit,
        path_prefix=path_prefix,
        file_glob=file_glob,
    )

    if human:
        _print_human_results(results)
    else:
        print(json.dumps([r.to_dict() for r in results], indent=2))


def _handle_status(args: Namespace, human: bool) -> None:
    from workspace import get_indexer
    from workspace.config import load_workspace_config

    config = load_workspace_config()
    if not config.enabled:
        _error("Workspace is disabled (workspace.enabled = false)")
        return

    indexer = get_indexer(config)
    info = indexer.status()

    if human:
        if not info:
            print("No status available (indexer does not report status).")
            return
        for k, v in info.items():
            if k == "db_size_bytes":
                mb = v / (1024 * 1024)
                print(f" {k}: {mb:.1f} MB")
            else:
                print(f" {k}: {v}")
    else:
        print(json.dumps(info, indent=2))


def _handle_list(args: Namespace, human: bool) -> None:
    from workspace import get_indexer
    from workspace.config import load_workspace_config

    config = load_workspace_config()
    if not config.enabled:
        _error("Workspace is disabled (workspace.enabled = false)")
        return

    indexer = get_indexer(config)
    files = indexer.list_files()

    if human:
        if not files:
            print("No files indexed.")
            return
        print(f"{len(files)} indexed files:\n")
        for f in files:
            size_kb = f.get("size_bytes", 0) / 1024
            chunks = f.get("chunks", 0)
            print(f" {f['path']} ({size_kb:.0f} KB, {chunks} chunks)")
    else:
        print(json.dumps(files, indent=2))


def _handle_retrieve(args: Namespace, human: bool) -> None:
    from workspace import get_indexer
    from workspace.config import load_workspace_config

    config = load_workspace_config()
    if not config.enabled:
        _error("Workspace is disabled (workspace.enabled = false)")
        return

    path = str(Path(args.path).expanduser().resolve())
    indexer = get_indexer(config)
    results = indexer.retrieve(path)

    if not results:
        if human:
            print(f"No indexed chunks for: {path}")
        else:
            print(json.dumps({"path": path, "chunks": []}))
        return

    if human:
        print(f"{len(results)} chunks for {path}:\n")
        for r in results:
            section = f" [{r.section}]" if r.section else ""
            print(f" chunk {r.chunk_index}: lines {r.line_start}-{r.line_end}{section}")
            snippet = r.content[:200].replace("\n", " ")
            if len(r.content) > 200:
                snippet += "..."
            print(f" {snippet}\n")
    else:
        print(json.dumps([r.to_dict() for r in results], indent=2))


def _handle_delete(args: Namespace, human: bool) -> None:
    from workspace import get_indexer
    from workspace.config import load_workspace_config

    config = load_workspace_config()
    if not config.enabled:
        _error("Workspace is disabled (workspace.enabled = false)")
        return

    path = str(Path(args.path).expanduser().resolve())
    indexer = get_indexer(config)
    deleted = indexer.delete(path)

    if human:
        if deleted:
            print(f"Deleted from index: {path}")
        else:
            print(f"Not found in index: {path}")
    else:
        print(json.dumps({"path": path, "deleted": deleted}))


def _add_root(path: str, recursive: bool) -> None:
    from hermes_cli.config import load_config, save_config

    config = load_config()
    kb = config.setdefault("knowledgebase", {})
    roots: list[dict[str, Any]] = kb.setdefault("roots", [])

    for r in roots:
        if r.get("path") == path:
            r["recursive"] = recursive
            save_config(config)
            return

    roots.append({"path": path, "recursive": recursive})
    save_config(config)


def _remove_root(path: str) -> None:
    from hermes_cli.config import load_config, save_config

    config = load_config()
    kb = config.get("knowledgebase", {})
    roots: list[dict[str, Any]] = kb.get("roots", [])
    kb["roots"] = [r for r in roots if r.get("path") != path]
    save_config(config)


def _print_human_roots(roots: list[dict[str, Any]]) -> None:
    if not roots:
        print("No workspace roots configured.")
        return
    for r in roots:
        flag = " (recursive)" if r.get("recursive") else ""
        print(f" {r['path']}{flag}")


def _print_human_results(results: list) -> None:
    if not results:
        print("No results found.")
        return
    for r in results:
        section = f" {r.section}" if r.section else ""
        print(f"\n{r.path}:{r.line_start}-{r.line_end} (score: {r.score:.1f}){section}")
        snippet = r.content[:200].replace("\n", " ")
        if len(r.content) > 200:
            snippet += "..."
        print(f" {snippet}")


def _error(msg: str) -> None:
    print(json.dumps({"error": msg}), file=sys.stderr)
    sys.exit(1)


def _fatal(exc: Exception, human: bool) -> None:
    if human:
        print(f"Error: {exc}", file=sys.stderr)
    else:
        print(
            json.dumps({"error": str(exc), "error_type": type(exc).__name__}),
            file=sys.stderr,
        )
    sys.exit(1)
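A sketch of driving workspace_command() directly with a hand-built Namespace (attribute names match the getattr calls above; the query and path values are illustrative, and the real CLI wiring that builds this Namespace lives outside this file):

from argparse import Namespace

workspace_command(Namespace(
    workspace_action="search",
    query="savepoint rollback",
    human=True,          # Rich/plain output instead of JSON
    limit=5,
    path="workspace/",   # resolved via resolve_path_prefix()
    glob=None,
))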
102 workspace/config.py Normal file
@@ -0,0 +1,102 @@
"""Workspace configuration — Pydantic models.

Builds WorkspaceConfig from the main hermes config.yaml dict.
Defaults come from the model field definitions.
"""

from pathlib import Path

from pydantic import BaseModel, ConfigDict, field_validator, model_validator

from workspace.constants import get_workspace_root
from workspace.types import WorkspaceRoot


class ChunkingConfig(BaseModel):
    model_config = ConfigDict(frozen=True, extra="ignore")
    chunk_size: int = 512
    overlap: int | None = None

    @field_validator("chunk_size")
    @classmethod
    def _chunk_size_positive(cls, v: int) -> int:
        if v <= 0:
            raise ValueError(f"chunk_size must be > 0, got {v}")
        return v

    @model_validator(mode="after")
    def _clamp_overlap(self) -> "ChunkingConfig":
        if self.overlap is None:
            object.__setattr__(self, "overlap", min(32, max(0, self.chunk_size - 1)))
        elif self.overlap < 0 or self.overlap >= self.chunk_size:
            raise ValueError(
                f"overlap must be >= 0 and < chunk_size ({self.chunk_size}), got {self.overlap}"
            )
        return self


class IndexingConfig(BaseModel):
    model_config = ConfigDict(frozen=True)
    max_file_mb: int = 10

    @field_validator("max_file_mb")
    @classmethod
    def _positive(cls, v: int) -> int:
        if v <= 0:
            raise ValueError(f"max_file_mb must be > 0, got {v}")
        return v


class SearchConfig(BaseModel):
    model_config = ConfigDict(frozen=True)
    default_limit: int = 20

    @field_validator("default_limit")
    @classmethod
    def _at_least_one(cls, v: int) -> int:
        if v < 1:
            raise ValueError(f"default_limit must be >= 1, got {v}")
        return v


class ParsingConfig(BaseModel):
    model_config = ConfigDict(frozen=True, extra="ignore")
    default: str = "markitdown"
    overrides: dict[str, str] = {}


class KnowledgebaseConfig(BaseModel):
    model_config = ConfigDict(frozen=True)
    roots: list[WorkspaceRoot] = []
    chunking: ChunkingConfig = ChunkingConfig()
    indexing: IndexingConfig = IndexingConfig()
    search: SearchConfig = SearchConfig()
    parsing: ParsingConfig = ParsingConfig()


class WorkspaceConfig(BaseModel):
    model_config = ConfigDict(frozen=True)
    enabled: bool = True
    workspace_root: Path = Path.home() / ".hermes" / "workspace"
    indexer: str = "default"
    plugin_config: dict = {}
    knowledgebase: KnowledgebaseConfig = KnowledgebaseConfig()


def load_workspace_config(raw: dict | None = None) -> WorkspaceConfig:
    if raw is None:
        from hermes_cli.config import load_config

        raw = load_config()
    ws = raw.get("workspace", {})
    kb = raw.get("knowledgebase", {})
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return WorkspaceConfig(
        enabled=ws.get("enabled", True),
        workspace_root=get_workspace_root(hermes_home, ws.get("path", "")),
        indexer=ws.get("indexer", "default"),
        plugin_config=ws.get("plugin_config", {}),
        knowledgebase=KnowledgebaseConfig.model_validate(kb),
    )
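A minimal sketch of the raw dict shape load_workspace_config() accepts, with key names taken from the loader above. The root path is illustrative, and passing roots as plain dicts assumes WorkspaceRoot validates them via pydantic:

raw = {
    "workspace": {"enabled": True, "indexer": "default", "path": ""},
    "knowledgebase": {
        "roots": [{"path": "~/projects/hermes-agent", "recursive": True}],
        "chunking": {"chunk_size": 512},   # overlap defaults to min(32, chunk_size - 1)
        "search": {"default_limit": 20},
    },
}
config = load_workspace_config(raw)
assert config.knowledgebase.chunking.overlap == 32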
230 workspace/constants.py Normal file
@@ -0,0 +1,230 @@
"""Workspace config keys, defaults, and path helpers.

Zero internal dependencies — safe to import from anywhere.
Both workspace/ modules and hermes_cli/config.py import from here.
"""

from pathlib import Path

BINARY_SUFFIXES: frozenset[str] = frozenset(
    {
        # Images
        ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".ico", ".svg",
        # Archives
        ".zip", ".gz", ".tar", ".xz", ".7z", ".bz2", ".rar",
        # Audio / video
        ".mp3", ".wav", ".ogg", ".flac", ".aac",
        ".mp4", ".mov", ".avi", ".mkv", ".webm",
        # Documents (handled by parsers where PARSEABLE, otherwise skipped)
        ".pdf", ".docx", ".doc", ".pptx", ".xlsx",
        # Databases and binaries
        ".sqlite", ".db", ".bin", ".exe", ".dll", ".so", ".dylib", ".wasm",
        # Fonts
        ".woff", ".woff2", ".ttf", ".otf", ".eot",
        # Compiled artifacts
        ".pyc", ".pyo", ".class", ".o", ".obj", ".a", ".lib",
        # Misc
        ".DS_Store", ".lock",
    }
)

PARSEABLE_SUFFIXES: frozenset[str] = frozenset(
    {
        ".pdf", ".docx", ".pptx",
    }
)

CODE_SUFFIXES: frozenset[str] = frozenset(
    {
        ".py", ".js", ".ts", ".tsx", ".jsx", ".rs", ".go", ".java",
        ".c", ".cpp", ".h", ".hpp", ".cs", ".rb", ".php", ".swift",
        ".kt", ".scala", ".lua", ".r", ".m", ".mm", ".zig", ".nim",
        ".ex", ".exs", ".erl", ".hs", ".ml", ".mli", ".clj", ".cljs",
        ".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat", ".cmd",
        ".sql", ".graphql", ".proto", ".thrift",
    }
)

MARKDOWN_SUFFIXES: frozenset[str] = frozenset(
    {
        ".md", ".mdx", ".markdown", ".mdown", ".mkd",
    }
)

WORKSPACE_SUBDIRS = ("docs", "notes", "data", "code", "uploads", "media")

WORKSPACE_CONFIG_DEFAULTS = {
    "enabled": True,
    "path": "",
}

KNOWLEDGEBASE_CONFIG_DEFAULTS = {
    "roots": [],
    "chunking": {
        "chunk_size": 512,
        "overlap": None,
    },
    "indexing": {
        "max_file_mb": 10,
    },
    "search": {
        "default_limit": 20,
    },
}

CHUNKING_PLAN_VERSION = "v2"

INDEX_DIR_NAME = ".index"
INDEX_DB_NAME = "workspace.sqlite"
HERMESIGNORE_NAME = ".hermesignore"
GITIGNORE_NAME = ".gitignore"

DEFAULT_IGNORE_PATTERNS = """\
# Version control
.git/
.svn/
.hg/

# OS files
.DS_Store
Thumbs.db
Desktop.ini

# IDE / editor
.idea/
.vscode/
*.swp
*.swo
*~

# Python
__pycache__/
*.pyc
*.pyo
.tox/
.venv/
venv/
.env/
*.egg-info/
.eggs/
dist/
build/

# JavaScript / Node
node_modules/
bower_components/
.npm/
.yarn/

# Build outputs
target/
out/
_build/

# Hermes internals
.index/
.hermesignore
"""


def get_workspace_root(hermes_home: Path, workspace_path: str = "") -> Path:
    if workspace_path:
        return Path(workspace_path).expanduser().resolve()
    return hermes_home / "workspace"


def get_index_dir(workspace_root: Path) -> Path:
    return workspace_root / INDEX_DIR_NAME


def get_index_db_path(workspace_root: Path) -> Path:
    return get_index_dir(workspace_root) / INDEX_DB_NAME


def resolve_path_prefix(raw: str | None) -> str | None:
    """Resolve a user-supplied path prefix to its canonical absolute form.

    Mirrors the indexer's ``Path(...).resolve()`` on stored paths so that
    search byte-prefix comparisons line up regardless of symlinks.
    """
    if not raw:
        return None
    return str(Path(raw).resolve())
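A quick sketch of how the path helpers above compose (the home directory is illustrative):

from pathlib import Path

root = get_workspace_root(Path.home() / ".hermes")  # -> ~/.hermes/workspace
db = get_index_db_path(root)                        # -> ~/.hermes/workspace/.index/workspace.sqlite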
591 workspace/default.py Normal file
@@ -0,0 +1,591 @@
|
||||
"""DefaultIndexer — built-in Chonkie + SQLite FTS5 workspace backend."""
|
||||
|
||||
import dataclasses
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
from workspace.base import BaseIndexer, ProgressCallback
|
||||
from workspace.config import ChunkingConfig, WorkspaceConfig
|
||||
from workspace.constants import (
|
||||
CHUNKING_PLAN_VERSION,
|
||||
CODE_SUFFIXES,
|
||||
MARKDOWN_SUFFIXES,
|
||||
WORKSPACE_SUBDIRS,
|
||||
get_index_dir,
|
||||
resolve_path_prefix,
|
||||
)
|
||||
from workspace.files import discover_workspace_files, seed_hermesignore
|
||||
from workspace.parsers import build_parser
|
||||
from workspace.store import SQLiteFTS5Store
|
||||
from workspace.types import (
|
||||
ChunkRecord,
|
||||
FileRecord,
|
||||
IndexingError,
|
||||
IndexSummary,
|
||||
SearchResult,
|
||||
)
|
||||
|
||||
PipelineKind = Literal["markdown", "code", "plain"]
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_replace = dataclasses.replace
|
||||
|
||||
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
|
||||
_NEWLINE_RE = re.compile(r"\n")
|
||||
_MAX_ERRORS = 50
|
||||
|
||||
|
||||
class DefaultIndexer(BaseIndexer):
|
||||
def __init__(self, config: WorkspaceConfig) -> None:
|
||||
self._config = config
|
||||
self._parser = build_parser(config.knowledgebase.parsing)
|
||||
|
||||
def index(self, *, progress: ProgressCallback | None = None) -> IndexSummary:
|
||||
self._require_chonkie()
|
||||
|
||||
start = time.monotonic()
|
||||
self._ensure_workspace_dirs()
|
||||
config_sig = self._config_signature()
|
||||
|
||||
files_indexed = 0
|
||||
files_skipped = 0
|
||||
files_errored = 0
|
||||
chunks_created = 0
|
||||
errors: list[IndexingError] = []
|
||||
|
||||
discovery = discover_workspace_files(self._config)
|
||||
files_skipped += discovery.filtered_count
|
||||
all_files = discovery.files
|
||||
total = len(all_files)
|
||||
disk_paths: set[str] = set()
|
||||
|
||||
pipelines = self._build_pipelines(self._config.knowledgebase.chunking)
|
||||
|
||||
with SQLiteFTS5Store(self._config.workspace_root) as store:
|
||||
for i, (root_path, file_path) in enumerate(all_files):
|
||||
abs_path = str(file_path.resolve())
|
||||
disk_paths.add(abs_path)
|
||||
write_started = False
|
||||
|
||||
if progress:
|
||||
progress(i + 1, total, abs_path)
|
||||
|
||||
try:
|
||||
content_hash = _file_hash(file_path)
|
||||
existing = store.get_file_record(abs_path)
|
||||
if (
|
||||
existing
|
||||
and existing.content_hash == content_hash
|
||||
and existing.config_signature == config_sig
|
||||
):
|
||||
files_skipped += 1
|
||||
continue
|
||||
|
||||
suffix = file_path.suffix.lower()
|
||||
if self._parser.can_parse(suffix):
|
||||
text = self._parser.parse(file_path)
|
||||
if text is None:
|
||||
files_errored += 1
|
||||
_append_error(
|
||||
errors,
|
||||
IndexingError(
|
||||
path=abs_path,
|
||||
stage="parse",
|
||||
error_type="ParseError",
|
||||
message=f"Parser failed to convert {suffix} file",
|
||||
),
|
||||
)
|
||||
continue
|
||||
suffix = ".md"
|
||||
else:
|
||||
text = _read_file_text(file_path)
|
||||
if text is None:
|
||||
files_errored += 1
|
||||
_append_error(
|
||||
errors,
|
||||
IndexingError(
|
||||
path=abs_path,
|
||||
stage="read",
|
||||
error_type="EncodingError",
|
||||
message="Could not decode file with sufficient confidence",
|
||||
),
|
||||
)
|
||||
continue
|
||||
|
||||
if not text.strip():
|
||||
files_skipped += 1
|
||||
continue
|
||||
|
||||
chunk_records = self._process_file(
|
||||
abs_path, text, suffix, pipelines
|
||||
)
|
||||
|
||||
stat = file_path.stat()
|
||||
record = FileRecord(
|
||||
abs_path=abs_path,
|
||||
root_path=root_path,
|
||||
content_hash=content_hash,
|
||||
config_signature=config_sig,
|
||||
size_bytes=stat.st_size,
|
||||
modified_at=datetime.fromtimestamp(
|
||||
stat.st_mtime, tz=timezone.utc
|
||||
).isoformat(),
|
||||
indexed_at=datetime.now(tz=timezone.utc).isoformat(),
|
||||
chunk_count=len(chunk_records),
|
||||
)
|
||||
|
||||
store.conn.execute("SAVEPOINT workspace_file_update")
|
||||
write_started = True
|
||||
store.delete_chunks_for_file(abs_path)
|
||||
store.upsert_file(record)
|
||||
if chunk_records:
|
||||
store.insert_chunks(chunk_records)
|
||||
store.conn.execute("RELEASE SAVEPOINT workspace_file_update")
|
||||
store.commit()
|
||||
write_started = False
|
||||
|
||||
files_indexed += 1
|
||||
chunks_created += len(chunk_records)
|
||||
|
||||
except Exception as exc:
|
||||
if write_started:
|
||||
try:
|
||||
store.conn.execute(
|
||||
"ROLLBACK TO SAVEPOINT workspace_file_update"
|
||||
)
|
||||
store.conn.execute(
|
||||
"RELEASE SAVEPOINT workspace_file_update"
|
||||
)
|
||||
except Exception:
|
||||
log.warning(
|
||||
"Failed to roll back workspace update for %s",
|
||||
abs_path,
|
||||
exc_info=True,
|
||||
)
|
||||
files_errored += 1
|
||||
stage = "read" if isinstance(exc, FileNotFoundError) else "store"
|
||||
_append_error(
|
||||
errors,
|
||||
IndexingError(
|
||||
path=abs_path,
|
||||
stage=stage,
|
||||
error_type=type(exc).__name__,
|
||||
message=str(exc),
|
||||
),
|
||||
)
|
||||
log.warning("Failed to index %s: %s", abs_path, exc, exc_info=True)
|
||||
continue
|
||||
|
||||
if discovery.complete:
|
||||
pruned = _prune_stale(store, disk_paths)
|
||||
else:
|
||||
pruned = 0
|
||||
log.warning(
|
||||
"Workspace discovery was incomplete; skipping stale prune for this run"
|
||||
)
|
||||
store.commit()
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
return IndexSummary(
|
||||
files_indexed=files_indexed,
|
||||
files_skipped=files_skipped,
|
||||
files_pruned=pruned,
|
||||
files_errored=files_errored,
|
||||
chunks_created=chunks_created,
|
||||
duration_seconds=elapsed,
|
||||
errors=errors,
|
||||
errors_truncated=files_errored > _MAX_ERRORS,
|
||||
)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
limit: int = 20,
|
||||
path_prefix: str | None = None,
|
||||
file_glob: str | None = None,
|
||||
) -> list[SearchResult]:
|
||||
if limit is None:
|
||||
limit = self._config.knowledgebase.search.default_limit
|
||||
|
||||
resolved_prefix = resolve_path_prefix(path_prefix)
|
||||
|
||||
with SQLiteFTS5Store(self._config.workspace_root) as store:
|
||||
return store.search(
|
||||
query,
|
||||
limit=limit,
|
||||
path_prefix=resolved_prefix,
|
||||
file_glob=file_glob,
|
||||
)
|
||||
|
||||
def status(self) -> dict[str, Any]:
|
||||
with SQLiteFTS5Store(self._config.workspace_root) as store:
|
||||
return store.status()
|
||||
|
||||
def list_files(self) -> list[dict[str, Any]]:
|
||||
with SQLiteFTS5Store(self._config.workspace_root) as store:
|
||||
return [
|
||||
{
|
||||
"path": r.abs_path,
|
||||
"root": r.root_path,
|
||||
"size_bytes": r.size_bytes,
|
||||
"chunks": r.chunk_count,
|
||||
"modified": r.modified_at,
|
||||
"indexed": r.indexed_at,
|
||||
}
|
||||
for r in store.list_files()
|
||||
]
|
||||
|
||||
def retrieve(self, path: str) -> list[SearchResult]:
|
||||
with SQLiteFTS5Store(self._config.workspace_root) as store:
|
||||
return store.get_chunks_for_file(path)
|
||||
|
||||
def delete(self, path: str) -> bool:
|
||||
with SQLiteFTS5Store(self._config.workspace_root) as store:
|
||||
if store.get_file_record(path) is None:
|
||||
return False
|
||||
store.delete_file(path)
|
||||
store.commit()
|
||||
return True
|
||||
|
||||
def _build_pipelines(self, ch: ChunkingConfig) -> dict[PipelineKind, Any]:
|
||||
from chonkie import Pipeline
|
||||
|
||||
overlap_kwargs = dict(
|
||||
tokenizer="word",
|
||||
context_size=ch.overlap,
|
||||
mode="token",
|
||||
method="suffix",
|
||||
merge=False,
|
||||
)
|
||||
return {
|
||||
"markdown": (
|
||||
Pipeline()
|
||||
.process_with("markdown", tokenizer="word")
|
||||
.chunk_with("recursive", tokenizer="word", chunk_size=ch.chunk_size)
|
||||
.refine_with("overlap", **overlap_kwargs)
|
||||
),
|
||||
"code": (
|
||||
Pipeline()
|
||||
.chunk_with(
|
||||
"code",
|
||||
tokenizer="word",
|
||||
chunk_size=ch.chunk_size,
|
||||
language="auto",
|
||||
)
|
||||
.refine_with("overlap", **overlap_kwargs)
|
||||
),
|
||||
"plain": (
|
||||
Pipeline()
|
||||
.chunk_with("recursive", tokenizer="word", chunk_size=ch.chunk_size)
|
||||
.refine_with("overlap", **overlap_kwargs)
|
||||
),
|
||||
}
|
||||
|
||||
def _process_file(
|
||||
self,
|
||||
abs_path: str,
|
||||
text: str,
|
||||
suffix: str,
|
||||
pipelines: dict[PipelineKind, Any],
|
||||
) -> list[ChunkRecord]:
|
||||
if suffix in MARKDOWN_SUFFIXES:
|
||||
return self._process_markdown(abs_path, text, pipelines)
|
||||
elif suffix in CODE_SUFFIXES:
|
||||
return self._process_code(abs_path, text, pipelines)
|
||||
else:
|
||||
return self._process_plain(abs_path, text, pipelines)
|
||||
|
||||
def _process_markdown(
|
||||
self,
|
||||
abs_path: str,
|
||||
text: str,
|
||||
pipelines: dict[PipelineKind, Any],
|
||||
) -> list[ChunkRecord]:
|
||||
from chonkie.types import MarkdownDocument
|
||||
|
||||
result = pipelines["markdown"].run(texts=text)
|
||||
assert isinstance(result, MarkdownDocument), (
|
||||
f"markdown pipeline returned {type(result).__name__}"
|
||||
)
|
||||
doc = result
|
||||
|
||||
headings = _scan_headings(text)
|
||||
line_offsets = _build_line_offsets(text)
|
||||
candidates: list[ChunkRecord] = []
|
||||
|
||||
for chunk in doc.chunks:
|
||||
if not chunk.text.strip():
|
||||
continue
|
||||
sc, ec = chunk.start_index, chunk.end_index
|
||||
candidates.append(
|
||||
ChunkRecord(
|
||||
chunk_id=_make_id(),
|
||||
abs_path=abs_path,
|
||||
chunk_index=0,
|
||||
content=chunk.text,
|
||||
token_count=chunk.token_count,
|
||||
start_line=_offset_to_line(line_offsets, sc),
|
||||
end_line=_offset_to_line(line_offsets, max(0, ec - 1)),
|
||||
start_char=sc,
|
||||
end_char=ec,
|
||||
section=_nearest_heading(headings, sc),
|
||||
kind="markdown_text",
|
||||
context=chunk.context,
|
||||
)
|
||||
)
|
||||
|
||||
for code in doc.code:
|
||||
if not code.content.strip():
|
||||
continue
|
||||
sc, ec = code.start_index, code.end_index
|
||||
metadata = (
|
||||
json.dumps({"language": code.language}) if code.language else None
|
||||
)
|
||||
candidates.append(
|
||||
ChunkRecord(
|
||||
chunk_id=_make_id(),
|
||||
abs_path=abs_path,
|
||||
chunk_index=0,
|
||||
content=code.content,
|
||||
token_count=len(code.content.split()),
|
||||
start_line=_offset_to_line(line_offsets, sc),
|
||||
end_line=_offset_to_line(line_offsets, max(0, ec - 1)),
|
||||
start_char=sc,
|
||||
end_char=ec,
|
||||
section=_nearest_heading(headings, sc),
|
||||
kind="markdown_code",
|
||||
chunk_metadata=metadata,
|
||||
)
|
||||
)
|
||||
|
||||
for table in doc.tables:
|
||||
if not table.content.strip():
|
||||
continue
|
||||
sc, ec = table.start_index, table.end_index
|
||||
candidates.append(
|
||||
ChunkRecord(
|
||||
chunk_id=_make_id(),
|
||||
abs_path=abs_path,
|
||||
chunk_index=0,
|
||||
content=table.content,
|
||||
token_count=len(table.content.split()),
|
||||
start_line=_offset_to_line(line_offsets, sc),
|
||||
end_line=_offset_to_line(line_offsets, max(0, ec - 1)),
|
||||
start_char=sc,
|
||||
end_char=ec,
|
||||
section=_nearest_heading(headings, sc),
|
||||
kind="markdown_table",
|
||||
)
|
||||
)
|
||||
|
||||
for image in doc.images:
|
||||
if not image.alias:
|
||||
continue
|
||||
sc, ec = image.start_index, image.end_index
|
||||
candidates.append(
|
||||
ChunkRecord(
|
||||
chunk_id=_make_id(),
|
||||
abs_path=abs_path,
|
||||
chunk_index=0,
|
||||
content=image.alias,
|
||||
token_count=len(image.alias.split()),
|
||||
start_line=_offset_to_line(line_offsets, sc),
|
||||
end_line=_offset_to_line(line_offsets, max(0, ec - 1)),
|
||||
start_char=sc,
|
||||
end_char=ec,
|
||||
section=_nearest_heading(headings, sc),
|
||||
kind="markdown_image",
|
||||
)
|
||||
)
|
||||
|
||||
candidates.sort(key=lambda c: c.start_char)
|
||||
return [_replace(c, chunk_index=i) for i, c in enumerate(candidates)]
|
||||
|
||||
def _process_code(
|
||||
self,
|
||||
abs_path: str,
|
||||
text: str,
|
||||
pipelines: dict[PipelineKind, Any],
|
||||
) -> list[ChunkRecord]:
|
||||
from chonkie.types import Document
|
||||
|
||||
result = pipelines["code"].run(texts=text)
|
||||
assert isinstance(result, Document), (
|
||||
f"code pipeline returned {type(result).__name__}"
|
||||
)
|
||||
doc = result
|
||||
line_offsets = _build_line_offsets(text)
|
||||
records: list[ChunkRecord] = []
|
||||
for i, chunk in enumerate(doc.chunks):
|
||||
sc, ec = chunk.start_index, chunk.end_index
|
||||
records.append(
|
||||
ChunkRecord(
|
||||
chunk_id=_make_id(),
|
||||
abs_path=abs_path,
|
||||
chunk_index=i,
|
||||
content=chunk.text,
|
||||
token_count=chunk.token_count,
|
||||
start_line=_offset_to_line(line_offsets, sc),
|
||||
end_line=_offset_to_line(line_offsets, max(0, ec - 1)),
|
||||
start_char=sc,
|
||||
end_char=ec,
|
||||
section=None,
|
||||
kind="code",
|
||||
chunk_metadata=None,
|
||||
context=chunk.context,
|
||||
)
|
||||
)
|
||||
return records
|
||||
|
||||
def _process_plain(
|
||||
self,
|
||||
abs_path: str,
|
||||
text: str,
|
||||
pipelines: dict[PipelineKind, Any],
|
||||
) -> list[ChunkRecord]:
|
||||
from chonkie.types import Document
|
||||
|
||||
result = pipelines["plain"].run(texts=text)
|
||||
assert isinstance(result, Document), (
|
||||
f"plain pipeline returned {type(result).__name__}"
|
||||
)
|
||||
doc = result
|
||||
line_offsets = _build_line_offsets(text)
|
||||
records: list[ChunkRecord] = []
|
||||
for i, chunk in enumerate(doc.chunks):
|
||||
sc, ec = chunk.start_index, chunk.end_index
|
||||
records.append(
|
||||
ChunkRecord(
|
||||
chunk_id=_make_id(),
|
||||
abs_path=abs_path,
|
||||
chunk_index=i,
|
||||
content=chunk.text,
|
||||
token_count=chunk.token_count,
|
||||
start_line=_offset_to_line(line_offsets, sc),
|
||||
end_line=_offset_to_line(line_offsets, max(0, ec - 1)),
|
||||
start_char=sc,
|
||||
end_char=ec,
|
||||
section=None,
|
||||
kind="text",
|
||||
context=chunk.context,
|
||||
)
|
||||
)
|
||||
return records
|
||||
|
||||
def _ensure_workspace_dirs(self) -> None:
|
||||
root = self._config.workspace_root
|
||||
root.mkdir(parents=True, exist_ok=True)
|
||||
for sub in WORKSPACE_SUBDIRS:
|
||||
(root / sub).mkdir(exist_ok=True)
|
||||
get_index_dir(root).mkdir(parents=True, exist_ok=True)
|
||||
seed_hermesignore(root)
|
||||
|
||||
def _require_chonkie(self) -> None:
|
||||
try:
|
||||
import chonkie # noqa: F401
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"Chonkie is required for workspace indexing. "
|
||||
"Install it with: pip install hermes-agent[workspace]"
|
||||
)
|
||||
|
||||
def _config_signature(self) -> str:
|
||||
ch = self._config.knowledgebase.chunking
|
||||
pa = self._config.knowledgebase.parsing
|
||||
blob = json.dumps(
|
||||
{
|
||||
"chunk_size": ch.chunk_size,
|
||||
"overlap": ch.overlap,
|
||||
"overlap_mode": "token",
|
||||
"overlap_method": "suffix",
|
||||
"code_chunker": "production_v1",
|
||||
"chunking_plan_version": CHUNKING_PLAN_VERSION,
|
||||
"parsing_default": pa.default,
|
||||
"parsing_overrides": dict(sorted(pa.overrides.items())),
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
return hashlib.sha256(blob.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
def _append_error(errors: list[IndexingError], error: IndexingError) -> None:
|
||||
if len(errors) < _MAX_ERRORS:
|
||||
errors.append(error)
|
||||
|
||||
|
||||
def _read_file_text(path: Path) -> str | None:
|
||||
raw = path.read_bytes()
|
||||
try:
|
||||
return raw.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
try:
|
||||
from charset_normalizer import from_bytes
|
||||
|
||||
result = from_bytes(raw).best()
|
||||
if result is None or result.encoding is None:
|
||||
return None
|
||||
if result.coherence < 0.5:
|
||||
return None
|
||||
return str(result)
|
||||
except ImportError:
|
||||
log.debug("charset-normalizer not installed, skipping non-UTF8 file: %s", path)
|
||||
return None
|
||||
|
||||
|
||||
def _scan_headings(text: str) -> list[tuple[int, str]]:
    return [(m.start(), m.group(0).strip()) for m in _HEADING_RE.finditer(text)]


def _nearest_heading(headings: list[tuple[int, str]], char_offset: int) -> str | None:
    best = None
    for offset, heading in headings:
        if offset <= char_offset:
            best = heading
        else:
            break
    return best


def _build_line_offsets(text: str) -> list[int]:
    return [0] + [m.end() for m in _NEWLINE_RE.finditer(text)]


def _offset_to_line(offsets: list[int], char_offset: int) -> int:
    lo, hi = 0, len(offsets) - 1
    while lo < hi:
        mid = (lo + hi + 1) // 2
        if offsets[mid] <= char_offset:
            lo = mid
        else:
            hi = mid - 1
    return lo + 1
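
# Worked example, assuming _NEWLINE_RE matches each "\n": for text "a\nbc\n",
# _build_line_offsets() yields [0, 2, 5] (the char offset where each line
# starts). _offset_to_line(offsets, 3) binary-searches for the greatest line
# start <= 3, lands on index 1, and returns 1 + 1 = 2: char 3 ('c') sits on
# line 2, 1-based.
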
def _file_hash(path: Path) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(65536), b""):
            h.update(block)
    return h.hexdigest()


def _make_id() -> str:
    return f"chnk_{uuid.uuid4().hex[:12]}"


def _prune_stale(store: SQLiteFTS5Store, disk_paths: set[str]) -> int:
    indexed = store.all_indexed_paths()
    stale = indexed - disk_paths
    for path in stale:
        store.delete_file(path)
    return len(stale)

workspace/files.py (new file, 135 lines)
@@ -0,0 +1,135 @@
"""File discovery and filtering for workspace indexing.

Iterates workspace roots, applies ignore patterns (via pathspec), and
skips binary files and files over the size limit.

Ignore file precedence per root (first match wins):
1. root/.hermesignore
2. root/.gitignore
3. Built-in default patterns
"""

import logging
from dataclasses import dataclass
from pathlib import Path

from workspace.config import WorkspaceConfig
from workspace.constants import (
    BINARY_SUFFIXES,
    DEFAULT_IGNORE_PATTERNS,
    GITIGNORE_NAME,
    HERMESIGNORE_NAME,
    PARSEABLE_SUFFIXES,
)
from workspace.types import WorkspaceRoot

log = logging.getLogger(__name__)


@dataclass(frozen=True)
class DiscoveryResult:
    files: list[tuple[str, Path]]
    complete: bool
    filtered_count: int = 0


def discover_workspace_files(config: WorkspaceConfig) -> DiscoveryResult:
    """Collect workspace files plus whether discovery completed across all roots."""
    max_bytes = config.knowledgebase.indexing.max_file_mb * 1024 * 1024

    all_roots = [
        WorkspaceRoot(path=str(config.workspace_root), recursive=True),
        *config.knowledgebase.roots,
    ]

    files: list[tuple[str, Path]] = []
    complete = True
    filtered_count = 0

    for root_spec in all_roots:
        root = Path(root_spec.path).expanduser().resolve()
        if not root.is_dir():
            log.warning("Workspace root does not exist: %s", root)
            complete = False
            continue

        ignore_spec = _load_ignore_spec(root)
        iterator = root.rglob("*") if root_spec.recursive else root.iterdir()

        try:
            paths = sorted(iterator)
        except OSError:
            log.warning("Failed to enumerate workspace root: %s", root, exc_info=True)
            complete = False
            continue

        for p in paths:
            if not p.is_file():
                continue
            if p.name == HERMESIGNORE_NAME:
                # The ignore-rules file is never itself an indexable artifact —
                # hardcoded so a user editing their .hermesignore to remove the
                # self-reference can't accidentally re-enable indexing of it.
                continue
            if p.suffix.lower() in BINARY_SUFFIXES and p.suffix.lower() not in PARSEABLE_SUFFIXES:
                continue
            try:
                size = p.stat().st_size
            except (FileNotFoundError, OSError):
                log.debug("File vanished during discovery: %s", p)
                continue
            if size > max_bytes:
                log.debug("Skipping oversized file: %s", p)
                filtered_count += 1
                continue
            if size == 0:
                filtered_count += 1
                continue
            if ignore_spec is not None and _is_ignored(p, root, ignore_spec):
                continue
            files.append((str(root), p))

    return DiscoveryResult(
        files=files, complete=complete, filtered_count=filtered_count
    )
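
# Usage sketch (the WorkspaceConfig value is assumed to come from the caller;
# only fields this module actually reads are relied on):
#
#   result = discover_workspace_files(config)
#   for root, path in result.files:
#       index_file(root, path)           # hypothetical downstream step
#   if not result.complete:
#       log.warning("some roots were skipped; the index may be partial")
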
def seed_hermesignore(workspace_root: Path) -> None:
    """Create .hermesignore in the workspace root if it doesn't exist."""
    ignore_file = workspace_root / HERMESIGNORE_NAME
    if not ignore_file.exists():
        ignore_file.write_text(DEFAULT_IGNORE_PATTERNS, encoding="utf-8")


def _load_ignore_spec(root: Path):
    """Load ignore patterns for a root: .hermesignore → .gitignore → defaults."""
    try:
        import pathspec
    except ImportError:
        log.warning("pathspec not installed — ignore patterns will not be applied")
        return None

    hermesignore = root / HERMESIGNORE_NAME
    if hermesignore.is_file():
        try:
            text = hermesignore.read_text(encoding="utf-8", errors="replace")
            return pathspec.PathSpec.from_lines("gitwildmatch", text.splitlines())
        except Exception:
            log.warning("Failed to parse %s", hermesignore, exc_info=True)

    gitignore = root / GITIGNORE_NAME
    if gitignore.is_file():
        try:
            text = gitignore.read_text(encoding="utf-8", errors="replace")
            return pathspec.PathSpec.from_lines("gitwildmatch", text.splitlines())
        except Exception:
            log.warning("Failed to parse %s", gitignore, exc_info=True)

    return pathspec.PathSpec.from_lines(
        "gitwildmatch", DEFAULT_IGNORE_PATTERNS.splitlines()
    )


def _is_ignored(path: Path, root: Path, spec) -> bool:
    rel = path.relative_to(root).as_posix()
    return spec.match_file(rel)

workspace/parsers.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""File parsers -- convert binary formats (.pdf, .docx, .pptx) to markdown.

FileParser ABC defines the contract. Built-in backends: markitdown, pandoc.
CompositeParser routes extensions to the configured backend.
"""

from __future__ import annotations

import logging
import subprocess
from abc import ABC, abstractmethod
from pathlib import Path
from typing import ClassVar

from workspace.config import ParsingConfig
from workspace.constants import PARSEABLE_SUFFIXES

log = logging.getLogger(__name__)


class FileParser(ABC):
    name: ClassVar[str]

    @abstractmethod
    def supported_suffixes(self) -> frozenset[str]: ...

    @abstractmethod
    def _convert(self, path: Path) -> str: ...

    def parse(self, path: Path) -> str | None:
        try:
            result = self._convert(path)
            return result if result and result.strip() else None
        except Exception as exc:
            log.warning("Parser %s failed on %s: %s", self.name, path, exc)
            return None


class MarkitdownParser(FileParser):
    name = "markitdown"

    def __init__(self) -> None:
        self._md: object | None = None

    def supported_suffixes(self) -> frozenset[str]:
        return frozenset({".pdf", ".docx", ".pptx"})

    def _convert(self, path: Path) -> str:
        if self._md is None:
            from markitdown import MarkItDown

            self._md = MarkItDown()
        result = self._md.convert(str(path))  # type: ignore[union-attr]
        return result.markdown


class PandocParser(FileParser):
    name = "pandoc"

    def supported_suffixes(self) -> frozenset[str]:
        return frozenset({".pdf", ".docx", ".pptx"})

    def _convert(self, path: Path) -> str:
        result = subprocess.run(
            ["pandoc", str(path), "-t", "markdown"],
            capture_output=True,
            text=True,
            timeout=120,
        )
        result.check_returncode()
        return result.stdout


class CompositeParser:
    """Routes file extensions to the appropriate parser backend."""

    def __init__(self, routing: dict[str, FileParser]) -> None:
        self._routing = routing

    def parse(self, path: Path) -> str | None:
        suffix = path.suffix.lower()
        parser = self._routing.get(suffix)
        if parser is None:
            return None
        return parser.parse(path)

    def can_parse(self, suffix: str) -> bool:
        return suffix.lower() in self._routing


_PARSER_CLASSES: list[type[FileParser]] = [MarkitdownParser, PandocParser]


def build_parser(config: ParsingConfig) -> CompositeParser:
    available: dict[str, FileParser] = {}
    for cls in _PARSER_CLASSES:
        instance = cls()
        available[instance.name] = instance

    routing: dict[str, FileParser] = {}
    for suffix in PARSEABLE_SUFFIXES:
        backend_name = config.overrides.get(suffix, config.default)
        parser = available.get(backend_name)
        if parser is None:
            log.warning("Unknown parser backend %r for %s — skipping", backend_name, suffix)
            continue
        if suffix in parser.supported_suffixes():
            routing[suffix] = parser

    return CompositeParser(routing)
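
# Usage sketch. ParsingConfig's `default` and `overrides` fields are the ones
# build_parser() reads; constructing it by keyword like this is an assumption
# about its signature.
#
#   parser = build_parser(ParsingConfig(default="markitdown",
#                                       overrides={".pdf": "pandoc"}))
#   parser.can_parse(".pdf")             # True (routed to PandocParser)
#   parser.parse(Path("report.docx"))    # markdown via MarkItDown, or None
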

workspace/store.py (new file, 399 lines)
@@ -0,0 +1,399 @@
"""SQLite FTS5 workspace store.

Manages the workspace.sqlite database: schema creation, file/chunk CRUD,
and BM25 full-text search.
"""

import re
import sqlite3
import time
from pathlib import Path
from typing import Any

from workspace.constants import get_index_db_path, get_index_dir
from workspace.types import ChunkRecord, FileRecord, SearchResult

_SCHEMA_VERSION = "1"

_SCHEMA_SQL = """\
PRAGMA journal_mode = WAL;
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS files (
    abs_path TEXT PRIMARY KEY,
    root_path TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    config_signature TEXT NOT NULL,
    size_bytes INTEGER NOT NULL,
    modified_at TEXT NOT NULL,
    indexed_at TEXT NOT NULL,
    chunk_count INTEGER NOT NULL DEFAULT 0
);

CREATE TABLE IF NOT EXISTS chunks (
    chunk_id TEXT PRIMARY KEY,
    abs_path TEXT NOT NULL REFERENCES files(abs_path) ON DELETE CASCADE,
    chunk_index INTEGER NOT NULL,
    content TEXT NOT NULL,
    context TEXT,
    token_count INTEGER NOT NULL,
    start_line INTEGER NOT NULL,
    end_line INTEGER NOT NULL,
    start_char INTEGER NOT NULL,
    end_char INTEGER NOT NULL,
    section TEXT,
    kind TEXT NOT NULL,
    chunk_metadata TEXT,
    UNIQUE(abs_path, chunk_index)
);

CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
    chunk_id UNINDEXED,
    abs_path UNINDEXED,
    retrieval_text,
    section,
    tokenize = 'porter unicode61'
);

CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
    INSERT INTO chunks_fts(chunk_id, abs_path, retrieval_text, section)
    VALUES (
        new.chunk_id,
        new.abs_path,
        new.content || ' ' || COALESCE(new.context, ''),
        new.section
    );
END;

CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
    DELETE FROM chunks_fts WHERE chunk_id = old.chunk_id;
END;
"""
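
# The chunks_ai / chunks_ad triggers mirror chunk inserts and deletes into
# chunks_fts, so callers never write to the FTS table directly. retrieval_text
# concatenates chunk content with its optional context, letting a BM25 hit on
# either surface the chunk.
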
class SQLiteFTS5Store:
    """Concrete FTS5-based workspace index store."""

    def __init__(self, workspace_root: Path) -> None:
        self._db_path = get_index_db_path(workspace_root)
        self._conn: sqlite3.Connection | None = None

    def open(self) -> None:
        index_dir = get_index_dir(self._db_path.parent.parent)
        index_dir.mkdir(parents=True, exist_ok=True)
        # timeout=5.0 sets PRAGMA busy_timeout so regular writes wait on locks
        # instead of raising "database is locked" when another process is
        # mid-write. This covers every statement except `PRAGMA journal_mode
        # = WAL`, which SQLite doesn't subject to busy_timeout — that
        # specific pragma is retried in _init_schema.
        self._conn = sqlite3.connect(str(self._db_path), timeout=5.0)
        self._conn.row_factory = sqlite3.Row
        self._init_schema()

    def close(self) -> None:
        if self._conn:
            self._conn.close()
            self._conn = None

    def __enter__(self) -> "SQLiteFTS5Store":
        self.open()
        return self

    def __exit__(self, *exc: Any) -> None:
        self.close()

    @property
    def conn(self) -> sqlite3.Connection:
        if self._conn is None:
            raise RuntimeError("Store not open — call open() or use as context manager")
        return self._conn

    def _init_schema(self) -> None:
        cur = self.conn.cursor()
        try:
            existing = cur.execute(
                "SELECT value FROM meta WHERE key = 'schema_version'"
            ).fetchone()
        except sqlite3.OperationalError:
            existing = None

        if existing is not None and existing[0] != _SCHEMA_VERSION:
            cur.executescript(
                "DROP TABLE IF EXISTS chunks_fts;"
                "DROP TABLE IF EXISTS chunks;"
                "DROP TABLE IF EXISTS files;"
                "DROP TABLE IF EXISTS meta;"
                "DROP TRIGGER IF EXISTS chunks_ai;"
                "DROP TRIGGER IF EXISTS chunks_ad;"
            )

        _execute_with_lock_retry(cur, _SCHEMA_SQL)

        cur.execute(
            "INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?)",
            (_SCHEMA_VERSION,),
        )
        self.conn.commit()

    def get_file_record(self, abs_path: str) -> FileRecord | None:
        row = self.conn.execute(
            "SELECT * FROM files WHERE abs_path = ?", (abs_path,)
        ).fetchone()
        if row is None:
            return None
        return FileRecord(
            abs_path=row["abs_path"],
            root_path=row["root_path"],
            content_hash=row["content_hash"],
            config_signature=row["config_signature"],
            size_bytes=row["size_bytes"],
            modified_at=row["modified_at"],
            indexed_at=row["indexed_at"],
            chunk_count=row["chunk_count"],
        )

    def upsert_file(self, record: FileRecord) -> None:
        self.conn.execute(
            """INSERT INTO files (abs_path, root_path, content_hash, config_signature,
                   size_bytes, modified_at, indexed_at, chunk_count)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?)
               ON CONFLICT(abs_path) DO UPDATE SET
                   root_path = excluded.root_path,
                   content_hash = excluded.content_hash,
                   config_signature = excluded.config_signature,
                   size_bytes = excluded.size_bytes,
                   modified_at = excluded.modified_at,
                   indexed_at = excluded.indexed_at,
                   chunk_count = excluded.chunk_count""",
            (
                record.abs_path,
                record.root_path,
                record.content_hash,
                record.config_signature,
                record.size_bytes,
                record.modified_at,
                record.indexed_at,
                record.chunk_count,
            ),
        )

    def delete_file(self, abs_path: str) -> None:
        self.conn.execute("DELETE FROM files WHERE abs_path = ?", (abs_path,))

    def insert_chunks(self, chunks: list[ChunkRecord]) -> None:
        self.conn.executemany(
            """INSERT INTO chunks (chunk_id, abs_path, chunk_index, content,
                   context, token_count, start_line, end_line, start_char,
                   end_char, section, kind, chunk_metadata)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            [
                (
                    c.chunk_id,
                    c.abs_path,
                    c.chunk_index,
                    c.content,
                    c.context,
                    c.token_count,
                    c.start_line,
                    c.end_line,
                    c.start_char,
                    c.end_char,
                    c.section,
                    c.kind,
                    c.chunk_metadata,
                )
                for c in chunks
            ],
        )

    def delete_chunks_for_file(self, abs_path: str) -> None:
        self.conn.execute("DELETE FROM chunks WHERE abs_path = ?", (abs_path,))

    def search(
        self,
        query: str,
        *,
        limit: int = 20,
        path_prefix: str | None = None,
        file_glob: str | None = None,
    ) -> list[SearchResult]:
        limit = max(1, limit)

        if not query.strip():
            return []

        fts_query = _build_fts_query(query)
        if not fts_query:
            return []

        sql = """
            SELECT
                c.abs_path,
                c.start_line,
                c.end_line,
                c.section,
                c.chunk_index,
                rank AS score,
                c.token_count,
                f.modified_at,
                c.content
            FROM chunks_fts
            JOIN chunks c ON chunks_fts.chunk_id = c.chunk_id
            JOIN files f ON c.abs_path = f.abs_path
            WHERE chunks_fts MATCH ?
        """
        params: list[Any] = [fts_query]

        if path_prefix:
            sql += " AND substr(c.abs_path, 1, ?) = ?"
            params.extend([len(path_prefix), path_prefix])

        if file_glob:
            sql += " AND c.abs_path GLOB ?"
            params.append(
                "*" + file_glob if not file_glob.startswith("*") else file_glob
            )

        sql += " ORDER BY rank LIMIT ?"
        params.append(limit)

        rows = self.conn.execute(sql, params).fetchall()
        return [
            SearchResult(
                path=row[0],
                line_start=row[1],
                line_end=row[2],
                section=row[3],
                chunk_index=row[4],
                score=row[5],
                tokens=row[6],
                modified=row[7],
                content=row[8],
            )
            for row in rows
        ]
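
    # Usage sketch: FTS5's default rank is a negative bm25() value, so the
    # ORDER BY rank above already returns best matches first. Hypothetical
    # call:
    #
    #   with SQLiteFTS5Store(workspace_root) as store:
    #       hits = store.search("lock retry", limit=5, file_glob="*.py")
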
    def list_files(self) -> list[FileRecord]:
        rows = self.conn.execute(
            "SELECT * FROM files ORDER BY abs_path"
        ).fetchall()
        return [
            FileRecord(
                abs_path=row["abs_path"],
                root_path=row["root_path"],
                content_hash=row["content_hash"],
                config_signature=row["config_signature"],
                size_bytes=row["size_bytes"],
                modified_at=row["modified_at"],
                indexed_at=row["indexed_at"],
                chunk_count=row["chunk_count"],
            )
            for row in rows
        ]

    def get_chunks_for_file(self, abs_path: str) -> list[SearchResult]:
        rows = self.conn.execute(
            """SELECT c.abs_path, c.start_line, c.end_line, c.section,
                      c.chunk_index, c.token_count, f.modified_at, c.content
               FROM chunks c
               JOIN files f ON c.abs_path = f.abs_path
               WHERE c.abs_path = ?
               ORDER BY c.chunk_index""",
            (abs_path,),
        ).fetchall()
        return [
            SearchResult(
                path=row[0],
                line_start=row[1],
                line_end=row[2],
                section=row[3],
                chunk_index=row[4],
                score=1.0,
                tokens=row[5],
                modified=row[6],
                content=row[7],
            )
            for row in rows
        ]

    def all_indexed_paths(self) -> set[str]:
        rows = self.conn.execute("SELECT abs_path FROM files").fetchall()
        return {row[0] for row in rows}

    def status(self) -> dict[str, Any]:
        file_count = self.conn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
        chunk_count = self.conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
        db_size = self._db_path.stat().st_size if self._db_path.exists() else 0
        return {
            "file_count": file_count,
            "chunk_count": chunk_count,
            "db_size_bytes": db_size,
            "db_path": str(self._db_path),
        }

    def commit(self) -> None:
        self.conn.commit()
def _execute_with_lock_retry(
    cur: sqlite3.Cursor,
    sql: str,
    *,
    attempts: int = 5,
) -> None:
    """Run a schema-bootstrap executescript() with retry on transient locks.

    `PRAGMA journal_mode = WAL` (inside our schema bootstrap) requires an
    exclusive file lock and is NOT subject to busy_timeout — so two processes
    racing to initialize an empty DB can see one of them fail immediately with
    "database is locked". Everything else in the schema honors busy_timeout,
    but we retry the whole script uniformly to keep the code simple. The first
    winner's bootstrap runs in microseconds; the loser retries a few times
    until WAL mode is already set and its pragma becomes a no-op.
    """
    for attempt in range(1, attempts + 1):
        try:
            cur.executescript(sql)
            return
        except sqlite3.OperationalError as exc:
            if "database is locked" not in str(exc) or attempt == attempts:
                raise
            time.sleep(0.1 * attempt)
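
# Backoff sketch: lock failures on attempts 1-4 sleep 0.1s, 0.2s, 0.3s and
# 0.4s (1.0s of waiting in total); a lock error on the 5th attempt propagates.
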
_FTS5_COMPOUND_SEPARATORS = re.compile(r"[-_]")
_FTS5_TOKEN_RE = re.compile(r"[^\W_]+", re.UNICODE)


def _build_fts_query(raw_query: str) -> str:
    """Build a safe FTS5 query from raw user input.

    All tokens are double-quoted to prevent FTS5 operator injection.
    Compound terms (hyphenated/underscored) get phrase + AND boost.
    """
    tokens = _FTS5_TOKEN_RE.findall(raw_query)
    tokens = [t.lower() for t in tokens if len(t) >= 2]
    if not tokens:
        return ""

    words = raw_query.split()
    parts: list[str] = []
    for word in words:
        sub_tokens = _FTS5_TOKEN_RE.findall(word)
        sub_tokens = [t.lower() for t in sub_tokens if len(t) >= 2]
        if not sub_tokens:
            continue
        if len(sub_tokens) > 1 and _FTS5_COMPOUND_SEPARATORS.search(word):
            phrase = " ".join(sub_tokens)
            and_clause = " AND ".join(f'"{t}"' for t in sub_tokens)
            parts.append(f'("{phrase}" OR ({and_clause}))')
        else:
            for t in sub_tokens:
                parts.append(f'"{t}"')

    return " OR ".join(parts)
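
# Worked example:
#   _build_fts_query("retry-lock DB")
#   -> '("retry lock" OR ("retry" AND "lock")) OR "db"'
# The hyphenated word matches as an exact phrase or as both tokens anywhere
# in the chunk; plain words become individually quoted OR terms.
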

workspace/types.py (new file, 111 lines)
@@ -0,0 +1,111 @@
"""Workspace data types.

Salvaged from PR #5840's agent/workspace_types.py, trimmed for FTS5-only:
no dense scores, no reranking, no plugin context.
"""

from dataclasses import dataclass
from typing import Any

from pydantic import BaseModel, ConfigDict


class WorkspaceRoot(BaseModel):
    model_config = ConfigDict(frozen=True)
    path: str
    recursive: bool = False


@dataclass(frozen=True)
class FileRecord:
    abs_path: str
    root_path: str
    content_hash: str
    config_signature: str
    size_bytes: int
    modified_at: str
    indexed_at: str
    chunk_count: int = 0


@dataclass(frozen=True)
class ChunkRecord:
    chunk_id: str
    abs_path: str
    chunk_index: int
    content: str
    token_count: int
    start_line: int
    end_line: int
    start_char: int
    end_char: int
    section: str | None = None
    kind: str = "text"
    context: str | None = None
    chunk_metadata: str | None = None


@dataclass(frozen=True)
class SearchResult:
    path: str
    line_start: int
    line_end: int
    section: str | None
    chunk_index: int
    score: float
    tokens: int
    modified: str
    content: str

    def to_dict(self) -> dict[str, Any]:
        return {
            "path": self.path,
            "line_start": self.line_start,
            "line_end": self.line_end,
            "section": self.section,
            "chunk_index": self.chunk_index,
            "score": self.score,
            "tokens": self.tokens,
            "modified": self.modified,
            "content": self.content,
        }


@dataclass(frozen=True)
class IndexingError:
    path: str
    stage: str
    error_type: str
    message: str

    def to_dict(self) -> dict[str, Any]:
        return {
            "path": self.path,
            "stage": self.stage,
            "error_type": self.error_type,
            "message": self.message,
        }


@dataclass(frozen=True)
class IndexSummary:
    files_indexed: int
    files_skipped: int
    files_pruned: int
    files_errored: int
    chunks_created: int
    duration_seconds: float
    errors: list[IndexingError]
    errors_truncated: bool

    def to_dict(self) -> dict[str, Any]:
        return {
            "files_indexed": self.files_indexed,
            "files_skipped": self.files_skipped,
            "files_pruned": self.files_pruned,
            "files_errored": self.files_errored,
            "chunks_created": self.chunks_created,
            "duration_seconds": round(self.duration_seconds, 2),
            "errors": [e.to_dict() for e in self.errors],
            "errors_truncated": self.errors_truncated,
        }