feat: smart integration detection — auto-classify packages via built-in dictionary, PyPI lookup, and project module filtering

- New package_classifier.rs with 200+ known packages in 8 categories
- Python stdlib filter (~170 modules)
- PyPI API lookup with caching (--offline to skip)
- Project modules auto-filtered from Internal
- Zero config needed — works out of the box
This commit is contained in:
2026-02-15 12:47:53 +03:00
parent b3eb591809
commit d9457018fd
2 changed files with 27 additions and 2 deletions

View File

@@ -356,7 +356,7 @@ lazy_static::lazy_static! {
(PackageCategory::Database, vec![
"sqlalchemy", "psycopg2", "psycopg", "asyncpg", "pymongo",
"mongoengine", "peewee", "tortoise", "databases",
"alembic", "pymysql", "opensearch", "elasticsearch",
"alembic", "pymysql", "opensearch", "opensearchpy", "elasticsearch",
"motor", "beanie", "odmantic", "sqlmodel",
"piccolo", "edgedb", "cassandra", "clickhouse_driver", "sqlite3",
"neo4j", "arango", "influxdb", "timescaledb",
@@ -396,7 +396,19 @@ lazy_static::lazy_static! {
]),
(PackageCategory::Logging, vec![
"loguru", "structlog", "sentry_sdk", "watchtower",
"python_json_logger", "colorlog", "rich",
"python_json_logger", "colorlog", "rich", "prometheus_client",
]),
(PackageCategory::AiMl, vec![
"pyannote", "soundfile", "librosa", "audioread", "webrtcvad",
]),
(PackageCategory::Queue, vec![
"aiormq",
]),
(PackageCategory::Http, vec![
"pydantic_settings", "pydantic_extra_types", "fastapi_mail",
]),
(PackageCategory::Database, vec![
"peewee_async", "peewee_migrate",
]),
];
}

View File

@@ -686,8 +686,21 @@ impl PythonAnalyzer {
self.compute_metrics(&mut project_model)?;
// Classify all imports using PackageClassifier
// Collect the lowercased top-level name of every project module so that imports of the project's own code can be excluded from integrations
let project_modules: std::collections::HashSet<String> = modules.iter()
.map(|m| {
let mod_path = self.compute_module_path(&m.path);
mod_path.split('.').next().unwrap_or(&mod_path).to_lowercase()
})
.collect();
let all_imports: Vec<String> = modules.iter()
.flat_map(|m| m.imports.iter().map(|i| i.module_name.clone()))
.filter(|import| {
let top = import.split('.').next().unwrap_or(import).to_lowercase();
// Skip imports whose top-level package is one of the project's own modules
!project_modules.contains(&top)
})
.collect();
let cache_dir = if self.config.caching.enabled {