From 136697caf0e5fd2fc7109f4d28eb2015156450f6 Mon Sep 17 00:00:00 2001 From: Arkasha Date: Sun, 15 Feb 2026 11:36:49 +0300 Subject: [PATCH] fix: prefer file docstring over __init__.py for module summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For non-init files, use the file's own docstring first, falling back to the parent __init__.py docstring only if the file has none. Also exclude dataclass-like classes (≤2 methods) from critical marking to avoid false positives on simple data containers like ToolResult. --- archdoc-core/src/python_analyzer.rs | 35 +++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/archdoc-core/src/python_analyzer.rs b/archdoc-core/src/python_analyzer.rs index b474fac..bb053cc 100644 --- a/archdoc-core/src/python_analyzer.rs +++ b/archdoc-core/src/python_analyzer.rs @@ -658,9 +658,9 @@ impl PythonAnalyzer { let doc_summary = if is_init { parsed_module.file_docstring.clone() } else { - // For non-init files, check if there's an __init__.py docstring for this module's parent - init_docstrings.get(&module_id).cloned() - .or_else(|| parsed_module.file_docstring.clone()) + // For non-init files, use file docstring first, then check __init__.py + parsed_module.file_docstring.clone() + .or_else(|| init_docstrings.get(&module_id).cloned()) }; let module = Module { @@ -799,6 +799,25 @@ impl PythonAnalyzer { Ok(()) } + /// Check if a class symbol is a simple data container (dataclass-like). + /// A class is considered dataclass-like if it has ≤2 methods (typically __init__ and __repr__/__str__). 
+ fn is_dataclass_like(symbol_id: &str, project_model: &ProjectModel) -> bool { + let symbol = match project_model.symbols.get(symbol_id) { + Some(s) => s, + None => return false, + }; + if symbol.kind != crate::model::SymbolKind::Class { + return false; + } + // Count methods belonging to this class + let class_name = &symbol.qualname; + let method_prefix = format!("{}::{}.", symbol.module_id, class_name); + let method_count = project_model.symbols.values() + .filter(|s| s.kind == crate::model::SymbolKind::Method && s.id.starts_with(&method_prefix)) + .count(); + method_count <= 2 + } + fn compute_metrics(&self, project_model: &mut ProjectModel) -> Result<(), ArchDocError> { // Collect fan-in/fan-out first to avoid borrow issues let mut metrics: std::collections::HashMap = std::collections::HashMap::new(); @@ -815,12 +834,20 @@ impl PythonAnalyzer { metrics.insert(symbol_id.clone(), (fan_in, fan_out)); } + // Pre-compute which symbols are dataclass-like (need immutable borrow) + let dataclass_ids: std::collections::HashSet = metrics.keys() + .filter(|id| Self::is_dataclass_like(id, project_model)) + .cloned() + .collect(); + for (symbol_id, (fan_in, fan_out)) in &metrics { if let Some(symbol) = project_model.symbols.get_mut(symbol_id) { symbol.metrics.fan_in = *fan_in; symbol.metrics.fan_out = *fan_out; - symbol.metrics.is_critical = *fan_in > self.config.thresholds.critical_fan_in + // Don't mark dataclass-like classes as critical — they're just data containers + let exceeds_threshold = *fan_in > self.config.thresholds.critical_fan_in || *fan_out > self.config.thresholds.critical_fan_out; + symbol.metrics.is_critical = exceeds_threshold && !dataclass_ids.contains(symbol_id); } }