feat: major improvements — layout, cycles, integrations, usage examples, tests #1

Merged
dparmeev merged 15 commits from feature/improvements-v2 into main 2026-02-15 11:21:47 +03:00
2 changed files with 380 additions and 93 deletions
Showing only changes of commit da527e6960 - Show all commits

View File

@@ -360,7 +360,7 @@ fn generate_docs(model: &ProjectModel, out: &str, verbose: bool) -> Result<()> {
}
// Create individual documentation files for files and symbols
for (file_id, file_doc) in &model.files {
for (_file_id, file_doc) in &model.files {
let file_doc_path = files_path.join(format!("{}.md", sanitize_filename(&file_doc.path)));
// Create file documentation with symbol sections

View File

@@ -10,7 +10,7 @@ use crate::cache::CacheManager;
use std::path::Path;
use std::fs;
use rustpython_parser::{ast, Parse};
use rustpython_ast::{Stmt, StmtClassDef, StmtFunctionDef, Expr, Ranged};
use rustpython_ast::{Stmt, Expr, Ranged};
pub struct PythonAnalyzer {
_config: Config,
@@ -29,25 +29,22 @@ impl PythonAnalyzer {
return Ok(cached_module);
}
// Read the Python file
let code = fs::read_to_string(file_path)
.map_err(ArchDocError::Io)?;
// Parse the Python code into an AST
let ast = ast::Suite::parse(&code, file_path.to_str().unwrap_or("<unknown>"))
.map_err(|e| ArchDocError::ParseError {
file: file_path.to_string_lossy().to_string(),
line: 0, // We don't have line info from the error
line: 0,
message: format!("Failed to parse: {}", e),
})?;
// Extract imports, definitions, and calls
let mut imports = Vec::new();
let mut symbols = Vec::new();
let mut calls = Vec::new();
for stmt in ast {
self.extract_from_statement(&stmt, None, &mut imports, &mut symbols, &mut calls, 0);
for stmt in &ast {
self.extract_from_statement(stmt, None, &mut imports, &mut symbols, &mut calls, 0);
}
let parsed_module = ParsedModule {
@@ -58,13 +55,20 @@ impl PythonAnalyzer {
calls,
};
// Store in cache
self.cache_manager.store_module(file_path, parsed_module.clone())?;
Ok(parsed_module)
}
fn extract_from_statement(&self, stmt: &Stmt, current_symbol: Option<&str>, imports: &mut Vec<Import>, symbols: &mut Vec<Symbol>, calls: &mut Vec<Call>, depth: usize) {
fn extract_from_statement(
&self,
stmt: &Stmt,
parent_class: Option<&str>,
imports: &mut Vec<Import>,
symbols: &mut Vec<Symbol>,
calls: &mut Vec<Call>,
depth: usize,
) {
match stmt {
Stmt::Import(import_stmt) => {
for alias in &import_stmt.names {
@@ -93,18 +97,25 @@ impl PythonAnalyzer {
}
}
Stmt::FunctionDef(func_def) => {
// Extract function definition
// Create a symbol for this function
let (kind, qualname) = if let Some(class_name) = parent_class {
(crate::model::SymbolKind::Method, format!("{}.{}", class_name, func_def.name))
} else {
(crate::model::SymbolKind::Function, func_def.name.to_string())
};
let signature = self.build_function_signature(&func_def.name, &func_def.args);
let integrations_flags = self.detect_integrations(&func_def.body, &self._config);
let docstring = self.extract_docstring(&func_def.body);
let symbol = Symbol {
id: func_def.name.to_string(),
kind: crate::model::SymbolKind::Function,
module_id: "".to_string(), // Will be filled later
file_id: "".to_string(), // Will be filled later
qualname: func_def.name.to_string(),
signature: format!("def {}(...)", func_def.name),
id: qualname.clone(),
kind,
module_id: String::new(),
file_id: String::new(),
qualname: qualname.clone(),
signature,
annotations: None,
docstring_first_line: self.extract_docstring(&func_def.body), // Extract docstring
docstring_first_line: docstring,
purpose: "extracted from AST".to_string(),
outbound_calls: Vec::new(),
inbound_calls: Vec::new(),
@@ -118,24 +129,63 @@ impl PythonAnalyzer {
};
symbols.push(symbol);
// Recursively process function body for calls
for body_stmt in &func_def.body {
self.extract_from_statement(body_stmt, Some(&func_def.name), imports, symbols, calls, depth + 1);
self.extract_from_statement(body_stmt, parent_class, imports, symbols, calls, depth + 1);
}
// Extract calls from body expressions recursively
self.extract_calls_from_body(&func_def.body, Some(&qualname), calls);
}
Stmt::AsyncFunctionDef(func_def) => {
let (kind, qualname) = if let Some(class_name) = parent_class {
(crate::model::SymbolKind::Method, format!("{}.{}", class_name, func_def.name))
} else {
(crate::model::SymbolKind::AsyncFunction, func_def.name.to_string())
};
let signature = format!("async {}", self.build_function_signature(&func_def.name, &func_def.args));
let integrations_flags = self.detect_integrations(&func_def.body, &self._config);
let docstring = self.extract_docstring(&func_def.body);
let symbol = Symbol {
id: qualname.clone(),
kind,
module_id: String::new(),
file_id: String::new(),
qualname: qualname.clone(),
signature,
annotations: None,
docstring_first_line: docstring,
purpose: "extracted from AST".to_string(),
outbound_calls: Vec::new(),
inbound_calls: Vec::new(),
integrations_flags,
metrics: crate::model::SymbolMetrics {
fan_in: 0,
fan_out: 0,
is_critical: false,
cycle_participant: false,
},
};
symbols.push(symbol);
for body_stmt in &func_def.body {
self.extract_from_statement(body_stmt, parent_class, imports, symbols, calls, depth + 1);
}
self.extract_calls_from_body(&func_def.body, Some(&qualname), calls);
}
Stmt::ClassDef(class_def) => {
// Extract class definition
// Create a symbol for this class
let integrations_flags = self.detect_integrations(&class_def.body, &self._config);
let docstring = self.extract_docstring(&class_def.body);
let symbol = Symbol {
id: class_def.name.to_string(),
kind: crate::model::SymbolKind::Class,
module_id: "".to_string(), // Will be filled later
file_id: "".to_string(), // Will be filled later
module_id: String::new(),
file_id: String::new(),
qualname: class_def.name.to_string(),
signature: format!("class {}", class_def.name),
annotations: None,
docstring_first_line: self.extract_docstring(&class_def.body), // Extract docstring
docstring_first_line: docstring,
purpose: "extracted from AST".to_string(),
outbound_calls: Vec::new(),
inbound_calls: Vec::new(),
@@ -149,29 +199,163 @@ impl PythonAnalyzer {
};
symbols.push(symbol);
// Recursively process class body for methods
// Process class body with class name as parent
for body_stmt in &class_def.body {
self.extract_from_statement(body_stmt, Some(&class_def.name), imports, symbols, calls, depth + 1);
}
}
Stmt::Expr(expr_stmt) => {
self.extract_from_expression(&expr_stmt.value, current_symbol, calls);
let caller = parent_class.map(|c| c.to_string()).unwrap_or_else(|| "unknown".to_string());
self.extract_from_expression(&expr_stmt.value, Some(&caller), calls);
}
_ => {
// For other statement types, we might still need to check for calls in expressions
// This is a simplified approach - a full implementation would need to traverse all expressions
// Recurse into compound statements to find calls
Stmt::If(if_stmt) => {
let caller = parent_class.map(|c| c.to_string());
self.extract_from_expression(&if_stmt.test, caller.as_deref(), calls);
self.extract_calls_from_body(&if_stmt.body, caller.as_deref(), calls);
self.extract_calls_from_body(&if_stmt.orelse, caller.as_deref(), calls);
}
Stmt::For(for_stmt) => {
let caller = parent_class.map(|c| c.to_string());
self.extract_from_expression(&for_stmt.iter, caller.as_deref(), calls);
self.extract_calls_from_body(&for_stmt.body, caller.as_deref(), calls);
self.extract_calls_from_body(&for_stmt.orelse, caller.as_deref(), calls);
}
Stmt::While(while_stmt) => {
let caller = parent_class.map(|c| c.to_string());
self.extract_from_expression(&while_stmt.test, caller.as_deref(), calls);
self.extract_calls_from_body(&while_stmt.body, caller.as_deref(), calls);
self.extract_calls_from_body(&while_stmt.orelse, caller.as_deref(), calls);
}
Stmt::With(with_stmt) => {
let caller = parent_class.map(|c| c.to_string());
for item in &with_stmt.items {
self.extract_from_expression(&item.context_expr, caller.as_deref(), calls);
}
self.extract_calls_from_body(&with_stmt.body, caller.as_deref(), calls);
}
Stmt::Return(return_stmt) => {
if let Some(value) = &return_stmt.value {
let caller = parent_class.map(|c| c.to_string());
self.extract_from_expression(value, caller.as_deref(), calls);
}
}
Stmt::Assign(assign_stmt) => {
let caller = parent_class.map(|c| c.to_string());
self.extract_from_expression(&assign_stmt.value, caller.as_deref(), calls);
}
Stmt::Try(try_stmt) => {
let caller = parent_class.map(|c| c.to_string());
self.extract_calls_from_body(&try_stmt.body, caller.as_deref(), calls);
for handler in &try_stmt.handlers {
let rustpython_ast::ExceptHandler::ExceptHandler(h) = handler; {
self.extract_calls_from_body(&h.body, caller.as_deref(), calls);
}
}
self.extract_calls_from_body(&try_stmt.orelse, caller.as_deref(), calls);
self.extract_calls_from_body(&try_stmt.finalbody, caller.as_deref(), calls);
}
_ => {}
}
}
/// Collect call expressions from a list of statements, attributing each to `caller`.
///
/// Walks `body` and forwards every expression that can contain a call to
/// `extract_from_expression`. Compound statements (`if`, `for`, `while`,
/// `with`, `try` and the `async` forms of `for`/`with`) are descended into
/// recursively with the same `caller`.
///
/// Statement kinds not listed in the `match` (including nested function and
/// class definitions) are ignored here.
///
/// * `body`   - statements to scan.
/// * `caller` - name recorded as the calling symbol; passed through unchanged.
/// * `calls`  - output vector that discovered calls are appended to.
fn extract_calls_from_body(&self, body: &[Stmt], caller: Option<&str>, calls: &mut Vec<Call>) {
    for stmt in body {
        match stmt {
            Stmt::Expr(node) => {
                self.extract_from_expression(&node.value, caller, calls);
            }
            Stmt::Return(node) => {
                if let Some(value) = &node.value {
                    self.extract_from_expression(value, caller, calls);
                }
            }
            Stmt::Assign(node) => {
                self.extract_from_expression(&node.value, caller, calls);
            }
            // `x += f()` carries a call on its right-hand side just like `x = f()`.
            Stmt::AugAssign(node) => {
                self.extract_from_expression(&node.value, caller, calls);
            }
            // `x: T = f()`; a bare annotation (`x: T`) has no value to scan.
            Stmt::AnnAssign(node) => {
                if let Some(value) = &node.value {
                    self.extract_from_expression(value, caller, calls);
                }
            }
            Stmt::If(node) => {
                self.extract_from_expression(&node.test, caller, calls);
                self.extract_calls_from_body(&node.body, caller, calls);
                self.extract_calls_from_body(&node.orelse, caller, calls);
            }
            Stmt::For(node) => {
                self.extract_from_expression(&node.iter, caller, calls);
                self.extract_calls_from_body(&node.body, caller, calls);
                self.extract_calls_from_body(&node.orelse, caller, calls);
            }
            // Same traversal as the synchronous `for`.
            Stmt::AsyncFor(node) => {
                self.extract_from_expression(&node.iter, caller, calls);
                self.extract_calls_from_body(&node.body, caller, calls);
                self.extract_calls_from_body(&node.orelse, caller, calls);
            }
            Stmt::While(node) => {
                self.extract_from_expression(&node.test, caller, calls);
                self.extract_calls_from_body(&node.body, caller, calls);
                self.extract_calls_from_body(&node.orelse, caller, calls);
            }
            Stmt::With(node) => {
                for item in &node.items {
                    self.extract_from_expression(&item.context_expr, caller, calls);
                }
                self.extract_calls_from_body(&node.body, caller, calls);
            }
            // Same traversal as the synchronous `with`.
            Stmt::AsyncWith(node) => {
                for item in &node.items {
                    self.extract_from_expression(&item.context_expr, caller, calls);
                }
                self.extract_calls_from_body(&node.body, caller, calls);
            }
            Stmt::Try(node) => {
                self.extract_calls_from_body(&node.body, caller, calls);
                for handler in &node.handlers {
                    // `ExceptHandler` has a single variant, so this pattern is irrefutable.
                    let rustpython_ast::ExceptHandler::ExceptHandler(h) = handler;
                    self.extract_calls_from_body(&h.body, caller, calls);
                }
                self.extract_calls_from_body(&node.orelse, caller, calls);
                self.extract_calls_from_body(&node.finalbody, caller, calls);
            }
            _ => {}
        }
    }
}
/// Render a Python-style signature string of the form `def name(params)`.
///
/// Parameters are emitted in Python's declaration order:
/// positional-only parameters followed by `/`, regular parameters,
/// `*args` (or a bare `*` when keyword-only parameters exist without a
/// vararg), keyword-only parameters, and finally `**kwargs`.
/// Each parameter is rendered as `name[: annotation][ = default]`, with
/// annotations and defaults stringified through `expr_to_string`.
///
/// * `name` - function name placed after `def`.
/// * `args` - parsed argument list of the function definition.
///
/// Returns the assembled signature; the caller is responsible for any
/// `async` prefix.
fn build_function_signature(&self, name: &str, args: &rustpython_ast::Arguments) -> String {
    // Formats a single parameter that may carry an annotation and a default.
    let render = |arg: &rustpython_ast::ArgWithDefault| -> String {
        let mut text = arg.def.arg.to_string();
        if let Some(annotation) = &arg.def.annotation {
            text.push_str(": ");
            text.push_str(&self.expr_to_string(annotation));
        }
        if let Some(default) = &arg.default {
            text.push_str(" = ");
            text.push_str(&self.expr_to_string(default));
        }
        text
    };

    let mut params: Vec<String> = Vec::new();

    // Positional-only parameters are terminated by a `/` marker.
    if !args.posonlyargs.is_empty() {
        params.extend(args.posonlyargs.iter().map(&render));
        params.push("/".to_string());
    }

    params.extend(args.args.iter().map(&render));

    if let Some(vararg) = &args.vararg {
        let annotation = vararg
            .annotation
            .as_ref()
            .map(|a| format!(": {}", self.expr_to_string(a)))
            .unwrap_or_default();
        params.push(format!("*{}{}", vararg.arg, annotation));
    } else if !args.kwonlyargs.is_empty() {
        // Without `*args`, keyword-only parameters are introduced by a bare `*`.
        params.push("*".to_string());
    }

    params.extend(args.kwonlyargs.iter().map(&render));

    if let Some(kwarg) = &args.kwarg {
        let annotation = kwarg
            .annotation
            .as_ref()
            .map(|a| format!(": {}", self.expr_to_string(a)))
            .unwrap_or_default();
        params.push(format!("**{}{}", kwarg.arg, annotation));
    }

    format!("def {}({})", name, params.join(", "))
}
fn extract_docstring(&self, body: &[Stmt]) -> Option<String> {
// Extract the first statement if it's a string expression (docstring)
if let Some(first_stmt) = body.first() {
if let Stmt::Expr(expr_stmt) = first_stmt {
if let Expr::Constant(constant_expr) = &*expr_stmt.value {
if let Some(docstring) = constant_expr.value.as_str() {
// Return the first line of the docstring
return docstring.lines().next().map(|s| s.to_string());
// Return full docstring, trimmed
let trimmed = docstring.trim();
if trimmed.is_empty() {
return None;
}
return Some(trimmed.to_string());
}
}
}
@@ -190,10 +374,8 @@ impl PythonAnalyzer {
return flags;
}
// Convert body to string for pattern matching
let body_str = format!("{:?}", body);
// Check for HTTP integrations
for pattern in &config.analysis.integration_patterns {
if pattern.type_ == "http" {
for lib in &pattern.patterns {
@@ -222,31 +404,20 @@ impl PythonAnalyzer {
flags
}
#[allow(dead_code)]
fn extract_function_def(&self, _func_def: &StmtFunctionDef, _symbols: &mut Vec<Symbol>, _calls: &mut Vec<Call>, _depth: usize) {
// Extract function information
// This is a simplified implementation - a full implementation would extract more details
}
#[allow(dead_code)]
fn extract_class_def(&self, _class_def: &StmtClassDef, _symbols: &mut Vec<Symbol>, _depth: usize) {
// Extract class information
// This is a simplified implementation - a full implementation would extract more details
}
fn extract_from_expression(&self, expr: &Expr, current_symbol: Option<&str>, calls: &mut Vec<Call>) {
match expr {
Expr::Call(call_expr) => {
// Extract call information
let callee_expr = self.expr_to_string(&call_expr.func);
calls.push(Call {
caller_symbol: current_symbol.unwrap_or("unknown").to_string(), // Use current symbol as caller
caller_symbol: current_symbol.unwrap_or("unknown").to_string(),
callee_expr,
line_number: call_expr.range().start().into(),
call_type: CallType::Unresolved,
});
// Recursively process arguments
// Recursively process the function expression itself
self.extract_from_expression(&call_expr.func, current_symbol, calls);
for arg in &call_expr.args {
self.extract_from_expression(arg, current_symbol, calls);
}
@@ -255,13 +426,79 @@ impl PythonAnalyzer {
}
}
Expr::Attribute(attr_expr) => {
// Recursively process value
self.extract_from_expression(&attr_expr.value, current_symbol, calls);
}
_ => {
// For other expression types, recursively process child expressions
// This is a simplified approach - a full implementation would handle all expression variants
Expr::BoolOp(bool_op) => {
for value in &bool_op.values {
self.extract_from_expression(value, current_symbol, calls);
}
}
Expr::BinOp(bin_op) => {
self.extract_from_expression(&bin_op.left, current_symbol, calls);
self.extract_from_expression(&bin_op.right, current_symbol, calls);
}
Expr::UnaryOp(unary_op) => {
self.extract_from_expression(&unary_op.operand, current_symbol, calls);
}
Expr::IfExp(if_exp) => {
self.extract_from_expression(&if_exp.test, current_symbol, calls);
self.extract_from_expression(&if_exp.body, current_symbol, calls);
self.extract_from_expression(&if_exp.orelse, current_symbol, calls);
}
Expr::Dict(dict_expr) => {
for key in &dict_expr.keys {
if let Some(k) = key {
self.extract_from_expression(k, current_symbol, calls);
}
}
for value in &dict_expr.values {
self.extract_from_expression(value, current_symbol, calls);
}
}
Expr::List(list_expr) => {
for elt in &list_expr.elts {
self.extract_from_expression(elt, current_symbol, calls);
}
}
Expr::Tuple(tuple_expr) => {
for elt in &tuple_expr.elts {
self.extract_from_expression(elt, current_symbol, calls);
}
}
Expr::ListComp(comp) => {
self.extract_from_expression(&comp.elt, current_symbol, calls);
for generator in &comp.generators {
self.extract_from_expression(&generator.iter, current_symbol, calls);
for if_clause in &generator.ifs {
self.extract_from_expression(if_clause, current_symbol, calls);
}
}
}
Expr::Compare(compare) => {
self.extract_from_expression(&compare.left, current_symbol, calls);
for comp in &compare.comparators {
self.extract_from_expression(comp, current_symbol, calls);
}
}
Expr::JoinedStr(joined) => {
for value in &joined.values {
self.extract_from_expression(value, current_symbol, calls);
}
}
Expr::FormattedValue(fv) => {
self.extract_from_expression(&fv.value, current_symbol, calls);
}
Expr::Subscript(sub) => {
self.extract_from_expression(&sub.value, current_symbol, calls);
self.extract_from_expression(&sub.slice, current_symbol, calls);
}
Expr::Starred(starred) => {
self.extract_from_expression(&starred.value, current_symbol, calls);
}
Expr::Await(await_expr) => {
self.extract_from_expression(&await_expr.value, current_symbol, calls);
}
_ => {}
}
}
@@ -271,74 +508,127 @@ impl PythonAnalyzer {
Expr::Attribute(attr_expr) => {
format!("{}.{}", self.expr_to_string(&attr_expr.value), attr_expr.attr)
}
Expr::Constant(c) => {
if let Some(s) = c.value.as_str() {
format!("\"{}\"", s)
} else {
format!("{:?}", c.value)
}
}
Expr::Subscript(sub) => {
format!("{}[{}]", self.expr_to_string(&sub.value), self.expr_to_string(&sub.slice))
}
_ => "<complex_expression>".to_string(),
}
}
pub fn resolve_symbols(&self, modules: &[ParsedModule]) -> Result<ProjectModel, ArchDocError> {
// Build symbol index
// Resolve cross-module references
// Build call graph
// This is a simplified implementation that creates a basic project model
// A full implementation would do much more sophisticated symbol resolution
let mut project_model = ProjectModel::new();
// Add modules to project model
// Build import alias map for call resolution
// alias_name -> original_module_name
let mut import_aliases: std::collections::HashMap<String, String> = std::collections::HashMap::new();
for parsed_module in modules {
for import in &parsed_module.imports {
if let Some(alias) = &import.alias {
import_aliases.insert(alias.clone(), import.module_name.clone());
}
}
}
for parsed_module in modules {
let module_id = parsed_module.module_path.clone();
let file_id = parsed_module.path.to_string_lossy().to_string();
// Create file doc
let file_doc = FileDoc {
id: file_id.clone(),
path: parsed_module.path.to_string_lossy().to_string(),
module_id: module_id.clone(),
imports: parsed_module.imports.iter().map(|i| i.module_name.clone()).collect(),
outbound_modules: Vec::new(), // TODO: Resolve outbound modules
outbound_modules: Vec::new(),
inbound_files: Vec::new(),
symbols: parsed_module.symbols.iter().map(|s| s.id.clone()).collect(),
};
project_model.files.insert(file_id.clone(), file_doc);
// Add symbols to project model
for mut symbol in parsed_module.symbols.clone() {
symbol.module_id = module_id.clone();
symbol.file_id = file_id.clone();
project_model.symbols.insert(symbol.id.clone(), symbol);
}
// Create module
let module = Module {
id: module_id.clone(),
path: parsed_module.path.to_string_lossy().to_string(),
files: vec![file_id.clone()],
doc_summary: None,
outbound_modules: Vec::new(), // TODO: Resolve outbound modules
outbound_modules: Vec::new(),
inbound_modules: Vec::new(),
symbols: parsed_module.symbols.iter().map(|s| s.id.clone()).collect(),
};
project_model.modules.insert(module_id, module);
}
// Build dependency graphs and compute metrics
self.build_dependency_graphs(&mut project_model, modules)?;
self.resolve_call_types(&mut project_model, modules, &import_aliases);
self.compute_metrics(&mut project_model)?;
Ok(project_model)
}
/// Resolve call types using import information
fn resolve_call_types(
&self,
project_model: &mut ProjectModel,
parsed_modules: &[ParsedModule],
import_aliases: &std::collections::HashMap<String, String>,
) {
// Collect all known symbol names
let known_symbols: std::collections::HashSet<String> = project_model.symbols.keys().cloned().collect();
for parsed_module in parsed_modules {
let import_map: std::collections::HashMap<String, String> = parsed_module.imports.iter()
.filter_map(|i| {
i.alias.as_ref().map(|alias| (alias.clone(), i.module_name.clone()))
})
.collect();
// Also map plain imported names
let mut name_map: std::collections::HashMap<String, String> = import_map;
for import in &parsed_module.imports {
// For "from foo.bar import baz", map "baz" -> "foo.bar.baz"
let parts: Vec<&str> = import.module_name.split('.').collect();
if let Some(last) = parts.last() {
name_map.insert(last.to_string(), import.module_name.clone());
}
}
// Update edge call types
for edge in &mut project_model.edges.symbol_call_edges {
let callee = &edge.to_id;
// Check if callee is a known local symbol
if known_symbols.contains(callee) {
edge.edge_type = crate::model::EdgeType::SymbolCall;
} else {
// Check if it matches an import alias
let root_name = callee.split('.').next().unwrap_or(callee);
if name_map.contains_key(root_name) || import_aliases.contains_key(root_name) {
edge.edge_type = crate::model::EdgeType::ExternalCall;
} else {
edge.edge_type = crate::model::EdgeType::UnresolvedCall;
}
}
}
}
}
fn build_dependency_graphs(&self, project_model: &mut ProjectModel, parsed_modules: &[ParsedModule]) -> Result<(), ArchDocError> {
// Build module import edges
for parsed_module in parsed_modules {
let from_module_id = parsed_module.module_path.clone();
for import in &parsed_module.imports {
// Try to resolve the imported module
let to_module_id = import.module_name.clone();
// Create module import edge
let edge = crate::model::Edge {
from_id: from_module_id.clone(),
to_id: to_module_id,
@@ -349,19 +639,13 @@ impl PythonAnalyzer {
}
}
// Build symbol call edges
for parsed_module in parsed_modules {
let _module_id = parsed_module.module_path.clone();
for call in &parsed_module.calls {
// Try to resolve the called symbol
let callee_expr = call.callee_expr.clone();
// Create symbol call edge
let edge = crate::model::Edge {
from_id: call.caller_symbol.clone(),
to_id: callee_expr,
edge_type: crate::model::EdgeType::SymbolCall, // TODO: Map CallType to EdgeType properly
edge_type: crate::model::EdgeType::SymbolCall,
meta: None,
};
project_model.edges.symbol_call_edges.push(edge);
@@ -372,24 +656,27 @@ impl PythonAnalyzer {
}
fn compute_metrics(&self, project_model: &mut ProjectModel) -> Result<(), ArchDocError> {
// Compute fan-in and fan-out metrics for symbols
for symbol in project_model.symbols.values_mut() {
// Fan-out: count of outgoing calls
// Collect fan-in/fan-out first to avoid borrow issues
let mut metrics: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
for symbol_id in project_model.symbols.keys() {
let fan_out = project_model.edges.symbol_call_edges
.iter()
.filter(|edge| edge.from_id == symbol.id)
.filter(|edge| edge.from_id == *symbol_id)
.count();
// Fan-in: count of incoming calls
let fan_in = project_model.edges.symbol_call_edges
.iter()
.filter(|edge| edge.to_id == symbol.id)
.filter(|edge| edge.to_id == *symbol_id)
.count();
metrics.insert(symbol_id.clone(), (fan_in, fan_out));
}
symbol.metrics.fan_in = fan_in;
symbol.metrics.fan_out = fan_out;
symbol.metrics.is_critical = fan_in > 10 || fan_out > 10; // Simple threshold
symbol.metrics.cycle_participant = false; // TODO: Detect cycles
for (symbol_id, (fan_in, fan_out)) in &metrics {
if let Some(symbol) = project_model.symbols.get_mut(symbol_id) {
symbol.metrics.fan_in = *fan_in;
symbol.metrics.fan_out = *fan_out;
symbol.metrics.is_critical = *fan_in > 10 || *fan_out > 10;
}
}
Ok(())