feat: improve documentation quality with real data
- Extract file-level docstrings from Python files (module-level string expressions)
- Use __init__.py docstrings as module doc_summary
- Use file docstrings as file purpose in layout tables (instead of 'Source file')
- Populate module outbound_modules/inbound_modules from import edges (internal only)
- Make filename sanitization consistent (sanitize_for_link matches sanitize_filename)
- Clean up stale .md files from previous runs before generating
- Fill ARCHITECTURE.md template with real layout, modules index, and critical points
- Add file_docstring field to ParsedModule and file_purpose to FileDoc
This commit is contained in:
@@ -51,6 +51,7 @@ pub struct FileDoc {
|
||||
pub outbound_modules: Vec<String>,
|
||||
pub inbound_files: Vec<String>,
|
||||
pub symbols: Vec<String>,
|
||||
pub file_purpose: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -142,6 +143,7 @@ pub struct ParsedModule {
|
||||
pub imports: Vec<Import>,
|
||||
pub symbols: Vec<Symbol>,
|
||||
pub calls: Vec<Call>,
|
||||
pub file_docstring: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
|
||||
@@ -43,6 +43,9 @@ impl PythonAnalyzer {
|
||||
let mut symbols = Vec::new();
|
||||
let mut calls = Vec::new();
|
||||
|
||||
// Extract file-level docstring (first statement if it's a string expression)
|
||||
let file_docstring = self.extract_docstring(&ast);
|
||||
|
||||
for stmt in &ast {
|
||||
self.extract_from_statement(stmt, None, &mut imports, &mut symbols, &mut calls, 0);
|
||||
}
|
||||
@@ -53,6 +56,7 @@ impl PythonAnalyzer {
|
||||
imports,
|
||||
symbols,
|
||||
calls,
|
||||
file_docstring,
|
||||
};
|
||||
|
||||
self.cache_manager.store_module(file_path, parsed_module.clone())?;
|
||||
@@ -580,10 +584,25 @@ impl PythonAnalyzer {
|
||||
}
|
||||
}
|
||||
|
||||
// First pass: collect __init__.py docstrings keyed by module_id
|
||||
let mut init_docstrings: std::collections::HashMap<String, String> = std::collections::HashMap::new();
|
||||
for parsed_module in modules {
|
||||
if parsed_module.path.file_name().map(|f| f == "__init__.py").unwrap_or(false)
|
||||
&& let Some(ref ds) = parsed_module.file_docstring {
|
||||
let module_id = self.compute_module_path(&parsed_module.path);
|
||||
init_docstrings.insert(module_id, ds.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for parsed_module in modules {
|
||||
let module_id = self.compute_module_path(&parsed_module.path);
|
||||
let file_id = parsed_module.path.to_string_lossy().to_string();
|
||||
|
||||
// Use file docstring first line as file purpose
|
||||
let file_purpose = parsed_module.file_docstring.as_ref().map(|ds| {
|
||||
ds.lines().next().unwrap_or(ds).to_string()
|
||||
});
|
||||
|
||||
let file_doc = FileDoc {
|
||||
id: file_id.clone(),
|
||||
path: parsed_module.path.to_string_lossy().to_string(),
|
||||
@@ -592,6 +611,7 @@ impl PythonAnalyzer {
|
||||
outbound_modules: Vec::new(),
|
||||
inbound_files: Vec::new(),
|
||||
symbols: parsed_module.symbols.iter().map(|s| s.id.clone()).collect(),
|
||||
file_purpose,
|
||||
};
|
||||
project_model.files.insert(file_id.clone(), file_doc);
|
||||
|
||||
@@ -601,11 +621,21 @@ impl PythonAnalyzer {
|
||||
project_model.symbols.insert(symbol.id.clone(), symbol);
|
||||
}
|
||||
|
||||
// Use __init__.py docstring for module doc_summary, or file docstring for single-file modules
|
||||
let is_init = parsed_module.path.file_name().map(|f| f == "__init__.py").unwrap_or(false);
|
||||
let doc_summary = if is_init {
|
||||
parsed_module.file_docstring.clone()
|
||||
} else {
|
||||
// For non-init files, check if there's an __init__.py docstring for this module's parent
|
||||
init_docstrings.get(&module_id).cloned()
|
||||
.or_else(|| parsed_module.file_docstring.clone())
|
||||
};
|
||||
|
||||
let module = Module {
|
||||
id: module_id.clone(),
|
||||
path: parsed_module.path.to_string_lossy().to_string(),
|
||||
files: vec![file_id.clone()],
|
||||
doc_summary: None,
|
||||
doc_summary,
|
||||
outbound_modules: Vec::new(),
|
||||
inbound_modules: Vec::new(),
|
||||
symbols: parsed_module.symbols.iter().map(|s| s.id.clone()).collect(),
|
||||
@@ -668,6 +698,9 @@ impl PythonAnalyzer {
|
||||
}
|
||||
|
||||
fn build_dependency_graphs(&self, project_model: &mut ProjectModel, parsed_modules: &[ParsedModule]) -> Result<(), ArchDocError> {
|
||||
// Collect known internal module IDs
|
||||
let known_modules: std::collections::HashSet<String> = project_model.modules.keys().cloned().collect();
|
||||
|
||||
for parsed_module in parsed_modules {
|
||||
let from_module_id = self.compute_module_path(&parsed_module.path);
|
||||
|
||||
@@ -683,6 +716,41 @@ impl PythonAnalyzer {
|
||||
}
|
||||
}
|
||||
|
||||
// Populate outbound_modules and inbound_modules from edges
|
||||
// Only include internal modules (ones that exist in project_model.modules)
|
||||
for edge in &project_model.edges.module_import_edges {
|
||||
let from_id = &edge.from_id;
|
||||
// Try to match the import to an internal module
|
||||
// Import "src.core.SomeClass" should match module "src.core"
|
||||
let to_internal = if known_modules.contains(&edge.to_id) {
|
||||
Some(edge.to_id.clone())
|
||||
} else {
|
||||
// Try prefix matching: "foo.bar.baz" -> check "foo.bar", "foo"
|
||||
let parts: Vec<&str> = edge.to_id.split('.').collect();
|
||||
let mut found = None;
|
||||
for i in (1..parts.len()).rev() {
|
||||
let prefix = parts[..i].join(".");
|
||||
if known_modules.contains(&prefix) {
|
||||
found = Some(prefix);
|
||||
break;
|
||||
}
|
||||
}
|
||||
found
|
||||
};
|
||||
|
||||
if let Some(ref target_module) = to_internal
|
||||
&& target_module != from_id {
|
||||
if let Some(module) = project_model.modules.get_mut(from_id)
|
||||
&& !module.outbound_modules.contains(target_module) {
|
||||
module.outbound_modules.push(target_module.clone());
|
||||
}
|
||||
if let Some(module) = project_model.modules.get_mut(target_module)
|
||||
&& !module.inbound_modules.contains(from_id) {
|
||||
module.inbound_modules.push(from_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for parsed_module in parsed_modules {
|
||||
for call in &parsed_module.calls {
|
||||
let callee_expr = call.callee_expr.clone();
|
||||
|
||||
@@ -10,13 +10,8 @@ use chrono::Utc;
|
||||
use handlebars::Handlebars;
|
||||
|
||||
/// Converts a file path or module id into the flat name used in generated
/// documentation links.
///
/// A single leading `./` is dropped, then every `/` is replaced with `__`.
/// All other characters (including `.`, `\` and `:`) are passed through
/// unchanged.
///
/// NOTE(review): per the accompanying commit message this is meant to produce
/// the same names as `sanitize_filename`; that function is not visible here,
/// so confirm the two stay in sync when changing either.
fn sanitize_for_link(filename: &str) -> String {
    // Only one "./" is stripped; a path such as "././a" keeps its second "./"
    // and has the remaining slash flattened like any other.
    let cleaned = filename.strip_prefix("./").unwrap_or(filename);
    cleaned.replace('/', "__")
}
|
||||
|
||||
pub struct Renderer {
|
||||
@@ -278,10 +273,68 @@ impl Renderer {
|
||||
|
||||
let today = Utc::now().format("%Y-%m-%d").to_string();
|
||||
|
||||
// Collect layout items for template
|
||||
let mut layout_items = Vec::new();
|
||||
for file_doc in model.files.values() {
|
||||
let purpose = file_doc.file_purpose.as_deref().unwrap_or("Source file");
|
||||
layout_items.push(serde_json::json!({
|
||||
"path": file_doc.path,
|
||||
"purpose": purpose,
|
||||
"link": format!("docs/architecture/files/{}.md", sanitize_for_link(&file_doc.path))
|
||||
}));
|
||||
}
|
||||
|
||||
// Collect module items for template
|
||||
let mut modules_list = Vec::new();
|
||||
for (module_id, module) in &model.modules {
|
||||
modules_list.push(serde_json::json!({
|
||||
"name": module_id,
|
||||
"symbol_count": module.symbols.len(),
|
||||
"inbound_count": module.inbound_modules.len(),
|
||||
"outbound_count": module.outbound_modules.len(),
|
||||
"link": format!("docs/architecture/modules/{}.md", sanitize_for_link(module_id))
|
||||
}));
|
||||
}
|
||||
|
||||
// Collect critical points
|
||||
let mut high_fan_in = Vec::new();
|
||||
let mut high_fan_out = Vec::new();
|
||||
for (symbol_id, symbol) in &model.symbols {
|
||||
if symbol.metrics.fan_in > 5 {
|
||||
high_fan_in.push(serde_json::json!({
|
||||
"symbol": symbol_id,
|
||||
"count": symbol.metrics.fan_in,
|
||||
"critical": symbol.metrics.is_critical,
|
||||
}));
|
||||
}
|
||||
if symbol.metrics.fan_out > 5 {
|
||||
high_fan_out.push(serde_json::json!({
|
||||
"symbol": symbol_id,
|
||||
"count": symbol.metrics.fan_out,
|
||||
"critical": symbol.metrics.is_critical,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
let cycles: Vec<_> = cycle_detector::detect_cycles(model)
|
||||
.iter()
|
||||
.map(|cycle| {
|
||||
serde_json::json!({
|
||||
"cycle_path": format!("{} → {}", cycle.join(" → "), cycle.first().unwrap_or(&String::new()))
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Project statistics
|
||||
let project_description = format!(
|
||||
"Python project with {} modules, {} files, and {} symbols.",
|
||||
model.modules.len(), model.files.len(), model.symbols.len()
|
||||
);
|
||||
|
||||
// Prepare data for template
|
||||
let data = serde_json::json!({
|
||||
"project_name": project_name,
|
||||
"project_description": "<FILL_MANUALLY: what this project does in 3–7 lines>",
|
||||
"project_description": project_description,
|
||||
"created_date": &today,
|
||||
"updated_date": &today,
|
||||
"key_decisions": ["<FILL_MANUALLY>"],
|
||||
@@ -290,6 +343,12 @@ impl Renderer {
|
||||
"db_integrations": db_integrations,
|
||||
"http_integrations": http_integrations,
|
||||
"queue_integrations": queue_integrations,
|
||||
"rails_summary": "\n\nNo tooling information available.\n",
|
||||
"layout_items": layout_items,
|
||||
"modules": modules_list,
|
||||
"high_fan_in": high_fan_in,
|
||||
"high_fan_out": high_fan_out,
|
||||
"cycles": cycles,
|
||||
});
|
||||
|
||||
self.templates.render("architecture_md", &data)
|
||||
@@ -464,9 +523,10 @@ impl Renderer {
|
||||
let mut layout_items = Vec::new();
|
||||
|
||||
for file_doc in model.files.values() {
|
||||
let purpose = file_doc.file_purpose.as_deref().unwrap_or("Source file");
|
||||
layout_items.push(serde_json::json!({
|
||||
"path": file_doc.path,
|
||||
"purpose": "Source file",
|
||||
"purpose": purpose,
|
||||
"link": format!("docs/architecture/files/{}.md", sanitize_for_link(&file_doc.path))
|
||||
}));
|
||||
}
|
||||
@@ -603,9 +663,10 @@ impl Renderer {
|
||||
let mut layout_items = Vec::new();
|
||||
|
||||
for file_doc in model.files.values() {
|
||||
let purpose = file_doc.file_purpose.as_deref().unwrap_or("Source file");
|
||||
layout_items.push(serde_json::json!({
|
||||
"path": file_doc.path,
|
||||
"purpose": "Source file",
|
||||
"purpose": purpose,
|
||||
"link": format!("files/{}.md", sanitize_for_link(&file_doc.path))
|
||||
}));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user