feat: improve documentation quality with real data

- Extract file-level docstrings from Python files (module-level string expressions)
- Use __init__.py docstrings as module doc_summary
- Use the first line of each file's docstring as the file purpose in layout tables (falling back to 'Source file' when a file has no docstring)
- Populate module outbound_modules/inbound_modules from import edges (internal only)
- Make filename sanitization consistent (sanitize_for_link matches sanitize_filename)
- Clean up stale .md files from previous runs before generating
- Fill ARCHITECTURE.md template with real layout, modules index, and critical points
- Add file_docstring field to ParsedModule and file_purpose to FileDoc
This commit is contained in:
2026-02-15 04:10:20 +03:00
parent 25fdf400fa
commit c095560e13
24 changed files with 936 additions and 518 deletions

View File

@@ -51,6 +51,7 @@ pub struct FileDoc {
pub outbound_modules: Vec<String>,
pub inbound_files: Vec<String>,
pub symbols: Vec<String>,
pub file_purpose: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -142,6 +143,7 @@ pub struct ParsedModule {
pub imports: Vec<Import>,
pub symbols: Vec<Symbol>,
pub calls: Vec<Call>,
pub file_docstring: Option<String>,
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]

View File

@@ -43,6 +43,9 @@ impl PythonAnalyzer {
let mut symbols = Vec::new();
let mut calls = Vec::new();
// Extract file-level docstring (first statement if it's a string expression)
let file_docstring = self.extract_docstring(&ast);
for stmt in &ast {
self.extract_from_statement(stmt, None, &mut imports, &mut symbols, &mut calls, 0);
}
@@ -53,6 +56,7 @@ impl PythonAnalyzer {
imports,
symbols,
calls,
file_docstring,
};
self.cache_manager.store_module(file_path, parsed_module.clone())?;
@@ -580,10 +584,25 @@ impl PythonAnalyzer {
}
}
// First pass: collect __init__.py docstrings keyed by module_id
let mut init_docstrings: std::collections::HashMap<String, String> = std::collections::HashMap::new();
for parsed_module in modules {
if parsed_module.path.file_name().map(|f| f == "__init__.py").unwrap_or(false)
&& let Some(ref ds) = parsed_module.file_docstring {
let module_id = self.compute_module_path(&parsed_module.path);
init_docstrings.insert(module_id, ds.clone());
}
}
for parsed_module in modules {
let module_id = self.compute_module_path(&parsed_module.path);
let file_id = parsed_module.path.to_string_lossy().to_string();
// Use file docstring first line as file purpose
let file_purpose = parsed_module.file_docstring.as_ref().map(|ds| {
ds.lines().next().unwrap_or(ds).to_string()
});
let file_doc = FileDoc {
id: file_id.clone(),
path: parsed_module.path.to_string_lossy().to_string(),
@@ -592,6 +611,7 @@ impl PythonAnalyzer {
outbound_modules: Vec::new(),
inbound_files: Vec::new(),
symbols: parsed_module.symbols.iter().map(|s| s.id.clone()).collect(),
file_purpose,
};
project_model.files.insert(file_id.clone(), file_doc);
@@ -601,11 +621,21 @@ impl PythonAnalyzer {
project_model.symbols.insert(symbol.id.clone(), symbol);
}
// Use __init__.py docstring for module doc_summary, or file docstring for single-file modules
let is_init = parsed_module.path.file_name().map(|f| f == "__init__.py").unwrap_or(false);
let doc_summary = if is_init {
parsed_module.file_docstring.clone()
} else {
// For non-init files, check if there's an __init__.py docstring for this module's parent
init_docstrings.get(&module_id).cloned()
.or_else(|| parsed_module.file_docstring.clone())
};
let module = Module {
id: module_id.clone(),
path: parsed_module.path.to_string_lossy().to_string(),
files: vec![file_id.clone()],
doc_summary: None,
doc_summary,
outbound_modules: Vec::new(),
inbound_modules: Vec::new(),
symbols: parsed_module.symbols.iter().map(|s| s.id.clone()).collect(),
@@ -668,6 +698,9 @@ impl PythonAnalyzer {
}
fn build_dependency_graphs(&self, project_model: &mut ProjectModel, parsed_modules: &[ParsedModule]) -> Result<(), ArchDocError> {
// Collect known internal module IDs
let known_modules: std::collections::HashSet<String> = project_model.modules.keys().cloned().collect();
for parsed_module in parsed_modules {
let from_module_id = self.compute_module_path(&parsed_module.path);
@@ -683,6 +716,41 @@ impl PythonAnalyzer {
}
}
// Populate outbound_modules and inbound_modules from edges
// Only include internal modules (ones that exist in project_model.modules)
for edge in &project_model.edges.module_import_edges {
let from_id = &edge.from_id;
// Try to match the import to an internal module
// Import "src.core.SomeClass" should match module "src.core"
let to_internal = if known_modules.contains(&edge.to_id) {
Some(edge.to_id.clone())
} else {
// Try prefix matching: "foo.bar.baz" -> check "foo.bar", "foo"
let parts: Vec<&str> = edge.to_id.split('.').collect();
let mut found = None;
for i in (1..parts.len()).rev() {
let prefix = parts[..i].join(".");
if known_modules.contains(&prefix) {
found = Some(prefix);
break;
}
}
found
};
if let Some(ref target_module) = to_internal
&& target_module != from_id {
if let Some(module) = project_model.modules.get_mut(from_id)
&& !module.outbound_modules.contains(target_module) {
module.outbound_modules.push(target_module.clone());
}
if let Some(module) = project_model.modules.get_mut(target_module)
&& !module.inbound_modules.contains(from_id) {
module.inbound_modules.push(from_id.clone());
}
}
}
for parsed_module in parsed_modules {
for call in &parsed_module.calls {
let callee_expr = call.callee_expr.clone();

View File

@@ -10,13 +10,8 @@ use chrono::Utc;
use handlebars::Handlebars;
/// Turns a file path or module id into the name used inside generated `.md` links
/// (callers embed the result in `files/{}.md` and `modules/{}.md` paths).
///
/// NOTE(review): this is a diff view without +/- markers, so the body below shows the
/// removed implementation followed by the added one. The hunk header (13 lines -> 8 lines)
/// matches seven removed lines and two added lines.
///
/// Behavior after this commit: a single leading `./` is dropped and every `/` becomes `__`.
/// No other character is rewritten, so `\`, `:`, `*`, `?`, `"`, `<`, `>` and `|` now pass
/// through unchanged. Presumably this mirrors `sanitize_filename`, as the commit message
/// says; that function is not visible here, so verify against it.
fn sanitize_for_link(filename: &str) -> String {
// Removed by this commit: mapped each of / \ : * ? " < > | to a single '_'.
filename
.chars()
.map(|c| match c {
'/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
c => c,
})
.collect()
// Added by this commit: strip a leading "./" (if present), then replace each '/' with "__".
let cleaned = filename.strip_prefix("./").unwrap_or(filename);
cleaned.replace('/', "__")
}
pub struct Renderer {
@@ -278,10 +273,68 @@ impl Renderer {
let today = Utc::now().format("%Y-%m-%d").to_string();
// Collect layout items for template
let mut layout_items = Vec::new();
for file_doc in model.files.values() {
let purpose = file_doc.file_purpose.as_deref().unwrap_or("Source file");
layout_items.push(serde_json::json!({
"path": file_doc.path,
"purpose": purpose,
"link": format!("docs/architecture/files/{}.md", sanitize_for_link(&file_doc.path))
}));
}
// Collect module items for template
let mut modules_list = Vec::new();
for (module_id, module) in &model.modules {
modules_list.push(serde_json::json!({
"name": module_id,
"symbol_count": module.symbols.len(),
"inbound_count": module.inbound_modules.len(),
"outbound_count": module.outbound_modules.len(),
"link": format!("docs/architecture/modules/{}.md", sanitize_for_link(module_id))
}));
}
// Collect critical points
let mut high_fan_in = Vec::new();
let mut high_fan_out = Vec::new();
for (symbol_id, symbol) in &model.symbols {
if symbol.metrics.fan_in > 5 {
high_fan_in.push(serde_json::json!({
"symbol": symbol_id,
"count": symbol.metrics.fan_in,
"critical": symbol.metrics.is_critical,
}));
}
if symbol.metrics.fan_out > 5 {
high_fan_out.push(serde_json::json!({
"symbol": symbol_id,
"count": symbol.metrics.fan_out,
"critical": symbol.metrics.is_critical,
}));
}
}
let cycles: Vec<_> = cycle_detector::detect_cycles(model)
.iter()
.map(|cycle| {
serde_json::json!({
"cycle_path": format!("{} → {}", cycle.join(""), cycle.first().unwrap_or(&String::new()))
})
})
.collect();
// Project statistics
let project_description = format!(
"Python project with {} modules, {} files, and {} symbols.",
model.modules.len(), model.files.len(), model.symbols.len()
);
// Prepare data for template
let data = serde_json::json!({
"project_name": project_name,
"project_description": "<FILL_MANUALLY: what this project does in 37 lines>",
"project_description": project_description,
"created_date": &today,
"updated_date": &today,
"key_decisions": ["<FILL_MANUALLY>"],
@@ -290,6 +343,12 @@ impl Renderer {
"db_integrations": db_integrations,
"http_integrations": http_integrations,
"queue_integrations": queue_integrations,
"rails_summary": "\n\nNo tooling information available.\n",
"layout_items": layout_items,
"modules": modules_list,
"high_fan_in": high_fan_in,
"high_fan_out": high_fan_out,
"cycles": cycles,
});
self.templates.render("architecture_md", &data)
@@ -464,9 +523,10 @@ impl Renderer {
let mut layout_items = Vec::new();
for file_doc in model.files.values() {
let purpose = file_doc.file_purpose.as_deref().unwrap_or("Source file");
layout_items.push(serde_json::json!({
"path": file_doc.path,
"purpose": "Source file",
"purpose": purpose,
"link": format!("docs/architecture/files/{}.md", sanitize_for_link(&file_doc.path))
}));
}
@@ -603,9 +663,10 @@ impl Renderer {
let mut layout_items = Vec::new();
for file_doc in model.files.values() {
let purpose = file_doc.file_purpose.as_deref().unwrap_or("Source file");
layout_items.push(serde_json::json!({
"path": file_doc.path,
"purpose": "Source file",
"purpose": purpose,
"link": format!("files/{}.md", sanitize_for_link(&file_doc.path))
}));
}