计算markdown标题数量&给markdown注入内容

为了统计 Markdown 文件中的标题数量,可以使用下面的 Python 脚本。

下面的代码有两个主要功能

  • 统计markdown文件的标题数量
  • 给 Markdown 文件注入 Hexo 博客所需的 front-matter(标题、日期等文章头信息)

代码中的路径内容对应

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# 正则匹配相关
import re
# 系统库
import os
# 日期
from datetime import datetime
# 时间
import time

# List the immediate sub-folders of a directory.
def list_subdirectories(root_dir, exclude_names=("unfamiliarwords",)):
    """Return the sorted names of the visible sub-directories of *root_dir*.

    Args:
        root_dir: Directory whose direct children are inspected.
        exclude_names: Folder names to leave out; compared case-insensitively.
            Defaults to the ``UnfamiliarWords`` folder.

    Returns:
        A sorted list of directory names (names only, not full paths).
        Entries starting with ``.`` (hidden folders) are always skipped.

    Raises:
        OSError: If *root_dir* cannot be listed.
    """
    # Normalise once so every comparison below is case-insensitive.
    excluded = {name.lower() for name in exclude_names}

    subdirectories = [
        d for d in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, d))
        and not d.startswith('.')        # skip hidden folders
        and d.lower() not in excluded    # skip explicitly excluded folders
    ]

    subdirectories.sort()
    return subdirectories

# List the Markdown files of a directory.
def list_md_file(file_path):
    """Return the sorted names of the ``.md`` files directly inside *file_path*.

    Args:
        file_path: Directory to scan (not recursive).

    Returns:
        A sorted list of file names (names only, not full paths).

    Raises:
        OSError: If *file_path* cannot be listed.
    """
    md_files = [
        f for f in os.listdir(file_path)
        # A directory that merely ends in ".md" is not a Markdown file and
        # would make a later open() fail, so require a regular file.
        if f.endswith(".md") and os.path.isfile(os.path.join(file_path, f))
    ]
    md_files.sort()
    return md_files

# Count the headings of a Markdown file, grouped by level.
def count_markdown_headings(file_path):
    """Count the ATX (``#``-style) headings in a Markdown file per level.

    Lines inside fenced code blocks (``` or ~~~) are ignored, so ``#``
    comments in embedded code are not mistaken for headings. Only runs of
    one to six ``#`` at the start of a line, followed by whitespace or the
    end of the line, count as a heading.

    Args:
        file_path: Path of the UTF-8 encoded Markdown file.

    Returns:
        A list of ``{"level": int, "headings": int}`` dicts, one per heading
        level that occurs, in order of first appearance. Empty when the file
        has no headings.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    heading_re = re.compile(r'^(#{1,6})(?:\s|$)')
    fence_re = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$')

    heading_count = {}
    open_fence = None  # marker string of the fence we are currently inside
    for line in content.split('\n'):
        fence = fence_re.match(line)

        if open_fence is not None:
            # Inside a code block: only a matching closing fence matters
            # (same character, at least as long, nothing after it).
            if (fence
                    and fence.group(1)[0] == open_fence[0]
                    and len(fence.group(1)) >= len(open_fence)
                    and not fence.group(2).strip()):
                open_fence = None
            continue

        # A backtick fence whose trailing text contains a backtick is inline
        # code, not the start of a fenced block.
        if fence and not (fence.group(1)[0] == '`' and '`' in fence.group(2)):
            open_fence = fence.group(1)
            continue

        heading = heading_re.match(line)
        if heading:
            level = len(heading.group(1))
            heading_count[level] = heading_count.get(level, 0) + 1

    return [
        {"level": level, "headings": count}
        for level, count in heading_count.items()
    ]


# Prepend content to a Markdown file.
def inject_content_to_markdown(file_path, content_to_inject):
    """Prepend *content_to_inject* to the file at *file_path*, in place.

    The injected text is separated from the existing content by one blank
    line. This is best-effort: I/O and decoding failures are reported on
    stdout and not raised, so one bad file does not abort a batch run.

    Args:
        file_path: Path of the UTF-8 encoded Markdown file to rewrite.
        content_to_inject: Text to place at the very top of the file.

    Returns:
        None.
    """
    try:
        # Read the current content first; the file is rewritten as a whole.
        with open(file_path, 'r', encoding='utf-8') as file:
            original_content = file.read()

        new_content = f"{content_to_inject}\n\n{original_content}"

        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(new_content)
    except (OSError, UnicodeError) as e:
        # Only file-system and encoding problems are expected here; anything
        # else is a programming error and should surface.
        print(f"发生错误:{e}")
    else:
        print(f"内容已成功注入到文件:{file_path}")

# Build the Hexo front matter for one file and inject it.
def inject_content_func(markdown_file_path, markdown_file_name):
    """Prepend a Hexo front-matter header to one Markdown file.

    The header carries the title (file name without its extension), the
    current local time, a fixed cover image, an excerpt, one tag and one
    category. A file whose first line is already ``---`` is left untouched
    so that re-running the script does not stack a second header on top.

    Args:
        markdown_file_path: Full path of the Markdown file to modify.
        markdown_file_name: File name (with extension) used for the title.

    Returns:
        None.
    """
    # Detect an existing front-matter block before doing anything else.
    try:
        with open(markdown_file_path, 'r', encoding='utf-8') as file:
            first_line = file.readline()
    except (OSError, UnicodeError):
        # Unreadable here: fall through and let the injection step report it.
        first_line = ""
    if first_line.strip() == "---":
        print(f"Front matter already present, skipped: {markdown_file_path}")
        return

    # NOTE(review): presumably keeps each file's `date` at least one second
    # apart so posts get distinct timestamps — confirm before removing.
    time.sleep(1)

    current_time = datetime.now()

    # splitext drops only the final extension, so names that contain dots
    # (e.g. "1.2_intro.md") keep their full stem.
    markdown_file_name = os.path.splitext(markdown_file_name)[0]

    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
    inject_content = f'''---
title: {markdown_file_name}
date: {formatted_time}
index_img: /img/richpoordad.png
excerpt: The Part of {markdown_file_name} unfamiliar words
tags:
- RichDadPoorDad
categories: English
---'''
    print(inject_content)
    inject_content_to_markdown(markdown_file_path, inject_content)


if __name__ == "__main__":
    # Set to True to log the per-file detail of each section.
    should_log_detail = False
    # Root folder that holds one sub-folder per section.
    folder_path = '/Users/lingxiao/RichDadAndPoorDad'
    subdirectories = list_subdirectories(folder_path)

    total_count = 0

    for subdir in subdirectories:
        folder_sub_path = os.path.join(folder_path, subdir)

        subdir_count = 0

        if should_log_detail:
            print("==========="*5)
            print(f"{subdir} section detail as follows:")

        md_files = list_md_file(folder_sub_path)

        for md_file in md_files:
            whole_md_address = os.path.join(folder_sub_path, md_file)
            # WARNING: this rewrites every Markdown file in place by
            # prepending the front-matter header.
            inject_content_func(whole_md_address, md_file)

            # Each level-2 heading is counted as one word entry.
            result = count_markdown_headings(whole_md_address)
            for info_dict in result:
                if info_dict["level"] == 2:
                    word_count = info_dict["headings"]
                    total_count = total_count + word_count
                    subdir_count = subdir_count + word_count
                    if should_log_detail:
                        print(f'\t{md_file} words count == {word_count}')

        # In detail mode the section name was already printed above, so the
        # summary line only carries the total.
        log_str = f"{subdir} words count == {subdir_count} \n"
        if should_log_detail:
            log_str = f"---Total count:{subdir_count} \n"
        print(log_str)

    print(f"RichDadAndPoorDad 总 数 == {total_count}\n")

计算markdown标题数量&给markdown注入内容
https://jackiedai.github.io/2023/12/21/003tools/001_caculate_H_count/
Author
lingXiao
Posted on
December 21, 2023
Licensed under