python: 字频统计
2021/10/20 20:39:39
本文主要介绍 Python:字频统计,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
"""Interactive character-frequency counter.

Reads text either from a file or from the terminal, counts how often each
character occurs, and writes the counts to a timestamped ``result_*.txt``
file in the current working directory.
"""
import os.path
import re
import time
from collections import Counter

# Directory result files are written to. It keeps a trailing separator so it
# can be concatenated directly with a file name; os.sep (rather than a
# hard-coded backslash) keeps this correct outside Windows.
work_dir = os.getcwd() + os.sep
# Timestamp embedded in the result file name; writeTxtFile() refreshes it.
file_time = time.strftime("%y%m%d%H%M%S")
# Path of the file being analysed; empty while in terminal mode.
file_name = ""
# Raw text typed by the user in terminal mode.
terminal_words = ""
# Minimum count a character needs to be reported (set by getFrequency()).
frequency_number = 0

# Characters that are never counted.
_EXCLUDED_CHARS = ",.。,??1234567890ABCD"
# re.escape keeps the character class valid whatever the list contains.
_EXCLUDED_RE = re.compile("[%s]+" % re.escape(_EXCLUDED_CHARS))

# Absolute Windows-style path: optional drive, at least one directory, and a
# file name with an extension.
_WINDOWS_FILE_RE = re.compile(
    r'^(?P<path>(?:[a-zA-Z]:)?\\(?:[^\\\?\/\*\|<>:"]+\\)+)'
    r'(?P<filename>(?P<name>[^\\\?\/\*\|<>:"]+?)\.'
    r'(?P<ext>[^.\\\?\/\*\|<>:"]+))$'
)


def nameMark():
    """Print the program banner."""
    print(" " + "*" * 20 + " 字频统计 " + "*" * 20)


def separatorLine():
    """Print a horizontal separator made of 50 asterisks."""
    print("*" * 50)


def processTxt(words):
    """Return *words* with whitespace and excluded characters removed.

    All whitespace (spaces, tabs, line breaks) is dropped so that text read
    from multi-line files does not count line breaks as characters.
    """
    words = "".join(words.split())
    return _EXCLUDED_RE.sub("", words)


def getTerminalInput(ter_words):
    """Clean text typed in the terminal, print a confirmation, and return it."""
    ter_words = processTxt(ter_words)
    print("The terminal has been read.")
    separatorLine()
    return ter_words


def isRulePath(file_path):
    """Validate a user-supplied file path.

    Returns the path when it looks like an absolute Windows file path or
    when it names an existing file; otherwise prints "Invalid path" and
    returns False.
    """
    matched = _WINDOWS_FILE_RE.search(file_path)
    if matched:
        return matched.group()
    # Also accept anything that really is a file (relative paths, POSIX
    # paths, files in a drive root), which the pattern above rejects.
    if os.path.isfile(file_path):
        return file_path
    print("Invalid path")
    return False


def getFilesInput(rule_path):
    """Read the whole file at *rule_path* and return its cleaned text.

    Raises OSError if the file cannot be opened and UnicodeDecodeError if it
    is not valid UTF-8.
    """
    # utf-8-sig reads plain UTF-8 as well and drops a leading BOM, which
    # would otherwise be counted as a character.
    with open(rule_path, 'r', encoding='utf-8-sig') as file:
        words = file.read()
    words = processTxt(words)
    print()
    print(f"{rule_path} content has been read!")
    separatorLine()
    return words


def getFrequency():
    """Prompt until the user enters a whole number and return it as a string.

    The accepted value is also stored in the module-level
    ``frequency_number`` so writeTxtFile() can report it.
    """
    global frequency_number
    while True:
        answer = input("Please input number : \n Note: words with frequency less than input times will not "
                       "be recorded! \n :-->").strip()
        # isdecimal() only accepts characters that int() can convert;
        # isdigit() would also accept e.g. superscript digits.
        if answer.isdecimal():
            frequency_number = answer
            separatorLine()
            return frequency_number


def analyseText(words, number):
    """Count the characters of *words*.

    Returns a list of ``(character, count)`` tuples for every character that
    occurs at least ``int(number)`` times, sorted by count in descending
    order; ties keep the order of first appearance.
    """
    threshold = int(number)
    return [(char, count)
            for char, count in Counter(words).most_common()
            if count >= threshold]


def writeTxtFile(result_file):
    """Write the analysis result to a new timestamped file in ``work_dir``.

    *result_file* is the list returned by analyseText(). The header depends
    on whether a file (``file_name`` set) or terminal input was analysed.
    """
    global file_time
    # Refresh the timestamp so successive analyses in one session do not
    # overwrite each other's result file.
    file_time = time.strftime("%y%m%d%H%M%S")
    result_path = work_dir + 'result_' + file_time + '.txt'

    rule = "+" * 50
    gap = "\n" * 2
    if file_name:
        parts = [
            "Analyzing source files:" + file_name, gap,
            f"PS:This file records only {frequency_number} and above!", gap,
            rule, gap,
        ]
    else:
        parts = [
            "The terminal input is as follows:", gap,
            rule, gap,
            terminal_words, gap,
            rule, gap,
        ]
    parts += [str(result_file), gap, rule, gap]
    # One entry per line after the raw list dump.
    parts += [str(item) + "\n" for item in result_file]
    parts.append(rule)

    with open(result_path, 'w', encoding="UTF-8") as file:
        file.write("".join(parts))
    print(f"Completed and saved!\n {work_dir}result_{file_time}.txt")


def _runAnalysis(words):
    """Ask for the threshold, analyse *words*, and save the result."""
    show_time = getFrequency()
    analysis_result = analyseText(words, show_time)
    writeTxtFile(analysis_result)
    separatorLine()


def _askExit():
    """Return True when the user confirms they want to leave the program."""
    answer = input('To exit? y/n \n')
    if answer.upper() == 'Y':
        print('Thank you for using!!!')
        return True
    return False


def main():
    """Run the interactive menu until the user quits."""
    global file_name, terminal_words
    while True:
        nameMark()
        flag = input("File(F) or Terminal(T)[quit -q]: \n").lower()
        if flag == 'q':
            break
        if flag == "t":
            # Forget any previously analysed file so the report gets the
            # terminal header.
            file_name = ""
            terminal_words = input("input text: \n")
            _runAnalysis(processTxt(terminal_words))
            if _askExit():
                return
        elif flag == "f":
            while True:
                candidate = input("file path[quit -q]:-->> ")
                if candidate.lower() == 'q':
                    break
                rule_path = isRulePath(candidate)
                if not rule_path:
                    continue
                try:
                    f_words = getFilesInput(rule_path)
                except (OSError, UnicodeDecodeError) as err:
                    # Report unreadable files instead of crashing.
                    print(f"Cannot read {rule_path}: {err}")
                    continue
                file_name = rule_path
                _runAnalysis(f_words)
                if _askExit():
                    return
        else:
            print("Input error! \n <TXT File> --> F or <Terminal input> --> T ")
            separatorLine()


if __name__ == "__main__":
    main()
这篇关于 Python:字频统计的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2025-01-03用FastAPI掌握Python异步IO:轻松实现高并发网络请求处理
- 2025-01-02封装学习:Python面向对象编程基础教程
- 2024-12-28Python编程基础教程
- 2024-12-27Python编程入门指南
- 2024-12-27Python编程基础
- 2024-12-27Python编程基础教程
- 2024-12-27Python编程基础指南
- 2024-12-24Python编程入门指南
- 2024-12-24Python编程基础入门
- 2024-12-24Python编程基础:变量与数据类型