Merge pull request #80 from rkt-1597/Word_Freq_Counter

Word freq counter
king04aman · Oct 24, 2024 · b857ce2 · b857ce2
2 parents 8c6ae32 + 8d8d056
commit b857ce2
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 0 deletions.
diff --git a/Word_frequency_counter/README.md b/Word_frequency_counter/README.md
@@ -0,0 +1,29 @@
+# Word Frequency Counter
+
+A simple Python script that counts how often each word occurs in a given text document and prints the top 10 words by frequency, along with their number of occurrences.
+
+---
+
+- **Input :** Path of the text file to be processed
+- **Output :** List of top 10 words according to their frequency, along with their frequency of occurrence.
+
+---
+
+## Features :
+
+- User friendly interface
+- Output is in tabular format
+- Case insensitive processing of words
+- Get the top 10 words in the text file which occur most frequently, along with their counts
+
+---
+
+## Usage :
+
+1. Clone the repository
+2. Navigate to the project folder
+3. Run the command :
+
+```bash
+python3 main.py
+```
diff --git a/Word_frequency_counter/main.py b/Word_frequency_counter/main.py
@@ -0,0 +1,28 @@
+import regex as re
+from collections import Counter
+
+def find_words_frequency(file_path):
+    '''
+    This script takes the path of the text file to be processed as input
+    and prints the top ten words and also prints their counts in the given text file.
+    '''
+    with open(file_path, 'r', encoding='utf-8') as file:
+        text = file.read().lower()
+
+    # Use `regex`'s findall function
+    all_words = re.findall(r'\b\p{L}+\b', text)
+    word_frequency = Counter(all_words)
+    most_common_words = word_frequency.most_common(10)
+
+    # Print in tabular format
+    print(f"{'Word':<15} {'Count':<5}")
+    print("-" * 20)
+    for word, count in most_common_words:
+        print(f"{word:<15} {count:<5}")
+
+def main():
+    file_path = input("Enter the path of file : ")
+    find_words_frequency(file_path)
+
+# Start the interactive prompt only when this file is run as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
diff --git a/Word_frequency_counter/requirments.txt b/Word_frequency_counter/requirments.txt
@@ -0,0 +1 @@
+regex=2.5.147
diff --git a/Word_frequency_counter/runtime.txt b/Word_frequency_counter/runtime.txt
@@ -0,0 +1 @@
+python-3.10.7