forked from kaieberl/paper2speech
-
Notifications
You must be signed in to change notification settings - Fork 1
/
bulk_docx_to_txt.py
31 lines (24 loc) · 880 Bytes
/
bulk_docx_to_txt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
from docx import Document
def convert_docx_to_txt(docx_file, txt_file):
    """Extract the paragraph text of a .docx file and save it as UTF-8 text.

    Every paragraph in ``doc.paragraphs`` becomes one line of the output
    file, terminated by a newline. Only the text of those paragraphs is
    written.

    Args:
        docx_file: Path of the .docx file, passed to ``Document``.
        txt_file: Path of the text file to create or overwrite.
    """
    doc = Document(docx_file)

    # Build the output in one pass instead of repeated string concatenation;
    # every paragraph keeps its own trailing newline.
    text = ''.join(paragraph.text + '\n' for paragraph in doc.paragraphs)

    # Explicit UTF-8 so the result does not depend on the platform default.
    with open(txt_file, 'w', encoding='utf-8') as f:
        f.write(text)
def batch_convert_folder(folder_path):
    """Convert every .docx file directly inside *folder_path* to a .txt file.

    Each output is written next to its source, with the same base name and a
    ``.txt`` extension. Entries that are not regular ``.docx`` files
    (sub-directories, previously generated ``.txt`` files, other formats)
    are skipped instead of being handed to the converter.

    Args:
        folder_path: Directory to scan; sub-directories are not descended
            into.
    """
    # Sorted so files are processed in a predictable order.
    for filename in sorted(os.listdir(folder_path)):
        # Word's "~$name.docx" owner/lock files are not real documents.
        if filename.startswith('~$'):
            continue
        # Case-insensitive so "REPORT.DOCX" is picked up as well.
        if not filename.lower().endswith('.docx'):
            continue

        docx_file = os.path.join(folder_path, filename)
        # A directory may also be named "something.docx".
        if not os.path.isfile(docx_file):
            continue

        # Same location and base name, with the extension swapped for .txt.
        txt_file = os.path.splitext(docx_file)[0] + '.txt'
        convert_docx_to_txt(docx_file, txt_file)
# Example usage: set this to the folder that holds the .docx files.
folder_path = '/Users/TARGETFOLDER/'

# Run the conversion only when executed as a script, so importing this
# module to reuse its functions does not touch the filesystem.
if __name__ == "__main__":
    batch_convert_folder(folder_path)