`import pandas as pd
import os
import string
Helper function to convert column letter to index
def column_letter_to_index(column_letter):
return string.ascii_uppercase.index(column_letter.upper())
Get the folder path and column letter from the user
folder_path = input("Enter the folder path containing Excel files: ")
column_letter = input("Enter the column letter to select (e.g., B): ")
Convert the column letter to a zero-based index
column_index = column_letter_to_index(column_letter)
List to hold the selected columns from each file
selected_columns = []
Loop through all files in the folder
for file_name in os.listdir(folder_path):
# Skip temporary or hidden files (Excel temp files usually start with '~$')
if file_name.startswith('~$'):
continue
# Check if the file is an Excel file
if file_name.endswith('.xlsx') or file_name.endswith('.xls'):
file_path = os.path.join(folder_path, file_name)
# Read the Excel file
df = pd.read_excel(file_path)
# Select the specified column if it exists in the file
if df.shape[1] > column_index: # Ensures that the column exists
selected_columns.append(df.iloc[:, [column_index]])
Concatenate all selected columns
merged_df = pd.concat(selected_columns, axis=1)
Define the output file path
output_file_path = os.path.join(folder_path, 'merged_columns.xlsx')
merged_df.to_excel(output_file_path, index=False)
print(f'Merged Excel file created at: {output_file_path}')
`
Let’s make sure this meets your requirements: if the column has numbers stored as text (e.g., `"123"`, `"45.678"`, `"+89"`, or `"-12.345"`), we’ll convert them to numeric format.
The updated code will:
1. Remove any `+` or `-` operators.
2. Remove any non-numeric characters except for the decimal point.
3. Convert the cleaned text to a float with up to three decimal places, if applicable.
Here's the refined code:
python
import pandas as pd
import os
import string
import re
Helper function to convert column letter to index
def column_letter_to_index(column_letter):
return string.ascii_uppercase.index(column_letter.upper())
Function to clean and convert text data to numbers
def clean_and_convert(value):
# Remove any non-numeric characters except for the decimal point
cleaned_value = re.sub(r'[^\d.]', '', str(value))
try:
# Convert to float and round to 3 decimal places if necessary
num_value = round(float(cleaned_value), 3)
return num_value
except ValueError:
return None # Return None if conversion fails
Get the folder path and column letter from the user
folder_path = input("Enter the folder path containing Excel files: ")
column_letter = input("Enter the column letter to select (e.g., B): ")
Convert the column letter to a zero-based index
column_index = column_letter_to_index(column_letter)
List to hold the selected columns from each file
selected_columns = []
Loop through all files in the folder
for file_name in os.listdir(folder_path):
# Skip temporary or hidden files (Excel temp files usually start with '~$')
if file_name.startswith('~$'):
continue
# Check if the file is an Excel file
if file_name.endswith('.xlsx') or file_name.endswith('.xls'):
file_path = os.path.join(folder_path, file_name)
# Read the Excel file
df = pd.read_excel(file_path)
# Select the specified column if it exists in the file
if df.shape[1] > column_index: # Ensures that the column exists
selected_column = df.iloc[:, column_index]
# Clean and convert each cell in the selected column
selected_column = selected_column.apply(clean_and_convert)
# Append the cleaned column to the list
selected_columns.append(selected_column)
Concatenate all cleaned and converted columns
merged_df = pd.concat(selected_columns, axis=1)
Transpose the data
merged_df = merged_df.T
Define the output file path
output_file_path = os.path.join(folder_path, 'merged_columns_converted_transposed.xlsx')
merged_df.to_excel(output_file_path, index=False)
print(f'Merged, cleaned, converted, and transposed Excel file created at: {output_file_path}')
### Explanation
- **`re.sub(r'[^\d.]', '', str(value))`**: This removes any character that is not a digit or a decimal point.
- **Conversion to Float**: After cleaning, the code attempts to convert the text to a float and rounds it to three decimal places if needed.
Now, if you have values like `"123"`, `"45.678"`, or `"-12.345"`, they’ll be cleaned, converted to numeric format, and rounded to three decimal places.
Top comments (0)