Iterate through rows
import pandas as pd
# iterrows() yields (index label, row as a Series) pairs. It is convenient but
# slow on large frames; prefer vectorized operations or itertuples() when
# performance matters.
for index, row in df.iterrows():
    pass  # placeholder: replace with the per-row logic
Count unique values in dataframe
# Tally how often each distinct value occurs in the `labels` column
# (most frequent value first).
df["labels"].value_counts()
Style max value in a row or column
# Inspired by https://stackoverflow.com/a/45606572/2874139
def highlight_max(data, color='yellow', isBold=True):
    """Build CSS styles that highlight the maximum value(s) in ``data``.

    Meant to be passed to ``DataFrame.style.apply``.

    Args:
        data: A Series (when applied with ``axis=0`` or ``axis=1``) or a
            DataFrame (when applied with ``axis=None``).
        color: Background color for the highlighted cells, or ``None`` to
            leave the background untouched.
        isBold: Whether highlighted cells are rendered in bold.

    Returns:
        For 1-D input, a list of CSS strings, one per element. For 2-D
        input, a DataFrame of CSS strings with the same index and columns
        as ``data``. Cells that are not the maximum get an empty string.
    """
    # Imported locally so the snippet works even when numpy has not been
    # imported at the top of the notebook/script.
    import numpy as np

    # Assemble the CSS declaration applied to every maximum cell.
    declarations = []
    if color is not None:
        declarations.append(f'background-color: {color}')
    if isBold:
        declarations.append('font-weight: bold')
    css = '; '.join(declarations)

    if data.ndim == 1:
        # Series: compare each element with the maximum of the Series.
        is_max = data == data.max()
        return [css if value else '' for value in is_max]

    # DataFrame: compare each cell with the maximum of the whole table.
    is_max = data == data.max().max()
    return pd.DataFrame(np.where(is_max, css, ''), index=data.index, columns=data.columns)
df.style.apply(highlight_max, axis=1) # axis=1: highlight the max of each row
df.style.apply(highlight_max, axis=0) # axis=0: highlight the max of each column
Display 1000 rows and columns
# source: fast.ai material
def display_all(df, max_rows=1000, max_columns=1000):
    """Display ``df`` with pandas' row/column truncation limits raised.

    Args:
        df: The dataframe to display.
        max_rows: Value used for ``display.max_rows`` while displaying.
        max_columns: Value used for ``display.max_columns`` while displaying.
    """
    # option_context restores the previous display options on exit, so the
    # raised limits apply to this one call only.
    with pd.option_context("display.max_rows", max_rows, "display.max_columns", max_columns):
        # NOTE(review): `display` is not defined in this snippet; presumably it
        # is the IPython/Jupyter built-in, so this is meant to run in a notebook.
        display(df)
# Show the dataframe without pandas truncating it (up to 1000 rows and columns)
display_all(df)
Save dataframe as CSV file
# index=False leaves the dataframe's index (row labels) out of the saved file;
# with the default index=True it would be written as an extra first column
df.to_csv(csv_path, index=False)
Create dataframe from Python dictionary
# Each list holds the values of one column, one entry per row.
# All three lists must have the same length when the dataframe is built.
all_questions = []  # values for the 'question' column
all_good_answers = []  # values for the 'good_answer' column
all_bad_answers = []  # values for the 'bad_answer' column

# The dict keys become the column names of the dataframe.
qa_dict = dict(
    question=all_questions,
    good_answer=all_good_answers,
    bad_answer=all_bad_answers,
)
df = pd.DataFrame(qa_dict)
Parse dates in dataframe
# parse_dates converts the listed columns to datetime values while reading.
# low_memory=False makes pandas read the file in one pass instead of in
# chunks, which avoids mixed-type inference within a column.
df = pd.read_csv(
    "train.csv",
    parse_dates=["createddate"],
    low_memory=False,
)
I am working on a project called ML Studio. Want to get early access and product updates? Subscribe here or follow me on Twitter.
Top comments (0)