import pandas as pd
Welcome: a comparison of the code used for common queries in two popular data manipulation libraries, Pandas in Python and dplyr in R.
Written on July 4, 2023
Load library
Loading pandas
Loading dplyr
library(dplyr)
Create data frame
# Column-oriented sample data: each key is a column name and each list
# holds that column's four row values.
data_sales = {
    'group': ['A', 'B', 'C', 'A'],
    'sales': [2000, 1500, 3000, 2500],
    'gender': ['male', 'female',
               'male', 'male'],
    'age': [25, 30, 35, 32]
}
# Create the DataFrame
df = pd.DataFrame(data_sales)
# Build the same four-row sample data as a tibble, one vector per column.
df <- tibble(
  group = c("A", "B", "C", "A"),
  sales = c(2000, 1500, 3000, 2500),
  gender = c("male", "female",
             "male", "male"),
  age = c(25, 30, 35, 32)
)
Select column(s)
# A list of column names inside [] selects those columns as a DataFrame.
df[["group", "gender"]]
# Keep only the group and gender columns.
df |>
  select(group, gender)
Filter
# Keep the rows whose sales value is at least 2500.
df.query('sales >= 2500')
# Keep the rows whose sales value is at least 2500.
df |>
  filter(sales >= 2500)
Rename column
# Map old column names to new ones. Without inplace=True, rename returns
# a new DataFrame and leaves df unchanged.
df.rename(columns={'group': 'working_class',
                   'sales': 'product_sales'})
# rename() takes new_name = old_name pairs.
df |>
  rename(working_class = group,
         product_sales = sales)
Create a variable
# Add a new column holding twice the sales value (modifies df in place).
df['sales_times_2'] = df['sales'] * 2
# Add a new column holding twice the sales value.
df |>
  mutate(sales_times_2 = sales * 2)
Modify a variable
# Overwrite the existing sales column with twice its value (modifies df in place).
df['sales'] = df['sales'] * 2
# Reusing an existing column name in mutate() replaces that column.
df |>
  mutate(sales = sales * 2)
Summarization
# Total sales per group. The outer parentheses let the method chain span
# several lines; reset_index() turns the group key back into a column.
(
    df.groupby(['group'])
    .agg({'sales': 'sum'})
    .reset_index()
)
# Total sales per group.
df |>
  group_by(group) |>
  summarise(sales = sum(sales))
Sort
# Sort rows by sales; ascending order is the default.
df.sort_values('sales')
# Sort rows by sales; ascending order is the default.
df |>
  arrange(sales)
Sort (Descending order)
# Sort rows by sales from largest to smallest.
df.sort_values('sales',
               ascending=False)
# desc() reverses the sort order, so the largest sales come first.
df |>
  arrange(desc(sales))