Skip to content

Commit e5ce845

Browse files
committed
chore: update files
1 parent b5d6a0e commit e5ce845

6 files changed

Lines changed: 1812 additions & 3 deletions

File tree

.ant

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
1-
<<<<<<< HEAD
21
source:https://github.com/nandita2000/Alteryx-workflows
3-
=======
42

53

64
## Admin notes
@@ -430,4 +428,3 @@ source:https://github.com/nandita2000/Alteryx-workflows
430428

431429

432430
--------------------------------------------------------------------------------
433-
>>>>>>> 8cab3f6 (chore: update files)

README.md

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,37 @@ In this repository, you will find:
1111
- **Documentation**: Guides, tutorials, and explanations on how to use, customize, and deploy the workflows.
1212
- **Scripts**: Any additional scripts or tools used alongside Alteryx workflows.
1313

14+
## Airflow DAG Conversion
15+
16+
This repository includes a tool to convert Alteryx workflows to Apache Airflow DAGs:
17+
18+
- **`alterxy2airflow.py`**: Python script that converts `.yxmd` files to Airflow DAG Python files
19+
20+
### Usage
21+
22+
```bash
23+
# Convert a single workflow
24+
python alterxy2airflow.py "Accident Workflow.yxmd" "accident_dag.py"
25+
26+
# Convert all workflows in a directory
27+
python alterxy2airflow.py --all "path/to/workflows" "output/dir"
28+
```
29+
30+
### Supported Tools
31+
32+
The converter supports the following Alteryx tool types:
33+
- File Input/Output (CSV, Excel)
34+
- Formula
35+
- Filter
36+
- Sort
37+
- Sample
38+
- Summarize/Aggregate
39+
- Browse
40+
- Multi-Row Formula
41+
- Table Composer
42+
- Charts
43+
- Macro Input/Output
44+
1445
## Usage
1546

1647
To utilize the Alteryx workflows in this repository:

accident_dag.py

Lines changed: 175 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Airflow DAG generated from Alteryx workflow: Accident_Workflow
4+
Source: Accident_Workflow.yxmd
5+
Generated: 2026-02-23 18:32:36
6+
"""
7+
8+
from datetime import datetime, timedelta
9+
from airflow import DAG
10+
from airflow.operators.python import PythonOperator
11+
import pandas as pd
12+
import os
13+
14+
# Defaults handed to the DAG for its tasks: one retry after a five-minute
# pause, no e-mail alerts, and no dependency on the previous run.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=5),
)

# DAG object that the tasks below attach to through `dag=dag`.
dag = DAG(
    dag_id='accident_workflow',
    description='DAG generated from Alteryx workflow: Accident_Workflow',
    default_args=default_args,
    start_date=datetime(2024, 1, 1),
    schedule_interval='@daily',
    catchup=False,
    tags=['alteryx', 'generated'],
)
32+
33+
# Read CSV file
34+
def read_1(
    context=None,
    csv_path=r'C:\Users\ash_s\Downloads\archive (8)\US_Accidents_March23.csv',
    **kwargs,
):
    """Load the source CSV and publish it to XCom as JSON.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        csv_path: File to read. Defaults to the path recorded in the Alteryx
            workflow; override it (e.g. through ``op_kwargs``) on machines
            where that Windows path does not exist.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows read.
    """
    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    # The default path is a raw string: in a normal literal "\U" starts a
    # unicode escape and the module would not even compile.
    df = pd.read_csv(csv_path)

    # NOTE(review): the whole frame is serialised into XCom; confirm the
    # XCom backend can hold a dataset of this size.
    ti.xcom_push(key='data', value=df.to_json())
    return df.shape[0]
40+
41+
# Airflow task that runs read_1 inside `dag`.
task_read_1 = PythonOperator(dag=dag, python_callable=read_1, task_id='read_1')
46+
47+
# Aggregate data
48+
def aggregate_data_2(context=None, **kwargs):
    """Pass the data from ``read_1`` through the (empty) aggregate step.

    The generated workflow lists no group-by fields and no aggregations, so
    the frame is republished unchanged.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows in the frame.

    Raises:
        ValueError: If task ``read_1`` published no ``'data'`` XCom.
    """
    import io

    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    json_data = ti.xcom_pull(key='data', task_ids='read_1')
    if json_data is None:
        raise ValueError("aggregate_data_2: no 'data' XCom from task 'read_1'")
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    df = pd.read_json(io.StringIO(json_data))

    # Group by: []
    # Aggregations: []

    ti.xcom_push(key='data', value=df.to_json())
    return df.shape[0]
60+
61+
# Airflow task that runs aggregate_data_2 inside `dag`.
task_aggregate_data_2 = PythonOperator(
    dag=dag, python_callable=aggregate_data_2, task_id='aggregate_data_2'
)
66+
67+
# Browse/Print data
68+
def browse_3(context=None, **kwargs):
    """Print the shape and first rows of the output of ``aggregate_data_2``.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows in the frame.

    Raises:
        ValueError: If task ``aggregate_data_2`` published no ``'data'`` XCom.
    """
    import io

    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    # Tool 2 is registered under task_id 'aggregate_data_2'; the previously
    # used id 'read_2' does not exist in this DAG.
    json_data = ti.xcom_pull(key='data', task_ids='aggregate_data_2')
    if json_data is None:
        raise ValueError("browse_3: no 'data' XCom from task 'aggregate_data_2'")
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    df = pd.read_json(io.StringIO(json_data))

    print(f"Browse 3 - Shape: {df.shape}")
    print(df.head())
    return df.shape[0]
77+
78+
# Airflow task that runs browse_3 inside `dag`.
task_browse_3 = PythonOperator(dag=dag, python_callable=browse_3, task_id='browse_3')
83+
84+
# Browse/Print data
85+
def browse_4(context=None, **kwargs):
    """Print the shape and first rows of the data published by ``read_1``.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows in the frame.

    Raises:
        ValueError: If task ``read_1`` published no ``'data'`` XCom.
    """
    import io

    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    json_data = ti.xcom_pull(key='data', task_ids='read_1')
    if json_data is None:
        raise ValueError("browse_4: no 'data' XCom from task 'read_1'")
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    df = pd.read_json(io.StringIO(json_data))

    print(f"Browse 4 - Shape: {df.shape}")
    print(df.head())
    return df.shape[0]
94+
95+
# Airflow task that runs browse_4 inside `dag`.
task_browse_4 = PythonOperator(dag=dag, python_callable=browse_4, task_id='browse_4')
100+
101+
# Filter data
102+
def filter_data_5(context=None, **kwargs):
    """Keep only the rows whose ``Timezone`` value is present.

    Reads the frame published by ``aggregate_data_2``, drops rows where
    ``Timezone`` is null, and republishes the result under the ``'data'``
    XCom key.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows remaining after the filter.

    Raises:
        ValueError: If task ``aggregate_data_2`` published no ``'data'`` XCom.
    """
    import io

    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    # Tool 2 is registered under task_id 'aggregate_data_2'; the previously
    # used id 'read_2' does not exist in this DAG.
    json_data = ti.xcom_pull(key='data', task_ids='aggregate_data_2')
    if json_data is None:
        raise ValueError("filter_data_5: no 'data' XCom from task 'aggregate_data_2'")
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    df = pd.read_json(io.StringIO(json_data))

    # Apply filter: Timezone IsNotNull Serious
    # The Alteryx expression is not valid DataFrame.query syntax, so the old
    # query() call always raised and the filter was silently skipped.
    # NOTE(review): the trailing "Serious" looks like an unused operand of
    # the unary IsNotNull operator and is ignored here - confirm against the
    # original workflow.
    if 'Timezone' in df.columns:
        df = df[df['Timezone'].notna()]
    else:
        # Stay best-effort when the column is absent, but say so.
        print("filter_data_5: column 'Timezone' not found; filter skipped")

    ti.xcom_push(key='data', value=df.to_json())
    return df.shape[0]
117+
118+
# Airflow task that runs filter_data_5 inside `dag`.
task_filter_data_5 = PythonOperator(
    dag=dag, python_callable=filter_data_5, task_id='filter_data_5'
)
123+
124+
# Browse/Print data
125+
def browse_7(context=None, **kwargs):
    """Print the shape and first rows of the data published by tool 17.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows in the frame.

    Raises:
        ValueError: If task ``read_17`` published no ``'data'`` XCom.
    """
    import io

    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    # NOTE(review): no task with id 'read_17' is defined in this file, so
    # this pull is expected to come back empty until tool 17 is converted
    # or this id is pointed at the correct upstream task.
    json_data = ti.xcom_pull(key='data', task_ids='read_17')
    if json_data is None:
        # Fail with an explicit message instead of an opaque read_json error.
        raise ValueError("browse_7: no 'data' XCom from task 'read_17'")
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    df = pd.read_json(io.StringIO(json_data))

    print(f"Browse 7 - Shape: {df.shape}")
    print(df.head())
    return df.shape[0]
134+
135+
# Airflow task that runs browse_7 inside `dag`.
task_browse_7 = PythonOperator(dag=dag, python_callable=browse_7, task_id='browse_7')
140+
141+
# Apply formulas
142+
def apply_formulas_16(context=None, **kwargs):
    """Pass the output of ``filter_data_5`` through the formula step.

    The converter emitted no formula expressions for this tool, so the frame
    is republished unchanged under the ``'data'`` XCom key.

    Args:
        context: Task context as a dict; must contain ``'ti'``. When omitted,
            the keyword arguments are used as the context, which is how
            ``PythonOperator`` supplies it.
        **kwargs: Context entries when called with keyword arguments.

    Returns:
        Number of rows in the frame.

    Raises:
        ValueError: If task ``filter_data_5`` published no ``'data'`` XCom.
    """
    import io

    import pandas as pd

    if context is None:
        context = kwargs
    ti = context['ti']

    # Tool 5 is registered under task_id 'filter_data_5'; the previously
    # used id 'read_5' does not exist in this DAG.
    json_data = ti.xcom_pull(key='data', task_ids='filter_data_5')
    if json_data is None:
        raise ValueError("apply_formulas_16: no 'data' XCom from task 'filter_data_5'")
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    df = pd.read_json(io.StringIO(json_data))

    # Apply formulas
    # (none were generated for this tool)

    ti.xcom_push(key='data', value=df.to_json())
    return df.shape[0]
154+
155+
# Airflow task that runs apply_formulas_16 inside `dag`.
task_apply_formulas_16 = PythonOperator(
    dag=dag, python_callable=apply_formulas_16, task_id='apply_formulas_16'
)
160+
161+
162+
163+
# Define task dependencies
# The operators are bound to names of the form task_<task_id>; the bare
# task_<n> names used previously were never defined and raised NameError
# when the module was imported.
task_read_1 >> task_aggregate_data_2
task_read_1 >> task_browse_4
task_aggregate_data_2 >> task_browse_3
task_aggregate_data_2 >> task_filter_data_5
task_filter_data_5 >> task_apply_formulas_16

# NOTE(review): the workflow also contains the edges 16 -> 17, 17 -> 7 and
# 17 -> 22, but no operators exist for tools 17 and 22 in this file. Until
# they are converted, browse_7 is attached to the nearest generated ancestor
# so that it still runs after the formula step - confirm this is acceptable.
task_apply_formulas_16 >> task_browse_7

if __name__ == "__main__":
    dag.test()

0 commit comments

Comments
 (0)