-
Notifications
You must be signed in to change notification settings - Fork 52
/
utils.py
59 lines (46 loc) · 1.65 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Databricks notebook source
# MAGIC %md
# MAGIC Utils notebook\
# MAGIC With Databricks we can create a utils notebook that is then used in other notebooks via the `%run` magic\
# MAGIC We will make some of the code from hugging_face_basics available for general use.
# COMMAND ----------
# setup env
import os
import requests
from pathlib import Path
# Look up the signed-in user once and expose it as an environment variable.
username = (
    spark.sql("SELECT current_user()")
    .first()['current_user()']
)
os.environ['USERNAME'] = username

# Catalog / schema / volume / table names made available to notebooks that
# pull this one in via %run.
db_catalog = 'gen_ai_workshop'
db_schema = 'datasets'
db_volume = 'raw_data'
raw_table = 'arxiv_data'
hf_volume = 'hf_volume'

# Vector search endpoint name. The internal dev endpoint is active; the
# commented-out line below holds the alternative endpoint name.
vector_search_endpoint = 'one-env-shared-endpoint-6'
#vector_search_endpoint = 'gen_ai_workshop'

# Transformers cache setup: make sure the backing volume exists before any
# directory is created underneath it.
spark.sql(f"CREATE VOLUME IF NOT EXISTS {db_catalog}.{db_schema}.{hf_volume}")

# String forms of the locations, followed by their Path counterparts.
hf_volume_path = f'/Volumes/{db_catalog}/{db_schema}/{hf_volume}'
transformers_cache = f'{hf_volume_path}/transformers'
downloads_dir = f'{hf_volume_path}/downloads'
tf_cache_path, dload_path = Path(transformers_cache), Path(downloads_dir)

# Create both directories; re-running the notebook is harmless because
# existing directories are accepted (exist_ok=True).
for _cache_dir in (tf_cache_path, dload_path):
    _cache_dir.mkdir(parents=True, exist_ok=True)
# COMMAND ----------
# DBTITLE 1,AI Agent Framework config
# The AI Agent Framework reads its configuration from YAML files.
# It cannot pick up variables through the `%run` imports used elsewhere, so we write them to a file.
import yaml
# Gather the shared location settings under a single top-level key so they
# can be handed over as one YAML document.
common_config = {
    "paths_and_locations": dict(
        db_catalog=db_catalog,
        db_schema=db_schema,
        db_volume=db_volume,
        raw_table=raw_table,
        hf_volume=hf_volume,
        vector_search_endpoint=vector_search_endpoint,
    ),
}

# Write the settings as YAML to a relative path (resolved against the current
# working directory); mode 'w' replaces any existing file of that name.
with open('common_config.yaml', mode='w') as config_file:
    yaml.dump(common_config, stream=config_file)