-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_users.py
35 lines (26 loc) · 901 Bytes
/
process_users.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# %%
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
from dotenv import load_dotenv
from requests import Session
from tqdm import tqdm
from graphql import save_repo
# Load settings from the local ".env" file before the environment is read.
load_dotenv(".env")

# A single HTTP session object, shared by the rest of the script.
session = Session()

# Configuration: take the value of every environment variable whose name
# starts with "GITHUB".
# NOTE(review): presumably these are all GitHub API tokens -- confirm that no
# unrelated GITHUB* variables are set where this script runs.
api_keys = [
    value for name, value in os.environ.items() if name.startswith("GITHUB")
]
print(f"{len(api_keys)=}")

# `keys` is an ad-hoc attribute set on the session object by this script.
# NOTE(review): presumably read by save_repo -- verify in graphql.py.
session.keys = api_keys

strk = pd.read_parquet("strk.parquet")
# %%
# Save every repository listed in relevant_repos.txt, then print the users.
if not api_keys:
    # The pool below is sized by the number of keys, and ThreadPoolExecutor
    # rejects max_workers=0 with a message that hides the real cause.
    raise ValueError(
        "no GITHUB* environment variables found; cannot size the worker pool"
    )

os.makedirs("repos", exist_ok=True)

# The file is read as a header-less CSV; only its first column is used.
relevant_repos = pd.read_csv("relevant_repos.txt", header=None)[0].values

# One worker thread per API key.
with ThreadPoolExecutor(max_workers=len(api_keys)) as executor:
    futures = [
        executor.submit(save_repo, session, repo) for repo in relevant_repos
    ]
    for future in tqdm(as_completed(futures), total=len(futures)):
        # The return value is not used; result() is still called so that an
        # exception raised inside a worker is re-raised here.
        future.result()

# NOTE(review): indentation was lost in the copy this was restored from; the
# user listing is assumed to run once, after all repositories are processed
# -- confirm against the original file.
for user in strk.identity.values:
    print(user)
# %%