Gephi (GEXF) datasets#
The GEXF project publishes a small set of example datasets at https://gexf.net/datasets.html. This notebook downloads a few of them and renders them with Graphistry.
[1]:
import os
from pathlib import Path
from urllib.request import Request, urlopen
import graphistry
# To specify Graphistry account & server, use:
# graphistry.register(api=3, username='...', password='...', protocol='https', server='hub.graphistry.com')
# For more options: https://pygraphistry.readthedocs.io/en/latest/server/register.html
[2]:
# Connection settings are read from the environment so that no credentials are
# stored in the notebook. Protocol and server fall back to the public hub.
GRAPHISTRY_PROTOCOL = os.environ.get("GRAPHISTRY_PROTOCOL", "https")
GRAPHISTRY_SERVER = os.environ.get("GRAPHISTRY_SERVER", "hub.graphistry.com")
GRAPHISTRY_USERNAME = os.environ.get("GRAPHISTRY_USERNAME")
GRAPHISTRY_PASSWORD = os.environ.get("GRAPHISTRY_PASSWORD")

# Stop early with a readable message when either credential is missing or empty.
if not (GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD):
    raise RuntimeError("Set GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD to upload.")

# Kept as the cell's last expression so the returned client is displayed.
graphistry.register(
    api=3,
    username=GRAPHISTRY_USERNAME,
    password=GRAPHISTRY_PASSWORD,
    server=GRAPHISTRY_SERVER,
    protocol=GRAPHISTRY_PROTOCOL,
)
[2]:
<graphistry.pygraphistry.GraphistryClient at 0x7bc24bf15d90>
We will download these datasets into a local data/ folder:
C. elegans
Yeast
EuroSiS web graph
[3]:
# Prefer the nested demo data folder when it is reachable from the current
# working directory (presumably when run from the repository root — confirm);
# otherwise fall back to a plain ``data`` folder next to the notebook.
DATA_DIR = Path("demos/demos_databases_apis/gexf/data")
DATA_DIR = DATA_DIR if DATA_DIR.exists() else Path("data")

# Make sure the chosen folder exists before anything is downloaded into it.
DATA_DIR.mkdir(parents=True, exist_ok=True)
def download_gexf(url, path, timeout=60):
    """Download ``url`` and store the response body at ``path``.

    The body is streamed into a sibling ``<name>.part`` file and moved into
    place only after the transfer has finished. A failed or interrupted
    download therefore never leaves a truncated file at ``path`` that a later
    ``path.exists()`` check would mistake for a complete dataset.

    Parameters
    ----------
    url : str
        Location of the file to fetch.
    path : str or os.PathLike
        Destination file; an existing file is overwritten.
    timeout : float, optional
        Timeout in seconds passed to ``urlopen`` so a stalled server cannot
        hang the cell indefinitely.

    Raises
    ------
    urllib.error.URLError
        If the URL cannot be opened.
    OSError
        If the destination cannot be written.
    """
    # Imported locally so the function does not depend on the imports cell
    # being edited.
    import shutil

    path = Path(path)
    tmp_path = path.with_name(path.name + ".part")
    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    try:
        with urlopen(req, timeout=timeout) as response, open(tmp_path, "wb") as f:
            # Stream in chunks instead of holding the whole body in memory.
            shutil.copyfileobj(response, f)
        # Atomic promotion: ``path`` only ever holds a complete download.
        os.replace(tmp_path, path)
    finally:
        # After a successful replace this is a no-op; on errors (including a
        # kernel interrupt) it removes the partial file.
        if tmp_path.exists():
            tmp_path.unlink()
# One (display name, source URL, local path) triple per dataset.
DATASETS = [
    (
        "C. elegans",
        "https://gexf.net/data/celegans.gexf",
        DATA_DIR / "celegans.gexf",
    ),
    (
        "Yeast",
        "https://gexf.net/data/yeast.gexf",
        DATA_DIR / "yeast.gexf",
    ),
    (
        "EuroSiS",
        "https://gexf.net/data/WebAtlas_EuroSiS.gexf",
        DATA_DIR / "WebAtlas_EuroSiS.gexf",
    ),
]

# Files already on disk are reused, so re-running the cell does not re-download.
for name, url, path in DATASETS:
    if path.exists():
        continue
    download_gexf(url, path)

# Displayed as the cell output: one flag per dataset, True when the file is present.
[local_file.exists() for *_, local_file in DATASETS]
[3]:
[True, True, True]
C. elegans#
[4]:
# Load the downloaded C. elegans file into a Graphistry plotter.
g_celegans = graphistry.gexf(str(DATA_DIR / "celegans.gexf"))

# Size of the node and edge tables attached to the plotter.
counts = dict(nodes=len(g_celegans._nodes), edges=len(g_celegans._edges))

# Encoding-related attributes of the plotter, keyed without the leading underscore.
bindings = {
    key: getattr(g_celegans, f"_{key}")
    for key in ("point_color", "point_size", "point_x", "point_y", "edge_color")
}
bindings["play"] = g_celegans._url_params.get("play")

(counts, bindings)
[4]:
({'nodes': 306, 'edges': 2345},
{'point_color': None,
'point_size': None,
'point_x': None,
'point_y': None,
'edge_color': None,
'play': None})
[5]:
# Preview the first five rows of the C. elegans node table.
g_celegans._nodes.head(n=5)
[5]:
| node_id | label | |
|---|---|---|
| 0 | 0 | 1 |
| 1 | 1 | 2 |
| 2 | 10 | 11 |
| 3 | 100 | 101 |
| 4 | 101 | 102 |
[6]:
# Name the visualization, then plot it; the result is the cell's output.
(
    g_celegans
    .name("C. elegans (GEXF)")
    .plot()
)
[6]:
Yeast#
[7]:
# Load the downloaded Yeast file into a Graphistry plotter.
g_yeast = graphistry.gexf(str(DATA_DIR / "yeast.gexf"))

# Size of the node and edge tables attached to the plotter.
counts = dict(nodes=len(g_yeast._nodes), edges=len(g_yeast._edges))

# Encoding-related attributes of the plotter, keyed without the leading underscore.
bindings = {
    key: getattr(g_yeast, f"_{key}")
    for key in ("point_color", "point_size", "point_x", "point_y", "edge_color")
}
bindings["play"] = g_yeast._url_params.get("play")

(counts, bindings)
[7]:
({'nodes': 2361, 'edges': 7182},
{'point_color': None,
'point_size': None,
'point_x': None,
'point_y': None,
'edge_color': None,
'play': None})
[8]:
# Preview the first five rows of the Yeast node table.
g_yeast._nodes.head(n=5)
[8]:
| node_id | label | |
|---|---|---|
| 0 | 4941 | YBR236C |
| 1 | 4942 | YOR151C |
| 2 | 4943 | YML010W |
| 3 | 4944 | YNR016C |
| 4 | 4945 | YLR386W |
[9]:
# Name the visualization, then plot it; the result is the cell's output.
(
    g_yeast
    .name("Yeast (GEXF)")
    .plot()
)
[9]:
EuroSiS web graph#
[10]:
# Load the downloaded EuroSiS web-graph file into a Graphistry plotter.
g_eurosis = graphistry.gexf(str(DATA_DIR / "WebAtlas_EuroSiS.gexf"))

# Size of the node and edge tables attached to the plotter.
counts = dict(nodes=len(g_eurosis._nodes), edges=len(g_eurosis._edges))

# Encoding-related attributes of the plotter, keyed without the leading underscore.
bindings = {
    key: getattr(g_eurosis, f"_{key}")
    for key in ("point_color", "point_size", "point_x", "point_y", "edge_color")
}
bindings["play"] = g_eurosis._url_params.get("play")

(counts, bindings)
[10]:
({'nodes': 1285, 'edges': 7524},
{'point_color': None,
'point_size': None,
'point_x': None,
'point_y': None,
'edge_color': None,
'play': None})
[11]:
# Preview the first five rows of the EuroSiS node table.
g_eurosis._nodes.head(n=5)
[11]:
| node_id | label | country | tag_gender | tag_governance | tag_health | tag_info | tag_internat | tag_nano | tag_people | ... | tag_socioeco | tag_space | tag_transport | tag_agri | tag_biotech | tag_business | tag_comm | tag_energy | tag_environment | tag_food | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10 | Astronomical Institute | Czech Republic | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1 | 1002 | CCSTI La Turbine Rhône-Alpes | France | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2 | 1003 | Laurea University of Applied Sciences | Finland | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 1004 | European Association for Education Law and Policy | International | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 1006 | Les petits débrouillards | Belgium | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
5 rows × 25 columns
[12]:
# Name the visualization, then plot it; the result is the cell's output.
(
    g_eurosis
    .name("EuroSiS (GEXF)")
    .plot()
)
[12]: