Skip to content

Commit bb2d7ca

Browse files
authored
Add from_pretrained telemetry (huggingface#1461)
* Add from_pretrained usage logging * Add classes * add a telemetry notice * macos
1 parent 4f3ddb6 commit bb2d7ca

File tree

6 files changed

+171
-6
lines changed

6 files changed

+171
-6
lines changed

docs/source/installation.mdx

+21
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,24 @@ git pull
120120
```
121121

122122
Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
123+
124+
## Notice on telemetry logging
125+
126+
Our library gathers telemetry information during `from_pretrained()` requests.
127+
This data includes the version of Diffusers and PyTorch/Flax, the requested model or pipeline class,
128+
and the path to a pretrained checkpoint if it is hosted on the Hub.
129+
This usage data helps us debug issues and prioritize new features.
130+
No private data, such as paths to models saved locally on disk, is ever collected.
131+
132+
We understand that not everyone wants to share additional information, and we respect your privacy,
133+
so you can disable telemetry collection by setting the `DISABLE_TELEMETRY` environment variable from your terminal:
134+
135+
On Linux/macOS:
136+
```bash
137+
export DISABLE_TELEMETRY=YES
138+
```
139+
140+
On Windows (Command Prompt):
141+
```bash
142+
set DISABLE_TELEMETRY=YES
143+
```

src/diffusers/hub_utils.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@
2020
from typing import Dict, Optional, Union
2121
from uuid import uuid4
2222

23+
import requests
2324
from huggingface_hub import HfFolder, whoami
2425

2526
from . import __version__
26-
from .utils import ENV_VARS_TRUE_VALUES, logging
27+
from .utils import ENV_VARS_TRUE_VALUES, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging
2728
from .utils.import_utils import (
2829
_flax_version,
2930
_jax_version,
@@ -45,7 +46,9 @@
4546

4647
MODEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "utils" / "model_card_template.md"
4748
SESSION_ID = uuid4().hex
49+
HF_HUB_OFFLINE = os.getenv("HF_HUB_OFFLINE", "").upper() in ENV_VARS_TRUE_VALUES
4850
DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", "").upper() in ENV_VARS_TRUE_VALUES
51+
HUGGINGFACE_CO_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/api/telemetry/"
4952

5053

5154
def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
@@ -72,6 +75,27 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
7275
return ua
7376

7477

78+
def send_telemetry(data: Dict, name: str):
    """
    Sends logs to the Hub telemetry endpoint.

    The fields in `data` are encoded into the request's user-agent header via `http_user_agent`, and a HEAD request
    is issued to `HUGGINGFACE_CO_TELEMETRY + name`. Nothing is sent when `DISABLE_TELEMETRY` or `HF_HUB_OFFLINE` is
    set. Any exception raised by the request itself is swallowed so that telemetry can never break the caller.

    Args:
        data: the fields to track, e.g. {"example_name": "dreambooth"}
        name: a unique name to differentiate the telemetry logs, e.g. "diffusers_examples" or "diffusers_notebooks"
    """
    # Honor the opt-out / offline switches by returning before any network access.
    # (A bare `pass` here would fall through and send the request anyway.)
    if DISABLE_TELEMETRY or HF_HUB_OFFLINE:
        return

    headers = {"user-agent": http_user_agent(data)}
    endpoint = HUGGINGFACE_CO_TELEMETRY + name
    try:
        r = requests.head(endpoint, headers=headers)
        r.raise_for_status()
    except Exception:
        # We don't want to error in case of connection errors of any kind.
        pass
97+
98+
7599
def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
76100
if token is None:
77101
token = HfFolder.get_token()

src/diffusers/modeling_flax_utils.py

+9
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from requests import HTTPError
2929

3030
from . import __version__, is_torch_available
31+
from .hub_utils import send_telemetry
3132
from .modeling_flax_pytorch_utils import convert_pytorch_state_dict_to_flax
3233
from .utils import (
3334
CONFIG_NAME,
@@ -339,6 +340,10 @@ def from_pretrained(
339340
f"Error no file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME} found in directory "
340341
f"{pretrained_path_with_subfolder}."
341342
)
343+
send_telemetry(
344+
{"model_class": cls.__name__, "model_path": "local", "framework": "flax"},
345+
name="diffusers_from_pretrained",
346+
)
342347
else:
343348
try:
344349
model_file = hf_hub_download(
@@ -354,6 +359,10 @@ def from_pretrained(
354359
subfolder=subfolder,
355360
revision=revision,
356361
)
362+
send_telemetry(
363+
{"model_class": cls.__name__, "model_path": "hub", "framework": "flax"},
364+
name="diffusers_from_pretrained",
365+
)
357366

358367
except RepositoryNotFoundError:
359368
raise EnvironmentError(

src/diffusers/modeling_utils.py

+97-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from requests import HTTPError
2727

2828
from . import __version__
29+
from .hub_utils import send_telemetry
2930
from .utils import (
3031
CONFIG_NAME,
3132
DIFFUSERS_CACHE,
@@ -400,7 +401,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
400401
model_file = None
401402
if is_safetensors_available():
402403
try:
403-
model_file = _get_model_file(
404+
model_file = cls._get_model_file(
404405
pretrained_model_name_or_path,
405406
weights_name=SAFETENSORS_WEIGHTS_NAME,
406407
cache_dir=cache_dir,
@@ -416,7 +417,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
416417
except:
417418
pass
418419
if model_file is None:
419-
model_file = _get_model_file(
420+
model_file = cls._get_model_file(
420421
pretrained_model_name_or_path,
421422
weights_name=WEIGHTS_NAME,
422423
cache_dir=cache_dir,
@@ -531,6 +532,100 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
531532

532533
return model
533534

535+
@classmethod
def _get_model_file(
    cls,
    pretrained_model_name_or_path,
    *,
    weights_name,
    subfolder,
    cache_dir,
    force_download,
    proxies,
    resume_download,
    local_files_only,
    use_auth_token,
    user_agent,
    revision,
):
    """
    Resolve the path of the weights file `weights_name` for `pretrained_model_name_or_path`.

    If the argument is an existing local directory, the file is looked up directly inside it and then, when
    `subfolder` is given, inside that subfolder. Otherwise the file is fetched with `hf_hub_download`, to which
    the remaining keyword arguments are forwarded. On success a `diffusers_from_pretrained` telemetry event is
    sent (tagged "local" or "hub") and the resolved file path is returned.

    Raises:
        EnvironmentError: if the file is missing from the local directory, or if `hf_hub_download` fails
            (unknown repository or revision, missing file, HTTP error, `ValueError`, or another
            `EnvironmentError`).
    """
    pretrained_model_name_or_path = str(pretrained_model_name_or_path)
    telemetry_event = "diffusers_from_pretrained"

    if not os.path.isdir(pretrained_model_name_or_path):
        # Not a local directory: treat the argument as a Hub model identifier.
        try:
            # Load from URL or cache if already cached
            hub_file = hf_hub_download(
                pretrained_model_name_or_path,
                filename=weights_name,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                local_files_only=local_files_only,
                use_auth_token=use_auth_token,
                user_agent=user_agent,
                subfolder=subfolder,
                revision=revision,
            )
            hub_payload = {"model_class": cls.__name__, "model_path": "hub", "framework": "pytorch"}
            send_telemetry(hub_payload, name=telemetry_event)
            return hub_file

        # NOTE: the handler order is significant; the more specific handlers must stay ahead of the
        # generic `HTTPError` / `EnvironmentError` ones.
        except RepositoryNotFoundError:
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
                "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
                "token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
                "login`."
            )
        except RevisionNotFoundError:
            raise EnvironmentError(
                f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for "
                "this model name. Check the model page at "
                f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
            )
        except EntryNotFoundError:
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} does not appear to have a file named {weights_name}."
            )
        except HTTPError as err:
            raise EnvironmentError(
                "There was a specific connection error when trying to load"
                f" {pretrained_model_name_or_path}:\n{err}"
            )
        except ValueError:
            raise EnvironmentError(
                f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
                f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
                f" directory containing a file named {weights_name} or"
                " \nCheckout your internet connection or see how to run the library in"
                " offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
            )
        except EnvironmentError:
            raise EnvironmentError(
                f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from "
                "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                f"containing a file named {weights_name}"
            )

    # Local directory: prefer a checkpoint at the top level, then fall back to the subfolder.
    top_level_file = os.path.join(pretrained_model_name_or_path, weights_name)
    if os.path.isfile(top_level_file):
        # Load from a PyTorch checkpoint
        local_file = top_level_file
    elif subfolder is not None and os.path.isfile(
        os.path.join(pretrained_model_name_or_path, subfolder, weights_name)
    ):
        local_file = os.path.join(pretrained_model_name_or_path, subfolder, weights_name)
    else:
        raise EnvironmentError(
            f"Error no file named {weights_name} found in directory {pretrained_model_name_or_path}."
        )

    local_payload = {"model_class": cls.__name__, "model_path": "local", "framework": "pytorch"}
    send_telemetry(local_payload, name=telemetry_event)
    return local_file
628+
534629
@classmethod
535630
def _load_pretrained_model(
536631
cls,

src/diffusers/pipeline_flax_utils.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from tqdm.auto import tqdm
3030

3131
from .configuration_utils import ConfigMixin
32-
from .hub_utils import http_user_agent
32+
from .hub_utils import http_user_agent, send_telemetry
3333
from .modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin
3434
from .schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin
3535
from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, is_transformers_available, logging
@@ -346,8 +346,16 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
346346
ignore_patterns=ignore_patterns,
347347
user_agent=user_agent,
348348
)
349+
send_telemetry(
350+
{"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "flax"},
351+
name="diffusers_from_pretrained",
352+
)
349353
else:
350354
cached_folder = pretrained_model_name_or_path
355+
send_telemetry(
356+
{"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "flax"},
357+
name="diffusers_from_pretrained",
358+
)
351359

352360
config_dict = cls.load_config(cached_folder)
353361

src/diffusers/pipeline_utils.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
from .configuration_utils import ConfigMixin
3535
from .dynamic_modules_utils import get_class_from_dynamic_module
36-
from .hub_utils import http_user_agent
36+
from .hub_utils import http_user_agent, send_telemetry
3737
from .modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
3838
from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
3939
from .utils import (
@@ -477,7 +477,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
477477
else:
478478
requested_pipeline_class = config_dict.get("_class_name", cls.__name__)
479479
user_agent = {"pipeline_class": requested_pipeline_class}
480-
if custom_pipeline is not None:
480+
if custom_pipeline is not None and not custom_pipeline.endswith(".py"):
481481
user_agent["custom_pipeline"] = custom_pipeline
482482

483483
user_agent = http_user_agent(user_agent)
@@ -504,8 +504,16 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
504504
ignore_patterns=ignore_patterns,
505505
user_agent=user_agent,
506506
)
507+
send_telemetry(
508+
{"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "pytorch"},
509+
name="diffusers_from_pretrained",
510+
)
507511
else:
508512
cached_folder = pretrained_model_name_or_path
513+
send_telemetry(
514+
{"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "pytorch"},
515+
name="diffusers_from_pretrained",
516+
)
509517

510518
config_dict = cls.load_config(cached_folder)
511519

0 commit comments

Comments
 (0)