|
1 | 1 |
|
2 |
| -import os |
3 | 2 | from dataclasses import dataclass
|
4 | 3 | from functools import partial
|
| 4 | +from urllib.parse import urljoin |
5 | 5 |
|
6 | 6 | from typing import Optional, Callable
|
7 | 7 | from torchtext._download_hooks import load_state_dict_from_url
|
@@ -100,19 +100,19 @@ def encoderConf(self) -> RobertaEncoderConf:
|
100 | 100 |
|
101 | 101 |
|
102 | 102 | XLMR_BASE_ENCODER = RobertaModelBundle(
|
103 |
| - _path=os.path.join(_TEXT_BUCKET, "xlmr.base.encoder.pt"), |
| 103 | + _path=urljoin(_TEXT_BUCKET, "xlmr.base.encoder.pt"), |
104 | 104 | _encoder_conf=RobertaEncoderConf(vocab_size=250002),
|
105 | 105 | transform=partial(get_xlmr_transform,
|
106 |
| - vocab_path=os.path.join(_TEXT_BUCKET, "xlmr.vocab.pt"), |
107 |
| - spm_model_path=os.path.join(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"), |
| 106 | + vocab_path=urljoin(_TEXT_BUCKET, "xlmr.vocab.pt"), |
| 107 | + spm_model_path=urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"), |
108 | 108 | )
|
109 | 109 | )
|
110 | 110 |
|
111 | 111 | XLMR_LARGE_ENCODER = RobertaModelBundle(
|
112 |
| - _path=os.path.join(_TEXT_BUCKET, "xlmr.large.encoder.pt"), |
| 112 | + _path=urljoin(_TEXT_BUCKET, "xlmr.large.encoder.pt"), |
113 | 113 | _encoder_conf=RobertaEncoderConf(vocab_size=250002, embedding_dim=1024, ffn_dimension=4096, num_attention_heads=16, num_encoder_layers=24),
|
114 | 114 | transform=partial(get_xlmr_transform,
|
115 |
| - vocab_path=os.path.join(_TEXT_BUCKET, "xlmr.vocab.pt"), |
116 |
| - spm_model_path=os.path.join(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"), |
| 115 | + vocab_path=urljoin(_TEXT_BUCKET, "xlmr.vocab.pt"), |
| 116 | + spm_model_path=urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"), |
117 | 117 | )
|
118 | 118 | )
|
0 commit comments