Skip to content

Commit a3efab8

Browse files
authored
Merge pull request doccano#1206 from doccano/feature/auto-labeling
[Feature] Auto Labeling Implementation
2 parents 8d6d41f + c60dbd9 commit a3efab8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3902
-587
lines changed

Pipfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ pyexcel-xlsx = "*"
4343
lockfile = "*"
4444
gunicorn = "*"
4545
fasteners = "*"
46+
auto-labeling-pipeline = "*"
4647

4748
[requires]
4849
python_version = "3.8"

Pipfile.lock

Lines changed: 374 additions & 274 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

app/api/exceptions.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from rest_framework import status
2-
from rest_framework.exceptions import APIException
2+
from rest_framework.exceptions import APIException, PermissionDenied, ValidationError
33

44

55
class FileParseException(APIException):
@@ -10,3 +10,26 @@ class FileParseException(APIException):
1010
def __init__(self, line_num, line, code=None):
1111
detail = self.default_detail.format(line_num, line)
1212
super().__init__(detail, code)
13+
14+
15+
# API error answered with HTTP 400; its default message states that auto
# labeling is not allowed for a document that already has labels.
class AutoLabelingException(APIException):
16+
status_code = status.HTTP_400_BAD_REQUEST
17+
default_detail = 'Auto labeling not allowed for the document with labels.'
18+
19+
20+
class AutoLabeliingPermissionDenied(PermissionDenied):
    """Permission error telling the user they may not perform auto labeling.

    NOTE(review): the class name contains a typo ("Labeliing"). It is kept
    unchanged here because other modules may import it under this spelling.
    """

    # Adjacent string literals are concatenated verbatim, so the first
    # sentence must end with a space to keep the two sentences separated.
    default_detail = (
        'You do not have permission to perform auto labeling. '
        'Please ask the project administrators to add you.'
    )
23+
24+
25+
# Validation error whose default message reports that a connection could not
# be established and asks the user to check the URL or the network.
class URLConnectionError(ValidationError):
26+
default_detail = 'Failed to establish a connection. Please check the URL or network.'
27+
28+
29+
# Validation error whose default message reports that the security token
# included in the request is invalid.
class AWSTokenError(ValidationError):
30+
default_detail = 'The security token included in the request is invalid.'
31+
32+
33+
class SampleDataException(ValidationError):
    """Validation error reported when the response for the sample data is empty."""

    # Adjacent string literals are concatenated verbatim, so the first
    # sentence must end with a space to keep the two sentences separated.
    default_detail = (
        'The response is empty. Maybe the sample data is not appropriate. '
        'Please specify another sample data which returns at least one label.'
    )
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Generated by Django 3.1.5 on 2021-02-01 06:03
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
# Schema migration for the `api` app. It runs after
# `0004_merge_20210114_1117` and performs two operations: it alters
# `label.suffix_key` and creates the `AutoLabelingConfig` model.
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
('api', '0004_merge_20210114_1117'),
11+
]
12+
13+
operations = [
14+
# Redefine `suffix_key` as an optional single character restricted to
# the digits 0-9 and the lowercase letters a-z.
migrations.AlterField(
15+
model_name='label',
16+
name='suffix_key',
17+
field=models.CharField(blank=True, choices=[('0', '0'), ('1', '1'), ('2', '2'), ('3', '3'), ('4', '4'), ('5', '5'), ('6', '6'), ('7', '7'), ('8', '8'), ('9', '9'), ('a', 'a'), ('b', 'b'), ('c', 'c'), ('d', 'd'), ('e', 'e'), ('f', 'f'), ('g', 'g'), ('h', 'h'), ('i', 'i'), ('j', 'j'), ('k', 'k'), ('l', 'l'), ('m', 'm'), ('n', 'n'), ('o', 'o'), ('p', 'p'), ('q', 'q'), ('r', 'r'), ('s', 's'), ('t', 't'), ('u', 'u'), ('v', 'v'), ('w', 'w'), ('x', 'x'), ('y', 'y'), ('z', 'z')], max_length=1, null=True),
18+
),
19+
# Create the table backing `AutoLabelingConfig`; rows are deleted
# together with their project (CASCADE).
migrations.CreateModel(
20+
name='AutoLabelingConfig',
21+
fields=[
22+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
23+
('model_name', models.CharField(max_length=100)),
24+
('model_attrs', models.JSONField(default=dict)),
25+
('template', models.TextField(default='')),
26+
('label_mapping', models.JSONField(default=dict)),
27+
('default', models.BooleanField(default=False)),
28+
('created_at', models.DateTimeField(auto_now_add=True)),
29+
('updated_at', models.DateTimeField(auto_now=True)),
30+
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='auto_labeling_config', to='api.project')),
31+
],
32+
),
33+
]

app/api/models.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import string
22

3+
from auto_labeling_pipeline.models import RequestModelFactory
34
from django.db import models
45
from django.dispatch import receiver
56
from django.db.models.signals import post_save, pre_delete
@@ -343,3 +344,26 @@ def delete_linked_project(sender, instance, using, **kwargs):
343344
project = Project.objects.get(pk=projectInstance.pk)
344345
user.projects.remove(project)
345346
user.save()
347+
348+
349+
class AutoLabelingConfig(models.Model):
    """Auto labeling configuration attached to a project.

    Each row names a request model known to ``RequestModelFactory`` and
    stores the JSON attributes, template text and label mapping that go
    with it. Rows are removed together with their project.
    """

    # Name looked up through ``RequestModelFactory.find`` during validation.
    model_name = models.CharField(max_length=100)
    # JSON attributes for the model; defaults to an empty dict.
    model_attrs = models.JSONField(default=dict)
    # Template text; defaults to an empty string.
    template = models.TextField(default='')
    # JSON label mapping; defaults to an empty dict.
    label_mapping = models.JSONField(default=dict)
    project = models.ForeignKey(Project, related_name='auto_labeling_config', on_delete=models.CASCADE)
    default = models.BooleanField(default=False)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    def __str__(self):
        """Return the model name as the human-readable representation."""
        return self.model_name

    def clean_fields(self, exclude=None):
        """Run the standard field validation, then check ``model_name``.

        Args:
            exclude: Field names to skip, forwarded to the parent method.

        Raises:
            ValidationError: If ``RequestModelFactory.find`` raises for
                ``model_name``. The original exception is chained as the
                cause so the underlying failure stays visible in tracebacks.
        """
        super().clean_fields(exclude=exclude)
        try:
            RequestModelFactory.find(self.model_name)
        except NameError as err:
            raise ValidationError(
                f'The specified model name {self.model_name} does not exist.'
            ) from err
        except Exception as err:
            raise ValidationError('The attributes does not match the model.') from err

app/api/serializers.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from auto_labeling_pipeline.models import RequestModelFactory
12
from django.conf import settings
23
from django.contrib.auth import get_user_model
34
from django.shortcuts import get_object_or_404
@@ -6,7 +7,7 @@
67
from rest_framework.exceptions import ValidationError
78

89

9-
from .models import Label, Project, Document, RoleMapping, Role, Comment
10+
from .models import Label, Project, Document, RoleMapping, Role, Comment, AutoLabelingConfig
1011
from .models import TextClassificationProject, SequenceLabelingProject, Seq2seqProject, Speech2textProject
1112
from .models import DocumentAnnotation, SequenceAnnotation, Seq2seqAnnotation, Speech2textAnnotation
1213

@@ -241,3 +242,37 @@ def get_rolename(cls, instance):
241242
class Meta:
242243
model = RoleMapping
243244
fields = ('id', 'user', 'role', 'username', 'rolename')
245+
246+
247+
class AutoLabelingConfigSerializer(serializers.ModelSerializer):
    """Serialize and validate ``AutoLabelingConfig`` objects.

    Validation checks that the model name is known to
    ``RequestModelFactory``, that the label mapping is a dictionary, and
    that the model can be created from the supplied attributes.
    """

    class Meta:
        model = AutoLabelingConfig
        fields = ('id', 'model_name', 'model_attrs', 'template', 'label_mapping', 'default')
        read_only_fields = ('created_at', 'updated_at')

    def validate_model_name(self, value):
        """Reject model names that ``RequestModelFactory`` cannot find.

        Raises:
            serializers.ValidationError: If the lookup raises ``NameError``.
        """
        try:
            RequestModelFactory.find(value)
        except NameError as err:
            raise serializers.ValidationError(
                f'The specified model name {value} does not exist.'
            ) from err
        return value

    def validate_label_mapping(self, value):
        """Reject label mappings that are not dictionaries.

        The method must be named ``validate_<field_name>`` for the
        serializer framework to call it during field validation.

        Raises:
            serializers.ValidationError: If ``value`` is not a ``dict``.
        """
        if not isinstance(value, dict):
            raise serializers.ValidationError(
                f'The {value} is not a dictionary. Please specify it as a dictionary.'
            )
        return value

    # Former (never auto-invoked) name, kept so explicit callers still work.
    valid_label_mapping = validate_label_mapping

    def validate(self, data):
        """Check that the model can be created from the given attributes.

        Raises:
            serializers.ValidationError: If creating the model fails; the
                message lists the required fields from the model's schema.
        """
        try:
            RequestModelFactory.create(data['model_name'], data['model_attrs'])
        except Exception as err:
            model = RequestModelFactory.find(data['model_name'])
            schema = model.schema()
            required_fields = ', '.join(schema['required']) if 'required' in schema else ''
            # The first literal ends with a space because adjacent string
            # literals are concatenated verbatim.
            raise serializers.ValidationError(
                'The attributes does not match the model. '
                'You need to correctly specify the required fields: {}'.format(required_fields)
            ) from err
        return data

app/api/urls.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
from .views import TextUploadAPI, TextDownloadAPI, CloudUploadAPI
1212
from .views import StatisticsAPI
1313
from .views import RoleMappingList, RoleMappingDetail, Roles
14+
from .views import AutoLabelingTemplateListAPI, AutoLabelingTemplateDetailAPI
15+
from .views import AutoLabelingConfigList, AutoLabelingConfigDetail, AutoLabelingConfigTest, AutoLabelingAnnotation
16+
from .views import AutoLabelingConfigParameterTest, AutoLabelingTemplateTest, AutoLabelingMappingTest
1417

1518
urlpatterns = [
1619
path('health', Health.as_view(), name='health'),
@@ -52,6 +55,51 @@
5255
RoleMappingList.as_view(), name='rolemapping_list'),
5356
path('projects/<int:project_id>/roles/<int:rolemapping_id>',
5457
RoleMappingDetail.as_view(), name='rolemapping_detail'),
58+
path(
59+
route='projects/<int:project_id>/auto-labeling-templates',
60+
view=AutoLabelingTemplateListAPI.as_view(),
61+
name='auto_labeling_templates'
62+
),
63+
path(
64+
route='projects/<int:project_id>/auto-labeling-templates/<str:option_name>',
65+
view=AutoLabelingTemplateDetailAPI.as_view(),
66+
name='auto_labeling_template'
67+
),
68+
path(
69+
route='projects/<int:project_id>/auto-labeling-configs',
70+
view=AutoLabelingConfigList.as_view(),
71+
name='auto_labeling_configs'
72+
),
73+
path(
74+
route='projects/<int:project_id>/auto-labeling-configs/<int:config_id>',
75+
view=AutoLabelingConfigDetail.as_view(),
76+
name='auto_labeling_config'
77+
),
78+
path(
79+
route='projects/<int:project_id>/auto-labeling-config-testing',
80+
view=AutoLabelingConfigTest.as_view(),
81+
name='auto_labeling_config_test'
82+
),
83+
path(
84+
route='projects/<int:project_id>/docs/<int:doc_id>/auto-labeling',
85+
view=AutoLabelingAnnotation.as_view(),
86+
name='auto_labeling_annotation'
87+
),
88+
path(
89+
route='auto-labeling-parameter-testing',
90+
view=AutoLabelingConfigParameterTest.as_view(),
91+
name='auto_labeling_parameter_testing'
92+
),
93+
path(
94+
route='projects/<int:project_id>/auto-labeling-template-testing',
95+
view=AutoLabelingTemplateTest.as_view(),
96+
name='auto_labeling_template_test'
97+
),
98+
path(
99+
route='projects/<int:project_id>/auto-labeling-mapping-testing',
100+
view=AutoLabelingMappingTest.as_view(),
101+
name='auto_labeling_mapping_test'
102+
)
55103
]
56104

57105
# urlpatterns = format_suffix_patterns(urlpatterns, allowed=['json', 'xml'])

0 commit comments

Comments
 (0)