使用label-studio和YOLO识别验证码

安装label-studio

  • 依赖:pyproject.toml
    # Install this project and the pinned dependencies below in editable mode (Tsinghua PyPI mirror):
    # python -m pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
    [project]
    name = 'yolo-verification-code-training'
    version = '0.0.0'
    # Upper bound on the interpreter version; the ultralytics pin below carries the same limit.
    requires-python = '<3.13'
    dependencies = [
      # label-studio
      'label-studio==1.15.0',
      'psycopg[binary]==3.2.3',
      # yolo
      'ultralytics==8.3.51; python_version<"3.13"',
      'pillow==11.0.0',
      'playwright==1.49.1'
    ]
    
    [tool.distutils.egg_info]
    # Base directory for the generated .egg-info metadata.
    egg_base = "target"
    
  • 安装后启动: 安装时的环境变量,可以参考文件:
    .venv/Lib/site-packages/label_studio/core/settings/label_studio.py
    
    启动时的命令:
    .venv\Scripts\label-studio.exe start --init -db postgresql --username postgres --password postgres -p 13500 --data-dir .label-studio-data
    
    启动时不创建新的project,可以使用命令
    .venv\Scripts\label-studio.exe -db postgresql --username postgres --password postgres -p 13500 --data-dir .label-studio-data
    

安装YOLO

YOLO安装时要注意Python的版本:上面的pyproject.toml要求Python版本低于3.13,超过要求的版本是无法安装的。

编写label interface:

<View>
  <!-- The image to annotate is read from the task field "image_url";
       the rectangle labels below are drawn on that image (toName="image_object"). -->
  <Image name="image_object" value="$image_url" />
    <RectangleLabels name="rectangles" toName="image_object" showInline="true">
      <Label value="top" background="red" />
      <Label value="right" background="yellow" />
      <Label value="bottom" background="blue"/>
      <Label value="left" background="green"/>
      <Label value="top-right" background="magenta" />
      <Label value="top-left" background="cyan" />
      <Label value="bottom-right" background="purple" />
      <Label value="bottom-left" background="orange" />
    </RectangleLabels>
</View>

转换

准备导出的文件

目录结构如下:
  • classes.txt
  • notes.json
  • images
    • test
    • train
    • val
  • labels
    • test
    • train
    • val
  • data.yaml
其中data.yaml如下配置:
# Dataset paths: one image directory per split
train: D:/repo/project/datasets/yolo/v8-1/images/train
val: D:/repo/project/datasets/yolo/v8-1/images/val
test: D:/repo/project/datasets/yolo/v8-1/images/test

# Number of classes (must match the number of entries in `names`)
nc: 1

# Class names
names: [
  'name1' # corresponds to the id and name entries in notes.json
]

准备验证码图片

使用playwright拦截和下载验证码图片。
import time
from playwright._impl._errors import Error as PlaywrightError
from playwright.sync_api import sync_playwright, Playwright, Response

def handle_route(response: Response):
    """
    Save captcha images seen in network responses to disk.

    Intended to be registered as a ``response`` event listener on a page.
    A response whose request URL contains
    ``static.geetest.com/captcha_v4/policy/`` is written to
    ``resources/original-yanzhengma-images/`` under the last path segment of
    that URL. All other responses are left untouched.

    :param response: the Playwright response delivered by the ``response`` event.
    """
    import os
    from urllib.parse import urlsplit

    url = response.request.url
    if 'static.geetest.com/captcha_v4/policy/' in url:
        # Take the name from the path component only, so a query string or
        # fragment can never leak into the file name.
        name = urlsplit(url).path.split('/')[-1]
        if name:  # a URL ending in "/" has no file name to save under
            target_dir = 'resources/original-yanzhengma-images'
            # Create the output directory on first use instead of failing.
            os.makedirs(target_dir, exist_ok=True)
            with open(f'{target_dir}/{name}', 'wb') as f:
                f.write(response.body())
    # Called for every response, matched or not, exactly as before.
    response.finished()

def run(playwright: Playwright):
    """
    Drive a visible (non-headless) Chrome window to collect captcha images.

    Opens the target page, fills in the search box and clicks the button that
    triggers the captcha, then refreshes the captcha 100 times. The images
    themselves are saved by ``handle_route``, which is registered as the
    page's ``response`` listener.

    The browser is always closed, even when a step fails part-way through.

    :param playwright: the Playwright driver object from ``sync_playwright()``.
    """
    target_url = 'https://demo'
    chromium = playwright.chromium
    # timeout=0 disables the launch timeout (see the Playwright launch docs).
    browser = chromium.launch(
        executable_path='C:/Program Files/Google/Chrome/Application/chrome.exe',
        headless=False,
        timeout=0,
    )
    try:
        page = browser.new_page()
        page.on('response', handle_route)
        page.goto(target_url)
        # Enter a query and trigger the captcha.
        page.wait_for_selector('#orgName').type('科技公司')
        page.wait_for_selector('.conditionLine .btn-primary').click()
        # Refresh the captcha repeatedly; each refresh loads a new image
        # that the response listener intercepts and saves.
        for index in range(100):
            print(f'下载第{index}张图片。')
            page.wait_for_selector('.geetest_bg')
            # Use Playwright's own wait rather than time.sleep(): with the
            # sync API, time.sleep() blocks Playwright's event processing.
            page.wait_for_timeout(5000)
            page.wait_for_selector('.geetest_refresh').click()
    finally:
        browser.close()

try:
    with sync_playwright() as playwright:
        run(playwright)
except PlaywrightError as err:
    # Best-effort script: a Playwright failure ends the session without a
    # traceback, but the reason is reported instead of silently discarded.
    print(f'Playwright error, download stopped: {err}')

将文件分组

python
from typing import TypedDict
import os
import json
from shutil import move
from settings import TrainSetting

class NoteJsonInfo(TypedDict):
    """Type of the ``info`` object in notes.json (``NoteJson['info']``)."""
    year: int
    version: str
    contributor: str


class NoteJsonCategoriesItem(TypedDict):
    """Type of one element of the ``categories`` list in notes.json (``NoteJson['categories']``)."""
    id: int
    # The class name; write_yaml() copies it into the ``names`` list of data.yaml.
    name: str


class NoteJson(TypedDict):
    """Type of the notes.json document as a whole."""
    categories: list[NoteJsonCategoriesItem]
    info: NoteJsonInfo


# Root directory of the exported YOLO dataset; the functions below read and
# write its images/ and labels/ sub-directories.
train_datasets_yolo = '/train_datasets_yolo/file'
# Path of the notes.json file whose categories are read by write_yaml().
notes_json = '/train_datasets_yolo/file/notes.json'
# Path of the data.yaml file generated by write_yaml().
train_yolo_yaml_path = '/train_datasets_yolo/file/data.yaml'


def delete_space(dir_name: str) -> None:
    """Strip trailing whitespace from every file in a dataset sub-directory.

    Each regular file directly inside ``train_datasets_yolo/<dir_name>`` is
    read as UTF-8 and has the whitespace at the end of its content removed,
    including the final newline and any trailing blank lines. Blank lines in
    the middle of a file are left alone.

    Files that already end cleanly are not rewritten, and sub-directories
    (for example ``train``/``val``/``test`` left by an earlier run) are
    skipped instead of raising.

    :param dir_name: name of the sub-directory under ``train_datasets_yolo``.
    """
    target_dir: str = train_datasets_yolo + '/' + dir_name
    for entry in os.listdir(target_dir):
        path: str = target_dir + '/' + entry
        if not os.path.isfile(path):
            continue
        with open(path, encoding='utf-8') as f:
            text: str = f.read()
        # rstrip() handles all cases the old list.remove('') did not: a file
        # without a trailing newline, several trailing blank lines, or a
        # blank line that appears before the end.
        cleaned: str = text.rstrip()
        if cleaned != text:
            # The read handle is closed by now, so reopening is safe.
            with open(path, mode='w', encoding='utf-8') as f:
                f.write(cleaned)


def copy_files_group() -> None:
    """Split the exported images and their label files into train/val/test.

    The files directly inside ``train_datasets_yolo/images`` are moved into
    ``images/train``, ``images/val`` and ``images/test``. For each image the
    label file with the same stem and a ``.txt`` extension is moved into the
    matching ``labels/<split>`` directory.

    Split sizes, in directory listing order:
        train  first 80 %
        val    next 15 %
        test   the remainder (about 5 %)

    The split directories are created if missing, so the function can be run
    again on an already split dataset, where it finds nothing left to move.

    :raises FileNotFoundError: if an image has no label file. The label is
        moved first, so the image is still in place when this happens.
    """
    images_dir: str = train_datasets_yolo + '/images/'
    labels_dir: str = train_datasets_yolo + '/labels/'
    # Only regular files are candidates; existing split directories from an
    # earlier run must never be treated as images.
    file_list: list[str] = [
        name for name in os.listdir(images_dir)
        if os.path.isfile(images_dir + name)
    ]
    for split in ('train', 'val', 'test'):
        os.makedirs(images_dir + split, exist_ok=True)
        os.makedirs(labels_dir + split, exist_ok=True)
    # Exclusive end indexes of the train and val ranges.
    train_end: int = int(len(file_list) * 0.8)
    val_end: int = int(len(file_list) * 0.15) + train_end
    for index, file in enumerate(file_list):
        if index < train_end:
            split = 'train'
        elif index < val_end:
            split = 'val'
        else:
            split = 'test'
        # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b"), whereas
        # split('.')[0] would look for the wrong label file.
        label: str = os.path.splitext(file)[0] + '.txt'
        move(labels_dir + label, labels_dir + split + '/' + label)
        move(images_dir + file, images_dir + split + '/' + file)


def write_yaml() -> None:
    """Generate data.yaml from the categories listed in notes.json.

    Reads the file at the module-level ``notes_json`` path and writes
    ``train_yolo_yaml_path`` with the three split directories under
    ``train_datasets_yolo``, the class count ``nc`` and the ``names`` list.
    Names are written in the order they appear in ``categories``.
    """
    # The parsed document needs its own name: assigning to ``notes_json``
    # here would make it a local variable and hide the module-level path,
    # so the read below would fail with UnboundLocalError.
    with open(notes_json, encoding='utf-8') as f:
        notes: NoteJson = json.load(f)
    # YAML single-quoted scalars escape a quote by doubling it.
    quoted_names: list[str] = [
        "'" + category['name'].replace("'", "''") + "'"
        for category in notes['categories']
    ]
    with open(train_yolo_yaml_path, mode='w', encoding='utf-8') as f:
        f.write(f'train: {train_datasets_yolo}/images/train\n')
        f.write(f'val: {train_datasets_yolo}/images/val\n')
        f.write(f'test: {train_datasets_yolo}/images/test\n')
        f.write(f"nc: {len(notes['categories'])}\n")
        f.write(f"names: [{', '.join(quoted_names)}]\n")


# Pipeline order: clean up the label files, split images and labels into
# train/val/test, then generate data.yaml.
delete_space('labels')
copy_files_group()
write_yaml()

转换的代码

from ultralytics import YOLO

# NOTE(review): YOLO() is called without a model argument, so the library's
# default model is used — confirm which weights that is for the pinned version.
model = YOLO()
# Reference for the settings that train.yaml may override:
# https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml
# Train with the configuration in train.yaml on the dataset described by data.yaml.
model.train(cfg='train.yaml', data='datasets/yolo/v8-1/data.yaml')

验证

from ultralytics import YOLO
from PIL import Image

# Load the best weights written by the training run.
model = YOLO('runs/detect/train/weights/best.pt')
# Run inference on a list of image paths.
results = model([
    'resources/images/2149ca73-3f174c60951d4ca1a6191cacd66b2154.jpg',
    'resources/images/cf311413-ef0bac7490474f09820a7aaf53741abf.jpg',
])

for i, r in enumerate(results):
    # Render the detections onto the image; the variable name and the channel
    # reversal below suggest a BGR array — TODO confirm against the ultralytics docs.
    im_bgr = r.plot()
    # Reverse the last axis (BGR -> RGB) and wrap the array as a PIL image.
    # NOTE(review): im_rgb is not used afterwards in this snippet.
    im_rgb = Image.fromarray(im_bgr[..., ::-1])
    r.show()
    # Optional: save the rendered result to disk (uses the loop index i).
    # r.save(filename=f'resources/result-images/results{i}.jpg')
    print(r.to_json())