Commit 55046fd4 authored by Arthur Sudbrack Ibarra's avatar Arthur Sudbrack Ibarra :grin:
Browse files

Merge branch 'E02-US02-extracao-chevrolet' into 'develop'

[E02-US02] Task 11 - Extração Tabela Modelos

See merge request veiculos-via-montadora/backend!6
parents 71737177 9d4f6a9f
Showing with 512 additions and 18 deletions
+512 -18
......@@ -23,7 +23,6 @@ variables:
# DB_ENVIRONMENT is set to test to run the tests in the pipeline.
DB_ENVIRONMENT: test
# Stages.
# The stages are used to organize the jobs.
# They run in the order they are defined.
......@@ -42,6 +41,9 @@ test:
stage: test
image: python:3.10.11-bullseye
script:
# Install Java 11 (Needed to use Tabula-Py).
- echo "[Installing Java 11 to use Tabula-Py]"
- apt update && apt install -y openjdk-11-jre openjdk-11-jdk
# Install Dependencies.
- echo "[Installing Poetry]"
- pip install "poetry==1.4.1"
......
# Using the official Python image.
# Python 3.8.0.
# Using the official Python 3.10.11 image for Debian Bullseye.
FROM python:3.10.11-bullseye
# Arguments for the build.
......@@ -7,7 +6,8 @@ FROM python:3.10.11-bullseye
# ENVIRONMENT: development, production.
# Default: development.
#
ARG ENVIRONMENT=development
# MONGODB_HOST: MongoDB connection string.
ARG ENVIRONMENT="development"
# Environment variables.
ENV ENVIRONMENT=${ENVIRONMENT} \
......@@ -17,12 +17,14 @@ ENV ENVIRONMENT=${ENVIRONMENT} \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
POETRY_VERSION=1.4.1 \
MONGODB_HOST=<SECRET>
POETRY_VERSION=1.4.1
# Set the working directory.
WORKDIR /app
# Install Java 11 (Needed to use Tabula-Py).
RUN apt update && apt install -y openjdk-11-jre openjdk-11-jdk
# Install Poetry.
RUN pip install "poetry==$POETRY_VERSION"
......
......@@ -33,4 +33,6 @@ class Veiculo(BaseModel):
num_portas: str = ""
num_renavam: str = ""
status: str = "PENDENTE"
producao: str = ""
desc_vendas: str = ""
pdf_names: List[str] = []
from fastapi import APIRouter, File, UploadFile
from fastapi import APIRouter, File, Form, UploadFile
from app.api.models.veiculo import Veiculo
from app.database.mongo import get_database
from app.api.repositories.veiculo_repository import VeiculoRepository
......@@ -7,7 +7,8 @@ from app.api.services.veiculo_service import VeiculoService
# Car router.
#
# Here we define the routes for the car resource.
# We call the CarController to handle the requests.
# This router also acts as the controller for the car resource.
# It receives the requests, calls the service and returns the response.
# These variables start with an underscore to indicate that they are 'private'.
# They are not meant to be used outside of this file.
......@@ -17,6 +18,8 @@ _veiculo_service = VeiculoService(_repository)
_veiculo_router = APIRouter(prefix="/veiculos")
## Routes - START ##
@_veiculo_router.get("/")
def get_veiculos() -> list[Veiculo]:
    """Handle GET on the router root by returning whatever the service's get_all() yields."""
    all_veiculos = _veiculo_service.get_all()
    return all_veiculos
......@@ -42,17 +45,14 @@ def delete_veiculo(sigla: str) -> str:
return _veiculo_service.delete(sigla)
# It contains a single endpoint that receives the PDF file.
# NOTE(review): two versions of the upload endpoint (create_upload_file and
# create_veiculo_by_pdf) appear interleaved below - confirm which lines belong
# to the current file before relying on this section.
@_veiculo_router.post("/upload/pdf")
def create_upload_file(form_data: UploadFile = File(...)):
contents = form_data.file.read() # Reads the bytes of the uploaded PDF.
# At this point the PDF bytes are held in the application's memory.
# The idea is to call a function that will handle the PDF bytes and extract their data.
# Reads the uploaded file's bytes and name, then hands them to the service
# together with the 'montadora' form field.
def create_veiculo_by_pdf(file: UploadFile = File(...), montadora: str = Form(...)):
pdf_bytes = file.file.read()
file_name = file.filename
return _veiculo_service.create_by_pdf(file_name, pdf_bytes, montadora)
# This is the uploaded file's name. It is meant to be used when saving the veiculo JSON in the database.
name = form_data.filename
return {"filename": name}
## Routes - END ##
# This function is used to get the car router.
......
from io import BytesIO
from fastapi import HTTPException
from app.api.repositories.veiculo_repository import VeiculoRepository
from app.api.models.veiculo import Veiculo
from app.pdf.chevrolet.ChevroletPDFReader import ChevroletPDFReader
CHEVROLET_MONTADORA = "chevrolet"
# Service class.
#
# This class is responsible for handling the business logic.
# It will call the repository layer to get the data and return the response.
# It will also handle any exceptions that are raised by the repository layer.
class VeiculoService:
def __init__(self, repository: VeiculoRepository):
self._repository = repository
......@@ -39,3 +45,92 @@ class VeiculoService:
raise HTTPException(
status_code=400, detail="Dado nao encontrado para deletar.")
return sigla
# This function will create a Veiculo object using the data read from a PDF file.
# It redirects the call to the correct function based on the 'montadora' parameter.
def create_by_pdf(self, file_name: str, pdf_bytes: bytes, montadora: str) -> list[Veiculo]:
    """Create or update vehicles from the data read out of a PDF file.

    Dispatches to the extractor matching ``montadora``. The comparison is
    case-insensitive and ignores surrounding whitespace.

    Args:
        file_name: Name of the uploaded PDF.
        pdf_bytes: Raw content of the PDF.
        montadora: Manufacturer name selecting the extractor.

    Returns:
        The list of vehicles produced by the manufacturer-specific extractor.

    Raises:
        HTTPException: 400 when the manufacturer is not supported.
    """
    if montadora.strip().lower() == CHEVROLET_MONTADORA:
        return self._create_by_pdf_chevrolet(file_name, pdf_bytes)
    raise HTTPException(
        status_code=400, detail="Montadora inválida.")
# This function will create a Veiculo object using the data read from a PDF file.
# It is specific for Chevrolet PDFs.
def _create_by_pdf_chevrolet(self, file_name: str, pdf_bytes: bytes) -> list[Veiculo]:
    """Create or update one vehicle per line of a Chevrolet PDF's introduction tables.

    Args:
        file_name: Name of the uploaded PDF; also used to pick the tabula mode.
        pdf_bytes: Raw content of the PDF.

    Returns:
        The vehicles returned by ``self.create`` / ``self.update``, in the
        order their lines appear in the PDF.
    """
    # PDFs from 2023 tend to only work with the lattice mode on,
    # while PDFs from 2022 tend to only work with the lattice mode off.
    # This is not a rule, only what has been observed MOST of the time:
    # each PDF has its own layout and there is no way to know beforehand.
    #
    # From tabula-py documentation:
    #
    # "lattice: Force PDF to be extracted using lattice-mode extraction
    # (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"
    lattice = '2023' in file_name
    pdf_reader = ChevroletPDFReader(BytesIO(pdf_bytes), lattice=lattice)

    # DEBUG: Print the PDF tables.
    # pdf_reader.print_tables()

    # (column name, minimum similarity ratio) -> key used in the extracted dict.
    # E.g. data of the column whose name is at least 85% similar to
    # 'CÓDIGO VENDAS' is stored under the 'sigla' key.
    column_rules = {
        ("CÓDIGO VENDAS", 85): "sigla",
        ("DESCRIÇÃO VENDAS", 85): "desc_vendas",
        ("MARCA/MODELO", 50): "num_renavam",
        ("DESCRIÇÃO NO CAT", 85): "desc_cat",
        ("PRODUÇÃO", 85): "producao",
    }

    # Read every line of every table in the introduction group.
    table_group = ChevroletPDFReader.INTRODUCTION_GROUP
    vehicles_data = [
        pdf_reader.get_line_values(table_group, table_index, line_index, column_rules)
        for table_index in range(pdf_reader.get_tables_count(table_group))
        for line_index in range(pdf_reader.get_lines_count(table_group, table_index))
    ]

    # List of vehicles to be returned by the API.
    vehicles = []
    for vehicle_dict in vehicles_data:
        # A column that failed the fuzzy match is simply absent from the dict.
        # Without a 'sigla' there is no key to store the vehicle under, so
        # the line is skipped instead of failing the whole upload.
        sigla = vehicle_dict.get("sigla")
        if not sigla:
            continue

        # Look the vehicle up by its 'sigla' to decide between create and update.
        # NOTE(review): get_by_sigla presumably raises when nothing is found -
        # confirm, and narrow this to that exception type if so.
        try:
            vehicle_found = self.get_by_sigla(sigla)
        except Exception:
            # Not found, no problem: the vehicle is created below.
            vehicle_found = None

        # Setting the data from the PDF to the Veiculo object.
        vehicle = Veiculo(
            sigla=sigla,
            desc_cat=vehicle_dict.get("desc_cat", ""),
            num_renavam=vehicle_dict.get("num_renavam", ""),
            producao=vehicle_dict.get("producao", ""),
            desc_vendas=vehicle_dict.get("desc_vendas", ""),
        )
        vehicle.status = "PROCESSADO"

        if vehicle_found is None:
            # New vehicle: this PDF is its first source.
            vehicle.pdf_names = [file_name]
            vehicles.append(self.create(vehicle))
        else:
            # Existing vehicle: keep the previous PDF names and add this one.
            vehicle.pdf_names = vehicle_found.pdf_names + [file_name]
            vehicles.append(self.update(sigla, vehicle))

    return vehicles
# This function will create a Veiculo object using the data read from a PDF file.
# It is specific for Jeep PDFs.
# (NOT IMPLEMENTED YET)
def _create_by_pdf_jeep(self, file_name: str, pdf_bytes: bytes) -> Veiculo:
pass
import tabula
from fuzzywuzzy import fuzz
from pandas import DataFrame
from typing import Union, List, Dict, Tuple
from io import BytesIO
from app.pdf.exceptions import *
from os import name as os_name
# This class reads a PDF file and separates the tables into groups.
# The tables are stored in a dictionary where the key is the table group name.
# The value is a list of dataframes.
#
# The table groups are:
# Introduction
# Configuration
# Specification
# Accessories
# Unknown
#
# This class offers methods so that manipulations with tabula-py are abstracted away.
class ChevroletPDFReader:
    """Read the tables of a Chevrolet PDF and expose them split into groups.

    The tables extracted by tabula-py are sanitized and stored in a dictionary
    keyed by table group name; each value is the list of dataframes assigned
    to that group. The public methods hide the dataframe handling from callers.
    """

    # Possible table groups.
    INTRODUCTION_GROUP = 'Introduction'
    CONFIGURATION_GROUP = 'Configuration'
    CONFIGURATION_GROUP_2 = 'Configuration 2'
    SPECIFICATION_GROUP = 'Specification'
    ACCESSORIES_1_GROUP = 'Accessories'
    ACCESSORIES_2_GROUP = 'Accessories 2'
    UNKNOWN_GROUP = 'Unknown'

    # Groups assigned sequentially while reading the PDF.
    # Specification and unknown groups are not listed because they are special cases.
    _TABLE_GROUP_NAMES = [INTRODUCTION_GROUP, CONFIGURATION_GROUP, CONFIGURATION_GROUP_2,
                          ACCESSORIES_1_GROUP, ACCESSORIES_2_GROUP]

    def __init__(self, pdf_bytes: BytesIO, lattice: bool = True, fuzzy_matching_ratio_threshold: int = 75):
        """Read every table of the PDF and organize the tables into groups.

        Args:
            pdf_bytes: The PDF content, passed straight to ``tabula.read_pdf``.
            lattice: Forwarded to tabula as its ``lattice`` option.
            fuzzy_matching_ratio_threshold: Minimum fuzzywuzzy ratio for a
                column or line name to count as a match in ``get_cell_value``.
        """
        # On Windows use ANSI encoding, otherwise UTF-8.
        encoding = 'ANSI' if os_name == 'nt' else 'utf-8'
        dataframes = tabula.read_pdf(
            pdf_bytes, pages='all', lattice=lattice, multiple_tables=True, encoding=encoding)
        self._fuzzy_matching_ratio_threshold = fuzzy_matching_ratio_threshold
        self._initial_setup(dataframes)

    def _group_name_at(self, group_index: int) -> str:
        """Return the sequential group name at ``group_index``, or the unknown group past the end."""
        if group_index < len(self._TABLE_GROUP_NAMES):
            return self._TABLE_GROUP_NAMES[group_index]
        return ChevroletPDFReader.UNKNOWN_GROUP

    @staticmethod
    def _sanitize_columns(dataframe: DataFrame) -> None:
        """Drop 'unnamed' columns, cast the remaining data to str and clean the column names (in place)."""
        # Iterate over a snapshot because the columns are changed inside the loop.
        for column in list(dataframe.columns):
            # Headers are not guaranteed to be strings, so normalize before inspecting them.
            column_name = str(column)
            if 'unnamed' in column_name.lower():
                del dataframe[column]
                continue
            # Transform all column data to string to avoid type errors later on.
            dataframe[column] = dataframe[column].astype(str)
            # Remove line breaks from the column name.
            dataframe.rename(
                columns={column: column_name.replace('\r', ' ')}, inplace=True)

    @staticmethod
    def _same_columns(previous_table: DataFrame, dataframe: DataFrame) -> bool:
        """Tell whether both tables have the same number of columns and every previous column is present."""
        if len(previous_table.columns) != len(dataframe.columns):
            return False
        return all(column in dataframe.columns for column in previous_table.columns)

    def _initial_setup(self, dataframes: List[DataFrame]) -> None:
        """Sanitize the dataframes and distribute them into the table groups.

        Consecutive tables with the same columns stay in the same group; a
        table with different columns moves on to the next sequential group.
        """
        # Map of tables by group, initialized with empty lists.
        self._tables_by_group: Dict[str, List[DataFrame]] = {
            table_group: [] for table_group in self._TABLE_GROUP_NAMES}
        self._tables_by_group[ChevroletPDFReader.SPECIFICATION_GROUP] = []
        self._tables_by_group[ChevroletPDFReader.UNKNOWN_GROUP] = []

        # Position, in _TABLE_GROUP_NAMES, of the group currently being filled.
        current_table_group_index = 0

        for dataframe in dataframes:
            # Don't consider empty tables.
            if dataframe.empty or len(dataframe.columns) == 0:
                continue

            self._sanitize_columns(dataframe)

            # A table made only of unnamed columns has nothing left to offer.
            if len(dataframe.columns) == 0:
                continue

            # Don't consider tables with only one column, unless it is the
            # technical specifications table, which is stored in its own group.
            if len(dataframe.columns) == 1:
                column_name = dataframe.columns[0]
                # Check if the column name is similar to "Especificações Técnicas".
                ratio = fuzz.ratio(str.lower(column_name),
                                   'especificações técnicas')
                if ratio >= 50:
                    self._tables_by_group[ChevroletPDFReader.SPECIFICATION_GROUP].append(
                        dataframe)
                continue

            table_group = self._group_name_at(current_table_group_index)
            tables = self._tables_by_group[table_group]
            # A table whose columns differ from the previous table of the
            # current group starts the next group.
            if tables and not self._same_columns(tables[-1], dataframe):
                current_table_group_index += 1
                table_group = self._group_name_at(current_table_group_index)
            self._tables_by_group[table_group].append(dataframe)

    def _get_table(self, table_group: str, table_index: int) -> DataFrame:
        """Return the table at ``table_index`` of ``table_group``.

        Raises:
            TableGroupNotFoundException: If the group does not exist.
            TableIndexOutOfBoundsException: If the index is past the last table.
        """
        if table_group not in self._tables_by_group:
            raise TableGroupNotFoundException(
                f'Table group "{table_group}" not found.')
        if table_index >= len(self._tables_by_group[table_group]):
            raise TableIndexOutOfBoundsException(
                f'Table index "{table_index}" out of range for table group "{table_group}".')
        return self._tables_by_group[table_group][table_index]

    def _find_most_similar(self, candidates, target: str) -> int:
        """Return the position of the candidate most similar to ``target``, or -1.

        Only candidates whose fuzzywuzzy ratio reaches the reader's threshold
        are considered; on a tie the first one wins.
        """
        best_index = -1
        best_ratio = -1
        lowered_target = target.lower()
        for index, candidate in enumerate(candidates):
            ratio = fuzz.ratio(str(candidate).lower(), lowered_target)
            if ratio >= self._fuzzy_matching_ratio_threshold and ratio > best_ratio:
                best_index = index
                best_ratio = ratio
        return best_index

    def get_cell_value(self,
                       table_group: str,
                       table_index: int,
                       column_index_or_name: Union[int, str],
                       line_number_or_name: Union[int, Tuple[int, str]]) -> str:
        """Return the value of one cell of a table.

        Args:
            table_group: The table group name.
            table_index: The table position inside the table group.
            column_index_or_name: An int is used as the column position
                (negative values count from the end, as in pandas ``iloc``).
                A str is fuzzy-matched against the column names.
            line_number_or_name: An int is used as the line position. A tuple
                ``(column_number, line_name)`` selects the line whose value in
                column ``column_number`` is the most similar to ``line_name``.

        Raises:
            TableGroupNotFoundException: Unknown table group.
            TableIndexOutOfBoundsException: Table index past the last table.
            InvalidArgumentTypeException: Column or line argument of a wrong type.
            ColumnNotFoundException: No column name is similar enough.
            ColumnIndexOutOfBoundsException: Column index past the last column.
            LineIndexOutOfBoundsException: Line number past the last line.
            LineNotFoundException: No line name is similar enough.
        """
        table = self._get_table(table_group, table_index)

        # Resolve the column: by (fuzzy) name or by position.
        if isinstance(column_index_or_name, str):
            column_index = self._find_most_similar(
                table.columns, column_index_or_name)
            if column_index == -1:
                raise ColumnNotFoundException(
                    f'Column with name "{column_index_or_name}" or similar not found for table group "{table_group}" and table index "{table_index}".')
        elif isinstance(column_index_or_name, int):
            column_index = column_index_or_name
        else:
            raise InvalidArgumentTypeException(
                f'Invalid argument type for column_index_or_name: {type(column_index_or_name)}. Expected int or str.')

        if column_index >= len(table.columns):
            raise ColumnIndexOutOfBoundsException(
                f'Column index "{column_index}" out of range for table group "{table_group}", table index "{table_index}" and column index "{column_index_or_name}".')

        # Resolve the line: by position or by (fuzzy) name.
        if isinstance(line_number_or_name, int):
            if line_number_or_name >= len(table):
                raise LineIndexOutOfBoundsException(
                    f'Line index "{line_number_or_name}" out of range for table group "{table_group}" and table index "{table_index}".')
            return table.iloc[line_number_or_name, column_index]

        if isinstance(line_number_or_name, tuple):
            column_number = line_number_or_name[0]
            line_name = line_number_or_name[1]
            line_index = self._find_most_similar(
                table.iloc[:, column_number], line_name)
            if line_index == -1:
                raise LineNotFoundException(
                    f'Line with name "{line_name}" or similar not found for table group "{table_group}", table index "{table_index}" and column index "{column_index}".')
            return table.iloc[line_index, column_index]

        raise InvalidArgumentTypeException(
            f'Invalid argument type for line_number_or_name: {type(line_number_or_name)}. Expected int or tuple (column_number, line_name).')

    def get_line_values(self,
                        table_group: str,
                        table_index: int,
                        line_number: int,
                        column_similarity_rules: Union[Dict[Tuple[str, int], str], None] = None) -> Dict[str, str]:
        """Return the data of one line of a table as a dictionary.

        Args:
            table_group: The table group name.
            table_index: The table position inside the table group.
            line_number: The line position inside the table.
            column_similarity_rules: Optional mapping from
                ``(column name, minimum ratio)`` to the key to use in the
                returned dictionary, for example::

                    {
                        ('CODIGO DE VENDAS', 80): 'cod_vendas',
                        ('MARCA/MODELO', 85): 'brand',
                    }

                Each column is stored under the key of the first rule its
                name matches; columns matching no rule are left out.
                Without rules the column names themselves are the keys.

        Raises:
            TableGroupNotFoundException: Unknown table group.
            TableIndexOutOfBoundsException: Table index past the last table.
            LineIndexOutOfBoundsException: Line number past the last line.
        """
        table = self._get_table(table_group, table_index)
        if line_number >= len(table):
            raise LineIndexOutOfBoundsException(
                f'Line index "{line_number}" out of range for table group "{table_group}" and table index "{table_index}".')

        line_data = {}
        for column_name, column_value in table.iloc[line_number].items():
            if column_similarity_rules is None:
                # No rules: just use the column name as the key.
                line_data[column_name] = column_value
                continue
            lowered_column_name = str(column_name).lower()
            for (rule_name, rule_ratio), rule_key in column_similarity_rules.items():
                if fuzz.ratio(lowered_column_name, rule_name.lower()) >= rule_ratio:
                    line_data[rule_key] = column_value
                    break
        return line_data

    def get_tables_count(self, table_group: str) -> int:
        """Return the number of tables in a table group.

        Raises:
            TableGroupNotFoundException: Unknown table group.
        """
        if table_group not in self._tables_by_group:
            raise TableGroupNotFoundException(
                f'Table group "{table_group}" not found.')
        return len(self._tables_by_group[table_group])

    def get_columns_count(self, table_group: str, table_index: int) -> int:
        """Return the number of columns of a table.

        Raises:
            TableGroupNotFoundException: Unknown table group.
            TableIndexOutOfBoundsException: Table index past the last table.
        """
        return len(self._get_table(table_group, table_index).columns)

    def get_lines_count(self, table_group: str, table_index: int) -> int:
        """Return the number of lines of a table.

        Raises:
            TableGroupNotFoundException: Unknown table group.
            TableIndexOutOfBoundsException: Table index past the last table.
        """
        return len(self._get_table(table_group, table_index))

    def print_tables(self):
        """Print every table of every group. Only for debugging purposes."""
        for group_name, tables in self._tables_by_group.items():
            print(f"Group: {group_name}")
            for index, table in enumerate(tables):
                print(f"Table {index}:")
                print(table)
                print('')
# ======================================================================================================================
# Demo usage, uncomment to test.
# # Instantiate the reader.
# reader = ChevroletPDFReader('carros.pdf')
# # Example 1: Get the value of the cell in the 'Marca/Modelo' column in the first line of the first table in the
# # 'Introduction' group.
# value_1 = reader.get_cell_value(
# ChevroletPDFReader.INTRODUCTION_GROUP, 0, 'Marca/Modelo', 0)
# print(f"Value 1: {value_1}")
# # Example 2: Get the value of the cell in the first column, in the first line of the second table in the
# # 'Introduction' group.
# value_2 = reader.get_cell_value(
# ChevroletPDFReader.INTRODUCTION_GROUP, 1, 0, 0)
# print(f"Value 2: {value_2}")
# value_3 = reader.get_cell_value(
# ChevroletPDFReader.CONFIGURATION_GROUP, 0, 0, 0)
# print(f"Value 3: {value_3}")
# # Using the first configuration table, check if the 'LT Turbo 116cv' configuration has the 'Brake Light' option.
# # This is done by checking if the value of the cell in the 'LT Turbo 116cv' column in the 'Brake Light' line is
# # a X or not.
# value_4 = reader.get_cell_value(
# ChevroletPDFReader.CONFIGURATION_GROUP, 0, 'LT Turbo 116cv', (0, 'Brake Light'))
# print(f"Value 4: {value_4}")
# Custom exceptions classes - START.
class PDFReaderException(Exception):
    """Common base of the PDF reader errors, so callers can catch them all at once."""


class TableGroupNotFoundException(PDFReaderException):
    """Raised when a requested table group does not exist."""


class TableIndexOutOfBoundsException(PDFReaderException):
    """Raised when a table index is out of range for its table group."""


class ColumnIndexOutOfBoundsException(PDFReaderException):
    """Raised when a column index is out of range for its table."""


class LineIndexOutOfBoundsException(PDFReaderException):
    """Raised when a line index is out of range for its table."""


class ColumnNotFoundException(PDFReaderException):
    """Raised when no column with the requested (or a similar) name is found."""


class LineNotFoundException(PDFReaderException):
    """Raised when no line with the requested (or a similar) name is found."""


class InvalidArgumentTypeException(PDFReaderException):
    """Raised when an argument has a type the called method does not accept."""
# Custom exceptions classes - END.
......@@ -24,7 +24,7 @@ services:
- ENVIRONMENT=production
container_name: veiculos-via-montadora-api
environment:
- MONGODB_HOST=<SECRET>
- MONGODB_HOST=mongodb://mongo_user:mongo_password@host.docker.internal:27017
ports:
- "8000:8000"
depends_on:
......
......@@ -165,6 +165,21 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]
[[package]]
name = "fuzzywuzzy"
version = "0.18.0"
description = "Fuzzy string matching in python"
category = "main"
optional = false
python-versions = "*"
files = [
{file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"},
{file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"},
]
[package.extras]
speedup = ["python-levenshtein (>=0.12)"]
[[package]]
name = "h11"
version = "0.14.0"
......@@ -759,4 +774,4 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
[metadata]
lock-version = "2.0"
python-versions = "^3.9.0"
content-hash = "89e974075321546e7841055b5ac967f55304124a2276439cc602e7252ade0f3f"
content-hash = "18d643ac7f84e4997e115ecea512929d8bb392c08232ba092661377077fe82db"
......@@ -15,6 +15,7 @@ pymongo = "4.3.3"
tabula-py = "2.7.0"
httpx = "0.23.3"
python-multipart = "0.0.6"
fuzzywuzzy = "0.18.0"
[tool.poetry.group.dev.dependencies]
autopep8 = "2.0.2"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment