Add embeddings; replace userId with email on business & commercialProperty

This commit is contained in:
2024-07-13 19:44:07 +02:00
parent bf4bd69337
commit b7b34dacab
17 changed files with 123 additions and 763 deletions

View File

@@ -7,6 +7,7 @@ import { join } from 'path';
import pkg from 'pg';
import { rimraf } from 'rimraf';
import sharp from 'sharp';
import winston from 'winston';
import { BusinessListing, CommercialPropertyListing, User, UserData } from '../models/db.model.js';
import { emailToDirName, KeyValueStyle } from '../models/main.model.js';
import * as schema from './schema.js';
@@ -35,7 +36,9 @@ const connectionString = process.env.DATABASE_URL;
// const pool = new Pool({connectionString})
const client = new Pool({ connectionString });
const db = drizzle(client, { schema, logger: true });
// Console-only winston logger; the seed script uses it to trace each user profile before it is embedded.
const logger = winston.createLogger({ transports: [new winston.transports.Console()] });
//Delete Content
await db.delete(schema.commercials);
await db.delete(schema.businesses);
@@ -59,7 +62,10 @@ deleteFilesOfDir(targetPathProperty);
fs.ensureDirSync(`./pictures/logo`);
fs.ensureDirSync(`./pictures/profile`);
fs.ensureDirSync(`./pictures/property`);
for (const userData of usersData) {
/** A `User` without `id`, `created`, `updated`, `hasCompanyLogo` and `hasProfile`; this is the shape that gets serialized for the embedding. */
type UserProfile = Omit<User, 'id' | 'created' | 'updated' | 'hasCompanyLogo' | 'hasProfile'>;
//for (const userData of usersData) {
for (let index = 0; index < usersData.length; index++) {
const userData = usersData[index];
const user: User = { firstname: '', lastname: '', email: '' };
user.licensedIn = [];
userData.licensedIn.forEach(l => {
@@ -87,7 +93,22 @@ for (const userData of usersData) {
user.customerSubType = 'broker';
user.created = new Date();
user.updated = new Date();
const u = await db.insert(schema.users).values(user).returning({ insertedId: schema.users.id, gender: schema.users.gender, email: schema.users.email });
// Strips id, created, updated, hasCompanyLogo and hasProfile from a user and
// returns a shallow copy of every remaining field.
const createUserProfile = ({ id, created, updated, hasCompanyLogo, hasProfile, ...profileFields }: User): UserProfile => profileFields;
const userProfile = createUserProfile(user);
// Serialize once: the same JSON is written to the log and sent to createEmbedding.
const userProfileJson = JSON.stringify(userProfile);
logger.info(`${index} - ${userProfileJson}`);
const embedding = await createEmbedding(userProfileJson);
// sleep() only returns a promise; it has to be awaited, otherwise the loop moves on
// immediately and the 500 ms pause between embedding requests never takes place.
await sleep(500);
// Store the user together with its embedding; only the generated id, the gender and the email are read back.
const returnedUserColumns = { insertedId: schema.users.id, gender: schema.users.gender, email: schema.users.email };
const u = await db.insert(schema.users).values({ ...user, embedding }).returning(returnedUserColumns);
// const u = await db.insert(schema.users).values(user).returning({ insertedId: schema.users.id, gender: schema.users.gender, email: schema.users.email });
generatedUserData.push(u[0]);
i++;
@@ -113,7 +134,7 @@ for (const business of businessJsonData) {
business.created = new Date(business.created);
business.updated = new Date(business.created);
const user = getRandomItem(generatedUserData);
business.userId = user.insertedId;
business.email = user.email;
business.imageName = emailToDirName(user.email);
const embeddingText = JSON.stringify({
type: typesOfBusiness.find(b => b.value === String(business.type))?.name,
@@ -133,6 +154,7 @@ for (const business of businessJsonData) {
reasonForSale: business.reasonForSale,
});
const embedding = await createEmbedding(embeddingText);
// sleep() only returns a promise; without `await` the 300 ms pause between
// embedding requests is skipped and the next statement runs right away.
await sleep(300);
await db.insert(schema.businesses).values({
...business,
embedding: embedding,
@@ -151,10 +173,9 @@ for (const commercial of commercialJsonData) {
const insertionDate = getRandomDateWithinLastYear();
commercial.created = insertionDate;
commercial.updated = insertionDate;
commercial.userId = user.insertedId;
commercial.email = user.email;
commercial.draft = false;
const result = await db.insert(schema.commercials).values(commercial).returning();
//fs.ensureDirSync(`./pictures/property/${result[0].imagePath}/${result[0].serialId}`);
try {
fs.copySync(`./pictures_base/property/${id}`, `./pictures/property/${result[0].imagePath}/${result[0].serialId}`);
} catch (err) {
@@ -165,6 +186,9 @@ for (const commercial of commercialJsonData) {
//End
await client.end();
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 *
 * The pause only takes effect when the returned promise is awaited
 * (`await sleep(500)`); calling `sleep(500)` on its own merely schedules a timer
 * and lets the caller continue immediately.
 *
 * @param ms - Delay in milliseconds, handed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}
async function createEmbedding(text: string): Promise<number[]> {
const response = await openai.embeddings.create({
model: 'text-embedding-3-small',