changes to imports & import Embeddings

This commit is contained in:
2024-07-12 22:00:29 +02:00
parent b4644ea295
commit bf4bd69337
20 changed files with 751 additions and 140 deletions

View File

@@ -1,15 +1,30 @@
import 'dotenv/config';
import { drizzle } from 'drizzle-orm/node-postgres';
import { existsSync, readFileSync, readdirSync, statSync, unlinkSync } from 'fs';
import { existsSync, readdirSync, readFileSync, statSync, unlinkSync } from 'fs';
import fs from 'fs-extra';
import OpenAI from 'openai';
import { join } from 'path';
import pkg from 'pg';
import { rimraf } from 'rimraf';
import sharp from 'sharp';
import { BusinessListing, CommercialPropertyListing, User, UserData } from 'src/models/db.model.js';
import { emailToDirName } from 'src/models/main.model.js';
import { BusinessListing, CommercialPropertyListing, User, UserData } from '../models/db.model.js';
import { emailToDirName, KeyValueStyle } from '../models/main.model.js';
import * as schema from './schema.js';
const typesOfBusiness: Array<KeyValueStyle> = [
{ name: 'Automotive', value: '1', icon: 'fa-solid fa-car', textColorClass: 'text-green-400' },
{ name: 'Industrial Services', value: '2', icon: 'fa-solid fa-industry', textColorClass: 'text-yellow-400' },
{ name: 'Real Estate', value: '3', icon: 'fa-solid fa-building', textColorClass: 'text-blue-400' },
{ name: 'Uncategorized', value: '4', icon: 'fa-solid fa-question', textColorClass: 'text-cyan-400' },
{ name: 'Retail', value: '5', icon: 'fa-solid fa-money-bill-wave', textColorClass: 'text-pink-400' },
{ name: 'Oilfield SVE and MFG.', value: '6', icon: 'fa-solid fa-oil-well', textColorClass: 'text-indigo-400' },
{ name: 'Service', value: '7', icon: 'fa-solid fa-umbrella', textColorClass: 'text-teal-400' },
{ name: 'Advertising', value: '8', icon: 'fa-solid fa-rectangle-ad', textColorClass: 'text-orange-400' },
{ name: 'Agriculture', value: '9', icon: 'fa-solid fa-wheat-awn', textColorClass: 'text-sky-400' },
{ name: 'Franchise', value: '10', icon: 'fa-solid fa-star', textColorClass: 'text-purple-400' },
{ name: 'Professional', value: '11', icon: 'fa-solid fa-user-gear', textColorClass: 'text-gray-400' },
{ name: 'Manufacturing', value: '12', icon: 'fa-solid fa-industry', textColorClass: 'text-red-400' },
{ name: 'Food and Restaurant', value: '13', icon: 'fa-solid fa-utensils', textColorClass: 'text-amber-700' },
];
const { Pool } = pkg;
const openai = new OpenAI({
@@ -88,11 +103,11 @@ for (const userData of usersData) {
const data = readFileSync(`./pictures_base/logo/${i}.jpg`);
await storeCompanyLogo(data, emailToDirName(u[0].email));
}
//Business Listings
filePath = `./data/businesses.json`;
data = readFileSync(filePath, 'utf8');
const businessJsonData = JSON.parse(data) as BusinessListing[]; // Erwartet ein Array von Objekten
for (const business of businessJsonData) {
delete business.id;
business.created = new Date(business.created);
@@ -100,7 +115,28 @@ for (const business of businessJsonData) {
const user = getRandomItem(generatedUserData);
business.userId = user.insertedId;
business.imageName = emailToDirName(user.email);
await db.insert(schema.businesses).values(business);
const embeddingText = JSON.stringify({
type: typesOfBusiness.find(b => b.value === String(business.type))?.name,
title: business.title,
description: business.description,
city: business.city,
state: business.state,
price: business.price,
realEstateIncluded: business.realEstateIncluded,
leasedLocation: business.leasedLocation,
franchiseResale: business.franchiseResale,
salesRevenue: business.salesRevenue,
cashFlow: business.cashFlow,
supportAndTraining: business.supportAndTraining,
employees: business.employees,
established: business.established,
reasonForSale: business.reasonForSale,
});
const embedding = await createEmbedding(embeddingText);
await db.insert(schema.businesses).values({
...business,
embedding: embedding,
});
}
//Corporate Listings
filePath = `./data/commercials.json`;
@@ -131,7 +167,7 @@ await client.end();
async function createEmbedding(text: string): Promise<number[]> {
const response = await openai.embeddings.create({
model: 'text-embedding-ada-002',
model: 'text-embedding-3-small',
input: text,
});
return response.data[0].embedding;

View File

@@ -1,13 +1,12 @@
import 'dotenv/config';
import { drizzle } from 'drizzle-orm/node-postgres';
import pkg from 'pg';
const { Pool } = pkg;
import * as schema from './schema.js';
import { migrate } from 'drizzle-orm/node-postgres/migrator';
const connectionString = process.env.DATABASE_URL
const pool = new Pool({connectionString})
const { Pool } = pkg;
const connectionString = process.env.DATABASE_URL;
const pool = new Pool({ connectionString });
const db = drizzle(pool, { schema });
// This will run migrations on the database, skipping the ones already applied
await migrate(db, { migrationsFolder: './src/drizzle/migrations' });
//await migrate(db, { migrationsFolder: './src/drizzle/migrations' });
// Don't forget to close the connection, otherwise the script will hang
await pool.end();
//await pool.end();

View File

@@ -0,0 +1 @@
ALTER TABLE "businesses" ADD COLUMN "embedding" vector(1536);

View File

@@ -1,10 +1,8 @@
{
"id": "f8241dfe-8f15-4656-aeb5-c9ef0ad65f28",
"prevId": "00000000-0000-0000-0000-000000000000",
"version": "5",
"dialect": "pg",
"version": "7",
"dialect": "postgresql",
"tables": {
"businesses": {
"public.businesses": {
"name": "businesses",
"schema": "",
"columns": {
@@ -183,21 +181,21 @@
"businesses_userId_users_id_fk": {
"name": "businesses_userId_users_id_fk",
"tableFrom": "businesses",
"tableTo": "users",
"columnsFrom": [
"userId"
],
"tableTo": "users",
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
"onUpdate": "no action",
"onDelete": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"commercials": {
"public.commercials": {
"name": "commercials",
"schema": "",
"columns": {
@@ -352,21 +350,21 @@
"commercials_userId_users_id_fk": {
"name": "commercials_userId_users_id_fk",
"tableFrom": "commercials",
"tableTo": "users",
"columnsFrom": [
"userId"
],
"tableTo": "users",
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
"onUpdate": "no action",
"onDelete": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"users": {
"public.users": {
"name": "users",
"schema": "",
"columns": {
@@ -499,36 +497,41 @@
}
},
"enums": {
"customerSubType": {
"public.customerSubType": {
"name": "customerSubType",
"values": {
"broker": "broker",
"cpa": "cpa",
"attorney": "attorney",
"titleCompany": "titleCompany",
"surveyor": "surveyor",
"appraiser": "appraiser"
}
"schema": "public",
"values": [
"broker",
"cpa",
"attorney",
"titleCompany",
"surveyor",
"appraiser"
]
},
"customerType": {
"public.customerType": {
"name": "customerType",
"values": {
"buyer": "buyer",
"professional": "professional"
}
"schema": "public",
"values": [
"buyer",
"professional"
]
},
"gender": {
"public.gender": {
"name": "gender",
"values": {
"male": "male",
"female": "female"
}
"schema": "public",
"values": [
"male",
"female"
]
}
},
"schemas": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
"tables": {},
"columns": {}
},
"id": "f8241dfe-8f15-4656-aeb5-c9ef0ad65f28",
"prevId": "00000000-0000-0000-0000-000000000000"
}

View File

@@ -0,0 +1,547 @@
{
"id": "aac13d99-fd8d-44f9-bfb0-f00cef85809d",
"prevId": "f8241dfe-8f15-4656-aeb5-c9ef0ad65f28",
"version": "7",
"dialect": "postgresql",
"tables": {
"public.businesses": {
"name": "businesses",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"userId": {
"name": "userId",
"type": "uuid",
"primaryKey": false,
"notNull": false
},
"type": {
"name": "type",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"title": {
"name": "title",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false
},
"city": {
"name": "city",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"state": {
"name": "state",
"type": "char(2)",
"primaryKey": false,
"notNull": false
},
"price": {
"name": "price",
"type": "double precision",
"primaryKey": false,
"notNull": false
},
"favoritesForUser": {
"name": "favoritesForUser",
"type": "varchar(30)[]",
"primaryKey": false,
"notNull": false
},
"draft": {
"name": "draft",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"listingsCategory": {
"name": "listingsCategory",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"realEstateIncluded": {
"name": "realEstateIncluded",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"leasedLocation": {
"name": "leasedLocation",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"franchiseResale": {
"name": "franchiseResale",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"salesRevenue": {
"name": "salesRevenue",
"type": "double precision",
"primaryKey": false,
"notNull": false
},
"cashFlow": {
"name": "cashFlow",
"type": "double precision",
"primaryKey": false,
"notNull": false
},
"supportAndTraining": {
"name": "supportAndTraining",
"type": "text",
"primaryKey": false,
"notNull": false
},
"employees": {
"name": "employees",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"established": {
"name": "established",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"internalListingNumber": {
"name": "internalListingNumber",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"reasonForSale": {
"name": "reasonForSale",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"brokerLicencing": {
"name": "brokerLicencing",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"internals": {
"name": "internals",
"type": "text",
"primaryKey": false,
"notNull": false
},
"imagePath": {
"name": "imagePath",
"type": "varchar(200)",
"primaryKey": false,
"notNull": false
},
"created": {
"name": "created",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"updated": {
"name": "updated",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"visits": {
"name": "visits",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"lastVisit": {
"name": "lastVisit",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"embedding": {
"name": "embedding",
"type": "vector(1536)",
"primaryKey": false,
"notNull": false
}
},
"indexes": {},
"foreignKeys": {
"businesses_userId_users_id_fk": {
"name": "businesses_userId_users_id_fk",
"tableFrom": "businesses",
"tableTo": "users",
"columnsFrom": [
"userId"
],
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"public.commercials": {
"name": "commercials",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"serial_id": {
"name": "serial_id",
"type": "serial",
"primaryKey": false,
"notNull": true
},
"userId": {
"name": "userId",
"type": "uuid",
"primaryKey": false,
"notNull": false
},
"type": {
"name": "type",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"title": {
"name": "title",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false
},
"city": {
"name": "city",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"state": {
"name": "state",
"type": "char(2)",
"primaryKey": false,
"notNull": false
},
"price": {
"name": "price",
"type": "double precision",
"primaryKey": false,
"notNull": false
},
"favoritesForUser": {
"name": "favoritesForUser",
"type": "varchar(30)[]",
"primaryKey": false,
"notNull": false
},
"listingsCategory": {
"name": "listingsCategory",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"hideImage": {
"name": "hideImage",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"draft": {
"name": "draft",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"zipCode": {
"name": "zipCode",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"county": {
"name": "county",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"email": {
"name": "email",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"website": {
"name": "website",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"phoneNumber": {
"name": "phoneNumber",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"imageOrder": {
"name": "imageOrder",
"type": "varchar(200)[]",
"primaryKey": false,
"notNull": false
},
"imagePath": {
"name": "imagePath",
"type": "varchar(200)",
"primaryKey": false,
"notNull": false
},
"created": {
"name": "created",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"updated": {
"name": "updated",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"visits": {
"name": "visits",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"lastVisit": {
"name": "lastVisit",
"type": "timestamp",
"primaryKey": false,
"notNull": false
}
},
"indexes": {},
"foreignKeys": {
"commercials_userId_users_id_fk": {
"name": "commercials_userId_users_id_fk",
"tableFrom": "commercials",
"tableTo": "users",
"columnsFrom": [
"userId"
],
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"public.users": {
"name": "users",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "uuid",
"primaryKey": true,
"notNull": true,
"default": "gen_random_uuid()"
},
"firstname": {
"name": "firstname",
"type": "varchar(255)",
"primaryKey": false,
"notNull": true
},
"lastname": {
"name": "lastname",
"type": "varchar(255)",
"primaryKey": false,
"notNull": true
},
"email": {
"name": "email",
"type": "varchar(255)",
"primaryKey": false,
"notNull": true
},
"phoneNumber": {
"name": "phoneNumber",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false
},
"companyName": {
"name": "companyName",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"companyOverview": {
"name": "companyOverview",
"type": "text",
"primaryKey": false,
"notNull": false
},
"companyWebsite": {
"name": "companyWebsite",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"companyLocation": {
"name": "companyLocation",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"offeredServices": {
"name": "offeredServices",
"type": "text",
"primaryKey": false,
"notNull": false
},
"areasServed": {
"name": "areasServed",
"type": "jsonb",
"primaryKey": false,
"notNull": false
},
"hasProfile": {
"name": "hasProfile",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"hasCompanyLogo": {
"name": "hasCompanyLogo",
"type": "boolean",
"primaryKey": false,
"notNull": false
},
"licensedIn": {
"name": "licensedIn",
"type": "jsonb",
"primaryKey": false,
"notNull": false
},
"gender": {
"name": "gender",
"type": "gender",
"typeSchema": "public",
"primaryKey": false,
"notNull": false
},
"customerType": {
"name": "customerType",
"type": "customerType",
"typeSchema": "public",
"primaryKey": false,
"notNull": false
},
"customerSubType": {
"name": "customerSubType",
"type": "customerSubType",
"typeSchema": "public",
"primaryKey": false,
"notNull": false
},
"created": {
"name": "created",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"updated": {
"name": "updated",
"type": "timestamp",
"primaryKey": false,
"notNull": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {
"public.customerSubType": {
"name": "customerSubType",
"schema": "public",
"values": [
"broker",
"cpa",
"attorney",
"titleCompany",
"surveyor",
"appraiser"
]
},
"public.customerType": {
"name": "customerType",
"schema": "public",
"values": [
"buyer",
"professional"
]
},
"public.gender": {
"name": "gender",
"schema": "public",
"values": [
"male",
"female"
]
}
},
"schemas": {},
"sequences": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View File

@@ -8,6 +8,13 @@
"when": 1717933164279,
"tag": "0000_known_havok",
"breakpoints": true
},
{
"idx": 1,
"version": "7",
"when": 1720777203223,
"tag": "0001_eager_sandman",
"breakpoints": true
}
]
}

View File

@@ -1,5 +1,5 @@
import { boolean, char, doublePrecision, integer, jsonb, pgEnum, pgTable, serial, text, timestamp, uuid, varchar, vector } from 'drizzle-orm/pg-core';
import { AreasServed, LicensedIn } from 'src/models/db.model';
import { AreasServed, LicensedIn } from '../models/db.model';
export const PG_CONNECTION = 'PG_CONNECTION';
export const genderEnum = pgEnum('gender', ['male', 'female']);
export const customerTypeEnum = pgEnum('customerType', ['buyer', 'professional']);